From 218f1b912bbe35ddf73384a85ef696e477c68545 Mon Sep 17 00:00:00 2001 From: Gal Elharar Date: Mon, 9 Feb 2026 17:17:50 +0200 Subject: [PATCH] Restructure PR descriptions with Problem/Root Cause/Solution format and PII scrubbing --- services/templates/claude_prompt.tmpl | 19 ++- services/templates/gemini_prompt.tmpl | 19 +++ services/templates/ticket_prompt.tmpl | 22 ++++ services/ticket_processor.go | 93 +++++++++++-- services/ticket_processor_test.go | 182 +++++++++++++++++++++++--- 5 files changed, 307 insertions(+), 28 deletions(-) diff --git a/services/templates/claude_prompt.tmpl b/services/templates/claude_prompt.tmpl index 1033ca6..1888507 100644 --- a/services/templates/claude_prompt.tmpl +++ b/services/templates/claude_prompt.tmpl @@ -27,8 +27,16 @@ Please provide your response in the following format: ``` -## Summary - +## PR Description + +### Problem +A 1-2 sentence high-level explanation of the bug or feature from the user's perspective. + +### Root Cause +A technical explanation of why the bug occurred or why the change is needed. Reference specific code patterns or files. + +### Solution +A summary of the concrete changes made and why this approach was chosen. ## Changes Made @@ -36,3 +44,10 @@ Please provide your response in the following format: ## Testing ``` + +IMPORTANT RULES for the "PR Description" section: +- Write it for a public GitHub audience — assume reviewers have NO access to internal systems. +- Do NOT include any email addresses. +- Do NOT include any internal URLs (e.g., Jira links, VPN-only pages). +- You may reference the ticket key but do NOT link to it. +- Keep each subsection concise: 1-3 sentences. diff --git a/services/templates/gemini_prompt.tmpl b/services/templates/gemini_prompt.tmpl index 77f54db..6f5c019 100644 --- a/services/templates/gemini_prompt.tmpl +++ b/services/templates/gemini_prompt.tmpl @@ -20,3 +20,22 @@ 7. Review the markdown files (README.md, CONTRIBUTING.md, etc.) to understand how tests should be run for this project. These files might be nested inside directories, so search the entire repository structure. 8. Verify your changes by running the relevant tests to ensure they work correctly. 9. Provide a summary of the changes made. +10. At the END of your response, include a section exactly like this: + +## PR Description + +### Problem +A 1-2 sentence high-level explanation of the bug or feature from the user's perspective. + +### Root Cause +A technical explanation of why the bug occurred or why the change is needed. Reference specific code patterns or files. + +### Solution +A summary of the concrete changes made and why this approach was chosen. + +IMPORTANT RULES for the "PR Description" section: +- Write it for a public GitHub audience — assume reviewers have NO access to internal systems. +- Do NOT include any email addresses. +- Do NOT include any internal URLs (e.g., Jira links, VPN-only pages). +- You may reference the ticket key but do NOT link to it. +- Keep each subsection concise: 1-3 sentences. diff --git a/services/templates/ticket_prompt.tmpl b/services/templates/ticket_prompt.tmpl index 4652466..220b664 100644 --- a/services/templates/ticket_prompt.tmpl +++ b/services/templates/ticket_prompt.tmpl @@ -67,3 +67,25 @@ Before making changes, briefly outline: Then proceed with the implementation and tests. + + +At the END of your response, you MUST include a section exactly like this: + +## PR Description + +### Problem +A 1-2 sentence high-level explanation of the bug or feature from the user's perspective. + +### Root Cause +A technical explanation of why the bug occurred or why the change is needed. Reference specific code patterns or files. + +### Solution +A summary of the concrete changes made and why this approach was chosen. + +IMPORTANT RULES for the "PR Description" section: +- Write it for a public GitHub audience — assume reviewers have NO access to internal systems. +- Do NOT include any email addresses. +- Do NOT include any internal URLs (e.g., Jira links, VPN-only pages). +- You may reference the ticket key (e.g., "{{.Ticket.Key}}") but do NOT link to it. +- Keep each subsection concise: 1-3 sentences. + diff --git a/services/ticket_processor.go b/services/ticket_processor.go index c1eab48..2ab4c06 100644 --- a/services/ticket_processor.go +++ b/services/ticket_processor.go @@ -4,6 +4,7 @@ import ( "bytes" _ "embed" "fmt" + "regexp" "strings" "text/template" "time" @@ -26,6 +27,34 @@ func init() { } } +// PII scrubbing regexes +var ( + emailRegex = regexp.MustCompile(`[\w.\-+]+@[\w.\-]+\.\w+`) + internalURLRegex = regexp.MustCompile(`https?://[^\s)]*\.(redhat\.com|internal\.[^\s)]*)(:[0-9]+)?(/[^\s)]*)?`) +) + +// scrubPII removes email addresses and internal URLs from text. +func scrubPII(text string) string { + text = internalURLRegex.ReplaceAllString(text, "[internal link removed]") + text = emailRegex.ReplaceAllString(text, "[email redacted]") + return text +} + +// fallbackPRBody builds a minimal structured PR body from the ticket data +// when the AI output doesn't contain a "## PR Description" section. +func fallbackPRBody(ticket *models.JiraTicketResponse) string { + summary := scrubPII(ticket.Fields.Summary) + description := scrubPII(ticket.Fields.Description) + + body := fmt.Sprintf("This PR addresses %s.\n\n### Problem\n\n%s\n", ticket.Key, summary) + if description != "" { + body += fmt.Sprintf("\n%s\n", description) + } + body += "\n### Root Cause\n\n_See diff for details._\n" + body += "\n### Solution\n\n_See code changes in this PR._\n" + return body +} + // TicketProcessor defines the interface for processing Jira tickets type TicketProcessor interface { // ProcessTicket processes a single Jira ticket @@ -330,6 +359,7 @@ func (p *TicketProcessorImpl) ProcessTicket(ticketKey string) error { zap.String("ticket", ticketKey), zap.Int("maxRetries", maxRetries)) + var aiOutput interface{} for attempt := 1; attempt <= maxRetries; attempt++ { p.logger.Info("AI code generation attempt", zap.String("ticket", ticketKey), @@ -337,7 +367,7 @@ func (p *TicketProcessorImpl) ProcessTicket(ticketKey string) error { zap.Int("maxRetries", maxRetries)) // Run AI service - _, err = p.aiService.GenerateCode(prompt, repoDir) + aiOutput, err = p.aiService.GenerateCode(prompt, repoDir) if err != nil { p.logger.Error("AI code generation failed", zap.String("ticket", ticketKey), @@ -458,17 +488,27 @@ func (p *TicketProcessorImpl) ProcessTicket(ticketKey string) error { } // Create PR content (redact if security level is set) - prTitle := fmt.Sprintf("%s: %s", ticketKey, ticket.Fields.Summary) - prBody := fmt.Sprintf("This PR addresses the issue described in [%s](%s/browse/%s).\n\n**Summary:** %s\n\n**Description:** %s", - ticketKey, p.config.Jira.BaseURL, ticketKey, ticket.Fields.Summary, ticket.Fields.Description) - - // Add assignee information if available - if ticket.Fields.Assignee != nil { - prBody += fmt.Sprintf("\n\n**Assignee:** %s (%s)", ticket.Fields.Assignee.DisplayName, ticket.Fields.Assignee.EmailAddress) - } - + var prTitle, prBody string if hasSecurityLevel { prTitle, prBody = redactPRContentForSecurity(ticketKey) + } else { + prTitle = fmt.Sprintf("%s: %s", ticketKey, scrubPII(ticket.Fields.Summary)) + + // Try to parse the "## PR Description" section from the AI output + if aiOutputStr, ok := aiOutput.(string); ok && aiOutputStr != "" { + prBody = parsePRDescription(aiOutputStr) + } + + if prBody != "" { + prBody = scrubPII(prBody) + p.logger.Info("Using AI-generated PR description from model output", + zap.String("ticket", ticketKey)) + } else { + // Fall back to ticket-data-based description + p.logger.Info("AI output did not contain PR Description section, using fallback", + zap.String("ticket", ticketKey)) + prBody = fallbackPRBody(ticket) + } } // When creating a pull request from a fork, the head parameter should be in the format "forkOwner:branchName" @@ -589,6 +629,39 @@ func (p *TicketProcessorImpl) generatePrompt(ticket *models.JiraTicketResponse) return buf.String(), nil } +// parsePRDescription extracts the "## PR Description" section from the AI output. +// It looks for "## PR Description" and captures everything from the next line until +// the next H2 heading ("## ") or end of string. +// Returns the extracted section or empty string if not found. +func parsePRDescription(aiOutput string) string { + const marker = "## PR Description" + idx := strings.Index(aiOutput, marker) + if idx == -1 { + return "" + } + + // Start after the marker line + content := aiOutput[idx+len(marker):] + + // Find the next H2 heading that isn't part of the PR Description subsections + // PR Description uses ### (H3) for Problem/Root Cause/Solution, so we stop at ## (H2) + lines := strings.Split(content, "\n") + var result []string + for i, line := range lines { + // Skip the first line (could be empty after the marker) + if i == 0 && strings.TrimSpace(line) == "" { + continue + } + // Stop at the next ## heading (but not ### which are our subsections) + if strings.HasPrefix(line, "## ") && !strings.HasPrefix(line, "### ") { + break + } + result = append(result, line) + } + + return strings.TrimSpace(strings.Join(result, "\n")) +} + // redactPRContentForSecurity creates redacted PR title and body when ticket has security level func redactPRContentForSecurity(ticketKey string) (title string, body string) { title = fmt.Sprintf("%s: Update", ticketKey) diff --git a/services/ticket_processor_test.go b/services/ticket_processor_test.go index 08adbd0..228c6e4 100644 --- a/services/ticket_processor_test.go +++ b/services/ticket_processor_test.go @@ -249,16 +249,23 @@ func TestTicketProcessor_CreatePullRequestHeadFormat(t *testing.T) { t.Errorf("Expected PR title to be '%s', got '%s'", expectedPRTitle, capturedPRTitle) } - // Verify that the PR body contains the expected format - expectedPRBodyContains := "This PR addresses the issue described in [TEST-123]" - if !strings.Contains(capturedPRBody, expectedPRBodyContains) { - t.Errorf("Expected PR body to contain '%s', got '%s'", expectedPRBodyContains, capturedPRBody) + // Verify that the PR body contains the structured format + if !strings.Contains(capturedPRBody, "This PR addresses TEST-123") { + t.Errorf("Expected PR body to contain ticket key reference, got '%s'", capturedPRBody) + } + if !strings.Contains(capturedPRBody, "## Problem") { + t.Errorf("Expected PR body to contain '## Problem' section, got '%s'", capturedPRBody) + } + if !strings.Contains(capturedPRBody, "## Root Cause") { + t.Errorf("Expected PR body to contain '## Root Cause' section, got '%s'", capturedPRBody) + } + if !strings.Contains(capturedPRBody, "## Solution") { + t.Errorf("Expected PR body to contain '## Solution' section, got '%s'", capturedPRBody) } - // Verify that the PR body contains the Jira URL - expectedJiraURL := "https://your-domain.atlassian.net/browse/TEST-123" - if !strings.Contains(capturedPRBody, expectedJiraURL) { - t.Errorf("Expected PR body to contain Jira URL '%s', got '%s'", expectedJiraURL, capturedPRBody) + // Verify that the PR body does NOT contain internal Jira URLs + if strings.Contains(capturedPRBody, "atlassian.net") { + t.Errorf("Expected PR body NOT to contain internal Jira URL, got '%s'", capturedPRBody) } } @@ -382,16 +389,19 @@ func TestTicketProcessor_PRDescriptionWithAssignee(t *testing.T) { t.Fatalf("Expected no error, got: %v", err) } - // Verify that the PR body contains assignee information - expectedAssigneeInfo := "**Assignee:** John Doe (john.doe@example.com)" - if !strings.Contains(capturedPRBody, expectedAssigneeInfo) { - t.Errorf("Expected PR body to contain assignee info '%s', got '%s'", expectedAssigneeInfo, capturedPRBody) + // Verify that the PR body uses the structured format + if !strings.Contains(capturedPRBody, "## Problem") { + t.Errorf("Expected PR body to contain '## Problem' section, got '%s'", capturedPRBody) } - // Verify that the PR body contains the Jira URL - expectedJiraURL := "https://your-domain.atlassian.net/browse/TEST-456" - if !strings.Contains(capturedPRBody, expectedJiraURL) { - t.Errorf("Expected PR body to contain Jira URL '%s', got '%s'", expectedJiraURL, capturedPRBody) + // Verify that the PR body does NOT leak assignee email + if strings.Contains(capturedPRBody, "john.doe@example.com") { + t.Errorf("Expected PR body NOT to contain assignee email, got '%s'", capturedPRBody) + } + + // Verify that the PR body does NOT contain internal Jira URLs + if strings.Contains(capturedPRBody, "atlassian.net") { + t.Errorf("Expected PR body NOT to contain internal Jira URL, got '%s'", capturedPRBody) } } @@ -564,3 +574,143 @@ func TestTicketProcessor_DocumentationGenerationConfig(t *testing.T) { // - Checking for changes after each attempt using githubService.HasChanges() // - Failing gracefully with clear error messages if no changes after all retries // - Waiting config.AI.RetryDelaySeconds between retries + +func TestParsePRDescription(t *testing.T) { + tests := []struct { + name string + input string + shouldMatch bool + mustContain []string + mustNotContain []string + }{ + { + name: "extracts full PR description section", + input: `I made some changes to fix the bug. + +## Changes Made +- Modified foo.go + +## PR Description + +### Problem +Users see an authentication error when pulling a valid container image. + +### Root Cause +The error mapping in device/errors was returning an auth error for registry 404 responses. + +### Solution +Updated the error classification to distinguish between auth failures and missing images. + +## Testing +All tests pass.`, + shouldMatch: true, + mustContain: []string{ + "### Problem", + "### Root Cause", + "### Solution", + "authentication error", + "error classification", + }, + mustNotContain: []string{ + "## Changes Made", + "## Testing", + }, + }, + { + name: "returns empty when no PR Description section", + input: "I fixed the bug by changing foo.go.\n\n## Summary\nDone.", + shouldMatch: false, + }, + { + name: "stops at next H2 heading", + input: `## PR Description + +### Problem +The widget broke. + +### Root Cause +Off-by-one error. + +### Solution +Fixed the loop bound. + +## Some Other Section +This should not be included.`, + shouldMatch: true, + mustContain: []string{"Fixed the loop bound"}, + mustNotContain: []string{"This should not be included"}, + }, + { + name: "returns empty for empty input", + input: "", + shouldMatch: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := parsePRDescription(tc.input) + + if tc.shouldMatch && result == "" { + t.Fatal("Expected parsePRDescription to extract content, got empty string") + } + if !tc.shouldMatch && result != "" { + t.Fatalf("Expected empty result, got: %s", result) + } + + for _, s := range tc.mustContain { + if !strings.Contains(result, s) { + t.Errorf("Expected result to contain %q, got:\n%s", s, result) + } + } + for _, s := range tc.mustNotContain { + if strings.Contains(result, s) { + t.Errorf("Expected result NOT to contain %q, got:\n%s", s, result) + } + } + }) + } +} + +func TestScrubPII(t *testing.T) { + tests := []struct { + name string + input string + expect string + }{ + { + name: "scrubs redhat email", + input: "Assigned to john.doe@redhat.com for review", + expect: "Assigned to [email redacted] for review", + }, + { + name: "scrubs any email", + input: "Contact alice@example.com or bob@corp.internal.net", + expect: "Contact [email redacted] or [email redacted]", + }, + { + name: "scrubs internal Jira URL", + input: "See https://issues.redhat.com/browse/EDM-3115 for details", + expect: "See [internal link removed] for details", + }, + { + name: "leaves public URLs alone", + input: "See https://github.com/org/repo/pull/1 for the PR", + expect: "See https://github.com/org/repo/pull/1 for the PR", + }, + { + name: "handles text with no PII", + input: "Fixed a bug in the error handler", + expect: "Fixed a bug in the error handler", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := scrubPII(tc.input) + if result != tc.expect { + t.Errorf("scrubPII(%q)\n got: %q\n expect: %q", tc.input, result, tc.expect) + } + }) + } +}