diff --git a/src/mergai/agent_executor.py b/src/mergai/agent_executor.py index db06638..e6e4187 100644 --- a/src/mergai/agent_executor.py +++ b/src/mergai/agent_executor.py @@ -13,6 +13,7 @@ import git from .agents.base import Agent +from .utils import git_utils class AgentExecutionError(Exception): @@ -312,6 +313,51 @@ def validate_solution_files(self, solution: dict) -> str | None: return None + def validate_resolved_files_have_no_markers(self, solution: dict) -> str | None: + """Validate that files reported as 'resolved' have no conflict markers left. + + The agent is allowed to leave conflict markers in files listed under + `response.unresolved`, but any file listed under `response.resolved` + must be free of markers. + + Args: + solution: The solution dict from the agent, expected to have + structure: {"response": {"resolved": {path: ...}}}. + + Returns: + None if all resolved files are free of conflict markers, or an + error message listing the offending files. + """ + offending: list[str] = [] + for path in solution["response"].get("resolved", {}): + if git_utils.file_has_conflict_markers_in_workdir(path): + offending.append(path) + + if offending: + return ( + "The following files were marked as resolved but still contain " + "conflict markers: " + + ", ".join(offending) + + ". Remove the conflict markers from these files, or move them " + "to the 'unresolved' section of your response." + ) + return None + + def validate_solution(self, solution: dict) -> str | None: + """Combined validator for agent solutions. + + Runs `validate_solution_files` (files listed as resolved/modified were + actually changed on disk) followed by + `validate_resolved_files_have_no_markers` (no conflict markers remain + in files the agent claimed to have resolved). + + Returns the first error encountered, or None if both checks pass. + """ + error = self.validate_solution_files(solution) + if error: + return error + return self.validate_resolved_files_have_no_markers(solution) + def validate_describe_response(self, response: dict) -> str | None: """Validate that describe response has the correct format. diff --git a/src/mergai/app.py b/src/mergai/app.py index 5f05a6b..e0f5ca2 100644 --- a/src/mergai/app.py +++ b/src/mergai/app.py @@ -253,7 +253,7 @@ def resolve(self, force: bool, yolo: bool, agent_desc: str | None = None): try: solution = executor.run_with_retry( prompt=prompt, - validator=executor.validate_solution_files, + validator=executor.validate_solution, ) except AgentExecutionError as e: raise Exception(str(e)) from e @@ -392,6 +392,11 @@ def add_selective_note(self, commit: str, fields: list[str]): selective_note["user_comment"] = self.note.user_comment elif field == "merge_description" and self.note.has_merge_description: selective_note["merge_description"] = self.note.merge_description + elif field == "ci_fix_history" and self.note.has_ci_fix_history: + # Attach only the most recent attempt; older ones already + # live on earlier commits' notes. + assert self.note.ci_fix_history is not None # for type-checkers + selective_note["ci_fix_history"] = [self.note.ci_fix_history[-1]] # Write selective note to temp file and attach as git note with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: diff --git a/src/mergai/ci/__init__.py b/src/mergai/ci/__init__.py new file mode 100644 index 0000000..e176ed6 --- /dev/null +++ b/src/mergai/ci/__init__.py @@ -0,0 +1,15 @@ +"""CI workflow handling for mergai. + +This package implements the automated fix loop for CI workflow failures +on mergai PRs (PSMDB-1972). Top-level pieces: + +- :mod:`mergai.ci.context_builders` — turn workflow failure artifacts + (git diffs, SARIF files, logs) into a structured + :class:`~mergai.ci.context_builders.base.WorkflowContext`. +- :mod:`mergai.ci.handlers` — execute a fix given a + ``WorkflowContext``: either a shell command (``command``) or an AI + agent run (``resolve``). + +The ``mergai ci handle`` click command in :mod:`mergai.commands.ci` +wires these together and is the public entry point. +""" diff --git a/src/mergai/ci/context_builders/__init__.py b/src/mergai/ci/context_builders/__init__.py new file mode 100644 index 0000000..d3564f6 --- /dev/null +++ b/src/mergai/ci/context_builders/__init__.py @@ -0,0 +1,47 @@ +"""Context builders for CI workflow failures. + +Looks up a :class:`~.base.WorkflowContextBuilder` by ``type`` string +from :class:`mergai.config.WorkflowContextConfig`. New types can be +registered by appending to ``_BUILDERS``. +""" + +from ...app import AppContext +from .base import WorkflowContext, WorkflowContextBuilder +from .diff import DiffContextBuilder +from .sarif import SARIFContextBuilder + +_BUILDERS: dict[str, type[WorkflowContextBuilder]] = { + "diff": DiffContextBuilder, + "sarif": SARIFContextBuilder, +} + + +def get_context_builder(app: AppContext, type_: str) -> WorkflowContextBuilder: + """Return a context-builder instance for the given context type. + + Args: + app: The active :class:`~mergai.app.AppContext`. Builders receive + it so they can fall back to GitHub-API sources (e.g. job + logs) when the expected artifact is missing. + type_: ``WorkflowContextConfig.type`` value (e.g. ``"diff"``, + ``"sarif"``). + + Raises: + ValueError: If no builder is registered for ``type_``. + """ + builder_cls = _BUILDERS.get(type_) + if builder_cls is None: + known = ", ".join(sorted(_BUILDERS)) or "(none)" + raise ValueError( + f"No context builder registered for type '{type_}'. Known: {known}" + ) + return builder_cls(app) + + +__all__ = [ + "WorkflowContext", + "WorkflowContextBuilder", + "DiffContextBuilder", + "SARIFContextBuilder", + "get_context_builder", +] diff --git a/src/mergai/ci/context_builders/base.py b/src/mergai/ci/context_builders/base.py new file mode 100644 index 0000000..4e5b5c8 --- /dev/null +++ b/src/mergai/ci/context_builders/base.py @@ -0,0 +1,84 @@ +"""Base types for workflow context builders.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + +from ...app import AppContext +from ...config import WorkflowContextConfig + + +@dataclass +class WorkflowContext: + """Structured context extracted from a failed CI workflow run. + + Context builders populate this from workflow artifacts, the GitHub API, + or logs. Handlers consume it: ``CommandHandler`` mostly uses it for + reporting, while ``ResolveHandler`` feeds ``summary`` + ``details`` + + ``files_affected`` into the AI prompt. + + Attributes: + workflow_name: The failing workflow's name (e.g. ``"format"``). + run_id: GitHub workflow run ID that produced this failure. + pr_number: Pull request number the run is associated with. + summary: One-line human-readable summary of the failure. + files_affected: Paths (repo-relative) implicated by the failure. + details: Full text content for the AI prompt (the diff, SARIF + findings, log excerpt — whatever the builder extracts). + raw_data: Original parsed data, kept for storage/debugging. + """ + + workflow_name: str + run_id: str + pr_number: int + summary: str + files_affected: list[str] = field(default_factory=list) + details: str = "" + raw_data: dict[str, Any] = field(default_factory=dict) + + +class WorkflowContextBuilder(ABC): + """Abstract base class for context builders. + + A builder maps a ``WorkflowContextConfig`` (with a ``type`` and + ``source``) to a concrete :class:`WorkflowContext`. Subclasses are + registered by type via + :func:`mergai.ci.context_builders.get_context_builder`. + + Builders receive the active :class:`~mergai.app.AppContext` so they + can fall back to GitHub-API sources (e.g. job logs) when the expected + artifact is missing — the SARIF builder uses this when the workflow + failed before producing its SARIF report. + """ + + def __init__(self, app: AppContext): + self.app = app + + @abstractmethod + def build_context( + self, + config: WorkflowContextConfig, + workflow_name: str, + run_id: str, + pr_number: int, + artifacts_dir: str | None, + ) -> WorkflowContext: + """Build a :class:`WorkflowContext` for a given failed run. + + Args: + config: The per-workflow context config (``type``, ``source``, + ``artifact_name``, ``extract_pattern``). + workflow_name: Name of the failing workflow. + run_id: GitHub workflow run ID. + pr_number: PR number. + artifacts_dir: Directory with downloaded workflow artifacts. + Each artifact is extracted into a subdirectory named after + the artifact. + + Returns: + Populated WorkflowContext. + + Raises: + FileNotFoundError: If the expected artifact is missing. + ValueError: If the artifact content can't be parsed. + """ diff --git a/src/mergai/ci/context_builders/diff.py b/src/mergai/ci/context_builders/diff.py new file mode 100644 index 0000000..13d470a --- /dev/null +++ b/src/mergai/ci/context_builders/diff.py @@ -0,0 +1,62 @@ +"""Context builder for workflows that upload a git diff artifact. + +Used by the ``format`` workflow: format.yml writes ``diff.patch`` +(unified diff) and ``files.txt`` (one path per line) into the +``format-results`` artifact. +""" + +from pathlib import Path + +from ...config import WorkflowContextConfig +from .base import WorkflowContext, WorkflowContextBuilder + + +class DiffContextBuilder(WorkflowContextBuilder): + """Reads ``diff.patch`` + ``files.txt`` from a workflow artifact.""" + + def build_context( + self, + config: WorkflowContextConfig, + workflow_name: str, + run_id: str, + pr_number: int, + artifacts_dir: str | None, + ) -> WorkflowContext: + if artifacts_dir is None: + raise FileNotFoundError( + f"Workflow '{workflow_name}' needs artifacts_dir (diff context)" + ) + if not config.artifact_name: + raise ValueError( + f"Workflow '{workflow_name}' diff context requires " + f"'context.artifact_name' to be set" + ) + + artifact_path = Path(artifacts_dir) / config.artifact_name + diff_file = artifact_path / "diff.patch" + files_file = artifact_path / "files.txt" + + diff_content = diff_file.read_text() if diff_file.exists() else "" + files_affected: list[str] = [] + if files_file.exists(): + files_affected = [ + line.strip() + for line in files_file.read_text().splitlines() + if line.strip() + ] + + count = len(files_affected) + summary = ( + f"{workflow_name} failed: {count} file{'s' if count != 1 else ''} " + f"need changes" + ) + + return WorkflowContext( + workflow_name=workflow_name, + run_id=run_id, + pr_number=pr_number, + summary=summary, + files_affected=files_affected, + details=diff_content, + raw_data={"diff": diff_content, "files": files_affected}, + ) diff --git a/src/mergai/ci/context_builders/sarif.py b/src/mergai/ci/context_builders/sarif.py new file mode 100644 index 0000000..1860ec6 --- /dev/null +++ b/src/mergai/ci/context_builders/sarif.py @@ -0,0 +1,316 @@ +"""Context builder for workflows that upload a SARIF file artifact. + +Used by the ``clang-tidy`` workflow: clang-tidy.yml uploads +``clang-tidy-results.sarif`` as the ``clang-tidy-results`` artifact +(alongside the Code Scanning upload). + +SARIF (Static Analysis Results Interchange Format) is a JSON schema for +reporting static-analysis findings. The parser here extracts enough to +build an AI prompt: affected files and a per-finding summary of +rule-id + location + message. It is deliberately tolerant of missing or +odd fields — real-world SARIF files vary. + +When the SARIF artifact is absent — typically because the workflow +failed before clang-tidy could run (e.g. a Bazel build error during +``compile_commands.json`` generation) — the builder falls back to +fetching the failing job's log via the GitHub API. The agent then sees +the actual build error rather than a silent crash. +""" + +import json +import logging +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +from ...config import WorkflowContextConfig +from .base import WorkflowContext, WorkflowContextBuilder + +log = logging.getLogger(__name__) + +# Cap for the log fallback. Job logs can be megabytes; the agent doesn't +# need every Bazel progress line. Tail-only because GHA build errors and +# their summary lines ("ERROR: Build did NOT complete successfully") +# always appear at the end of the failing step's output. +_LOG_TAIL_BYTES = 64 * 1024 + + +class SARIFContextBuilder(WorkflowContextBuilder): + """Parses a SARIF JSON file from a workflow artifact. + + Falls back to the failing job's log via the GitHub API when the + SARIF artifact is missing — see module docstring. + """ + + def build_context( + self, + config: WorkflowContextConfig, + workflow_name: str, + run_id: str, + pr_number: int, + artifacts_dir: str | None, + ) -> WorkflowContext: + if config.source != "artifact": + raise NotImplementedError( + f"SARIF source '{config.source}' is not supported yet " + f"(only 'artifact'). See PSMDB-1972 follow-ups." + ) + if not config.artifact_name: + raise ValueError( + f"Workflow '{workflow_name}' sarif context requires " + f"'context.artifact_name' to be set" + ) + + sarif_path = self._try_find_sarif( + artifacts_dir, config.artifact_name, workflow_name + ) + if sarif_path is None: + return self._build_log_fallback_context( + workflow_name=workflow_name, + run_id=run_id, + pr_number=pr_number, + ) + + sarif_data = json.loads(sarif_path.read_text()) + findings = self._flatten_findings(sarif_data) + + files_affected = sorted({f["file"] for f in findings if f["file"]}) + details = self._format_details(findings) + + count = len(findings) + summary = ( + f"{workflow_name} reported {count} finding{'s' if count != 1 else ''} " + f"in {len(files_affected)} file{'s' if len(files_affected) != 1 else ''}" + ) + + return WorkflowContext( + workflow_name=workflow_name, + run_id=run_id, + pr_number=pr_number, + summary=summary, + files_affected=files_affected, + details=details, + raw_data={"findings": findings}, + ) + + @staticmethod + def _try_find_sarif( + artifacts_dir: str | None, artifact_name: str, workflow_name: str + ) -> Path | None: + """Locate the SARIF file in the downloaded artifacts, or return None. + + Returns None when the artifact directory is absent (the workflow + failed before producing its SARIF report) or contains no + ``*.sarif`` file. Callers fall back to a log-based context. + """ + if artifacts_dir is None: + return None + + artifact_dir = Path(artifacts_dir) / artifact_name + if not artifact_dir.is_dir(): + return None + + candidate = artifact_dir / f"{workflow_name}-results.sarif" + if candidate.exists(): + return candidate + + for entry in sorted(artifact_dir.iterdir()): + if entry.is_file() and entry.suffix == ".sarif": + return entry + + return None + + def _build_log_fallback_context( + self, + workflow_name: str, + run_id: str, + pr_number: int, + ) -> WorkflowContext: + """Build a context from the failing job's log when SARIF is absent. + + Pulls the run's failing jobs via PyGithub, downloads the log of + the first one, and trims it to the tail. The agent reads the + result as raw build/CI output and can fix the underlying issue + (for clang-tidy this is typically a BUILD.bazel mismatch after + an upstream merge). + """ + if self.app.gh is None: + raise FileNotFoundError( + f"No SARIF file found for '{workflow_name}' and no GitHub " + f"token available to fetch the job log as fallback." + ) + + run = self.app.gh_repo.get_workflow_run(int(run_id)) + failing_job = next( + (j for j in run.jobs() if j.conclusion == "failure"), + None, + ) + if failing_job is None: + raise FileNotFoundError( + f"No SARIF file found for '{workflow_name}' and run {run_id} " + f"has no failing job to inspect." + ) + + failing_step_name = next( + (s.name for s in (failing_job.steps or []) if s.conclusion == "failure"), + None, + ) + + log_text = self._download_job_log(failing_job.logs_url()) + details = self._failing_step_excerpt(log_text, _LOG_TAIL_BYTES) + + step_label = f" at step '{failing_step_name}'" if failing_step_name else "" + summary = ( + f"{workflow_name} failed before producing SARIF; using log of " + f"job '{failing_job.name}'{step_label}" + ) + + return WorkflowContext( + workflow_name=workflow_name, + run_id=run_id, + pr_number=pr_number, + summary=summary, + files_affected=[], + details=details, + raw_data={ + "fallback": "job_log", + "job_id": failing_job.id, + "job_name": failing_job.name, + "failing_step": failing_step_name, + "truncated": len(log_text) > len(details), + }, + ) + + @staticmethod + def _download_job_log(url: str) -> str: + """Fetch a job log from the presigned URL returned by PyGithub. + + ``WorkflowJob.logs_url()`` resolves the ``/jobs/{id}/logs`` 302 + and returns the Location it points at — a presigned URL that + needs no auth headers. + """ + try: + with urllib.request.urlopen( + url, timeout=30 + ) as resp: # noqa: S310 — GitHub-issued URL + data: bytes = resp.read() + return data.decode("utf-8", errors="replace") + except urllib.error.URLError as e: + log.warning("Failed to download job log: %s", e) + raise FileNotFoundError(f"Could not download job log: {e}") from e + + @staticmethod + def _failing_step_excerpt(text: str, max_bytes: int) -> str: + """Excerpt the failing step's section of a job log. + + GHA writes ``##[group]Run `` at the start of every step + and ``##[error]Process completed with exit code N.`` at the end + of failing ones. We anchor on the *first* error marker (the + actual step that broke the run, not downstream cleanup noise) + and walk back to the preceding ``##[group]Run`` to delimit the + section. If that section is too big to keep whole, return its + head + tail joined by an omission marker — root-cause errors + from build tools usually appear at the *start* of the output + while the failure summary appears at the *end*, so a plain tail + loses the original error. + + Falls back to a tail-only excerpt if no error marker is found. + """ + error_marker = "##[error]Process completed with exit code" + error_idx = text.find(error_marker) + if error_idx < 0: + return SARIFContextBuilder._tail(text, max_bytes) + + line_start = text.rfind("\n", 0, error_idx) + 1 + error_end = text.find("\n", error_idx) + section_end = error_end + 1 if error_end >= 0 else len(text) + + group_marker = "##[group]Run " + group_idx = text.rfind(group_marker, 0, line_start) + if group_idx < 0: + return SARIFContextBuilder._tail(text[:section_end], max_bytes) + + # Walk back to the start of the line that contains ##[group]Run. + section_start = text.rfind("\n", 0, group_idx) + 1 + section_size = section_end - section_start + + if section_size <= max_bytes: + return text[section_start:section_end] + + # Section too large — keep head + tail so the agent sees both + # the originating error and the failure summary. + half = max_bytes // 2 + head = text[section_start : section_start + half] + tail = text[section_end - half : section_end] + + # Trim head to a line boundary (last full line) and tail to start + # at a line boundary (first full line) for readability. + last_nl = head.rfind("\n") + if last_nl >= 0: + head = head[: last_nl + 1] + first_nl = tail.find("\n") + if 0 <= first_nl < len(tail) - 1: + tail = tail[first_nl + 1 :] + + omitted = section_size - len(head) - len(tail) + return f"{head}... (~{omitted // 1024} KiB omitted) ...\n{tail}" + + @staticmethod + def _tail(text: str, max_bytes: int) -> str: + """Return the last ``max_bytes`` of ``text``, prefixed with a marker.""" + if len(text) <= max_bytes: + return text + truncated = text[-max_bytes:] + # Drop the partial first line so the agent doesn't see a + # mid-line fragment. + newline = truncated.find("\n") + if 0 <= newline < len(truncated) - 1: + truncated = truncated[newline + 1 :] + return ( + f"... (log truncated; showing last ~{max_bytes // 1024} KiB)\n{truncated}" + ) + + @staticmethod + def _flatten_findings(sarif: dict[str, Any]) -> list[dict[str, Any]]: + """Extract ``{rule_id, level, message, file, line}`` per finding.""" + findings: list[dict[str, Any]] = [] + for run in sarif.get("runs", []): + for result in run.get("results", []): + rule_id = result.get("ruleId", "") + level = result.get("level", "warning") + message = (result.get("message") or {}).get("text", "") + + file_path = "" + line = 0 + locations = result.get("locations") or [] + if locations: + phys = (locations[0] or {}).get("physicalLocation") or {} + file_path = (phys.get("artifactLocation") or {}).get("uri", "") + line = (phys.get("region") or {}).get("startLine", 0) + + findings.append( + { + "rule_id": rule_id, + "level": level, + "message": message, + "file": file_path, + "line": line, + } + ) + return findings + + @staticmethod + def _format_details(findings: list[dict[str, Any]]) -> str: + """Render findings as a Markdown list suitable for AI prompts.""" + if not findings: + return "(no findings reported)" + + lines = [] + for f in findings: + loc = f"{f['file']}:{f['line']}" if f["file"] else "(unknown location)" + header = f"- [{f['level']}] {f['rule_id']} at {loc}" + lines.append(header) + if f["message"]: + lines.append(f" {f['message']}") + return "\n".join(lines) diff --git a/src/mergai/ci/handlers/__init__.py b/src/mergai/ci/handlers/__init__.py new file mode 100644 index 0000000..1a1ca43 --- /dev/null +++ b/src/mergai/ci/handlers/__init__.py @@ -0,0 +1,52 @@ +"""Handlers that attempt CI-failure fixes. + +Dispatch from :class:`mergai.config.WorkflowConfig.action_type` to a +concrete :class:`~.base.WorkflowHandler`. New action types can be +registered by appending to ``_HANDLERS``. +""" + +from ...app import AppContext +from ...config import ( + WORKFLOW_ACTION_COMMAND, + WORKFLOW_ACTION_RESOLVE, + WorkflowConfig, +) +from .base import WorkflowHandler +from .command import CommandHandler +from .resolve import ResolveHandler + +_HANDLERS: dict[str, type[WorkflowHandler]] = { + WORKFLOW_ACTION_COMMAND: CommandHandler, + WORKFLOW_ACTION_RESOLVE: ResolveHandler, +} + + +def get_handler(app: AppContext, config: WorkflowConfig) -> WorkflowHandler: + """Return a handler for ``config.action_type``. + + Args: + app: The active :class:`~mergai.app.AppContext`. + config: The per-workflow config. + + Raises: + ValueError: If no handler is registered for + ``config.action_type``. Should not happen in practice — + :meth:`mergai.config.WorkflowConfig.from_dict` validates + against :data:`mergai.config.VALID_WORKFLOW_ACTION_TYPES`. + """ + handler_cls = _HANDLERS.get(config.action_type) + if handler_cls is None: + known = ", ".join(sorted(_HANDLERS)) or "(none)" + raise ValueError( + f"No handler registered for action_type '{config.action_type}'. " + f"Known: {known}" + ) + return handler_cls(app, config) + + +__all__ = [ + "WorkflowHandler", + "CommandHandler", + "ResolveHandler", + "get_handler", +] diff --git a/src/mergai/ci/handlers/base.py b/src/mergai/ci/handlers/base.py new file mode 100644 index 0000000..77446cb --- /dev/null +++ b/src/mergai/ci/handlers/base.py @@ -0,0 +1,41 @@ +"""Base type for workflow handlers.""" + +from abc import ABC, abstractmethod + +from ...app import AppContext +from ...config import WorkflowConfig +from ..context_builders.base import WorkflowContext + + +class WorkflowHandler(ABC): + """Executes a fix for a CI workflow failure given its context. + + Subclasses live in :mod:`mergai.ci.handlers`; one per + ``WorkflowConfig.action_type`` value: + + - ``command`` → :class:`.command.CommandHandler` (run a shell command) + - ``resolve`` → :class:`.resolve.ResolveHandler` (run the AI agent) + """ + + def __init__(self, app: AppContext, config: WorkflowConfig): + self.app = app + self.config = config + + @abstractmethod + def execute(self, context: WorkflowContext) -> bool: + """Attempt the fix. + + Handlers should write their changes to the working tree. The + caller (``mergai ci handle``) is responsible for creating the + commit and pushing. Handlers must NOT commit themselves. + + Args: + context: The failure context built by the configured + :class:`~mergai.ci.context_builders.base.WorkflowContextBuilder`. + + Returns: + True if the fix appears to have been applied (e.g. the + working tree is dirty), False otherwise. The caller decides + what to do with a False result (retry vs. give up is driven + by ``WorkflowConfig.max_attempts`` at the command level). + """ diff --git a/src/mergai/ci/handlers/command.py b/src/mergai/ci/handlers/command.py new file mode 100644 index 0000000..5d21459 --- /dev/null +++ b/src/mergai/ci/handlers/command.py @@ -0,0 +1,70 @@ +"""Shell-command handler for CI workflow fixes.""" + +import logging +import os +import subprocess + +from ...app import AppContext +from ...config import WorkflowConfig +from ..context_builders.base import WorkflowContext +from .base import WorkflowHandler + +log = logging.getLogger(__name__) + + +class CommandHandler(WorkflowHandler): + """Runs a shell command configured per-workflow. + + Suitable for deterministic auto-fixers like ``bazel run format`` that + simply reformat files in place. The command is expected to leave the + working tree dirty on success; the outer loop commits and pushes. + """ + + def __init__(self, app: AppContext, config: WorkflowConfig): + if not config.command: + raise ValueError( + "CommandHandler requires config.command to be a non-empty string" + ) + self.app = app + self.config = config + + def execute(self, context: WorkflowContext) -> bool: + env = os.environ.copy() + env["TARGET_BRANCH"] = self.app.note.merge_info.target_branch + env["PR_NUMBER"] = str(context.pr_number) + env["WORKFLOW_NAME"] = context.workflow_name + + # `config.command` is enforced non-empty by __init__. + command = str(self.config.command) + log.info("Running fix command for %s: %s", context.workflow_name, command) + + try: + result = subprocess.run( # noqa: S602 — command comes from trusted config + command, + shell=True, + env=env, + check=False, + capture_output=True, + text=True, + cwd=self.app.repo.working_tree_dir, + ) + except OSError as e: + log.error("Failed to spawn fix command: %s", e) + return False + + if result.stdout: + log.debug("stdout:\n%s", result.stdout) + if result.stderr: + log.debug("stderr:\n%s", result.stderr) + + if result.returncode != 0: + log.warning( + "Fix command exited with %s; tree-state will still be checked", + result.returncode, + ) + + # Success = the command changed something we can commit. A non-zero + # exit with dirty tree still counts (some formatters return non-zero + # when they *do* reformat). An exit of 0 with a clean tree means + # the command ran but made no changes — treat as "no fix applied". + return self.app.repo.is_dirty(untracked_files=True) diff --git a/src/mergai/ci/handlers/resolve.py b/src/mergai/ci/handlers/resolve.py new file mode 100644 index 0000000..69ea41e --- /dev/null +++ b/src/mergai/ci/handlers/resolve.py @@ -0,0 +1,95 @@ +"""AI-agent handler for CI workflow fixes.""" + +import logging + +from ...agent_executor import AgentExecutionError, AgentExecutor +from ...app import AppContext +from ...config import WorkflowConfig +from ..context_builders.base import WorkflowContext +from .base import WorkflowHandler + +log = logging.getLogger(__name__) + + +class ResolveHandler(WorkflowHandler): + """Runs the AI agent over the failure context. + + Reuses the existing :class:`~mergai.agent_executor.AgentExecutor` + retry loop but with a custom validator that checks the working tree + is dirty after the agent runs (the agent's JSON response shape is + not prescribed for CI fixes). + """ + + PROMPT_TEMPLATE = ( + "A CI workflow failed on the current branch and you need to fix the " + "source files so that the workflow passes next time.\n" + "\n" + "Workflow: {workflow_name}\n" + "PR: #{pr_number}\n" + "Run ID: {run_id}\n" + "\n" + "{summary}\n" + "\n" + "## Affected files\n" + "{files}\n" + "\n" + "## Details\n" + "{details}\n" + "\n" + "Please edit the files in the working tree to fix the reported " + "issues. Do not run any build or test commands yourself — this job " + "will commit and push your changes, and the CI workflow will rerun " + "automatically. If you cannot fix an issue, leave the file as-is " + "and note it in your response.\n" + ) + + def __init__(self, app: AppContext, config: WorkflowConfig): + self.app = app + self.config = config + + def execute(self, context: WorkflowContext) -> bool: + prompt = self._build_prompt(context) + agent = self.app.get_agent(yolo=True) + executor = AgentExecutor( + agent=agent, + state_dir=self.app.state.path, + max_attempts=self.config.max_attempts, + repo=self.app.repo, + ) + + try: + executor.run_with_retry(prompt=prompt, validator=self._validate_fix) + except AgentExecutionError as e: + log.warning("Agent failed to produce a fix: %s", e) + return False + + return self.app.repo.is_dirty(untracked_files=True) + + def _build_prompt(self, context: WorkflowContext) -> str: + files = ( + "\n".join(f"- {p}" for p in context.files_affected) + if context.files_affected + else "(none listed)" + ) + return self.PROMPT_TEMPLATE.format( + workflow_name=context.workflow_name, + pr_number=context.pr_number, + run_id=context.run_id, + summary=context.summary, + files=files, + details=context.details or "(no additional details)", + ) + + def _validate_fix(self, _response: dict) -> str | None: + """Validator for ``AgentExecutor.run_with_retry``. + + The agent's JSON response shape is unconstrained for CI fixes — + what matters is that the working tree contains modifications we + can commit. If it's clean, tell the agent to actually make edits. + """ + if self.app.repo.is_dirty(untracked_files=True): + return None + return ( + "The working tree has no modifications after your run. " + "You must edit the affected files to fix the CI failure." + ) diff --git a/src/mergai/commands/ci.py b/src/mergai/commands/ci.py new file mode 100644 index 0000000..45058de --- /dev/null +++ b/src/mergai/commands/ci.py @@ -0,0 +1,216 @@ +"""``mergai ci`` click command group. + +Entry point invoked by the ``ci-fix`` job in ``.github/workflows/mergai.yml`` +when a CI workflow (``format``, ``clang-tidy``) completes with failure on a +mergai PR. Reads per-workflow config, builds a context from the failure +artifact, runs the configured handler, and commits the fix. The workflow +pushes after this command returns. +""" + +import logging +from datetime import datetime, timezone +from pathlib import Path + +import click + +from ..app import AppContext +from ..ci.context_builders import get_context_builder +from ..ci.handlers import get_handler +from ..config import WorkflowConfig + +log = logging.getLogger(__name__) + + +@click.group() +@click.pass_obj +@click.option( + "--repo", + "repo", + type=str, + required=False, + envvar="GH_REPO", + help="The repository where the PR is located.", +) +def ci(app: AppContext, repo: str | None): + """CI workflow integration commands.""" + if repo is None: + raise click.ClickException( + "GitHub repository not set. Use --repo or set GH_REPO environment variable." + ) + app.gh_repo_str = repo + + +@ci.command() +@click.pass_obj +@click.option("--workflow", required=True, help="Name of the failed workflow.") +@click.option("--run-id", required=True, help="GitHub workflow run ID.") +@click.option("--pr", required=True, type=int, help="PR number.") +@click.option( + "--artifacts-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + required=False, + help="Directory containing extracted workflow artifacts.", +) +def handle( + app: AppContext, + workflow: str, + run_id: str, + pr: int, + artifacts_dir: str | None, +) -> None: + """Handle a completed CI workflow failure for the current branch. + + Looks up the workflow's ``WorkflowConfig`` in the mergai config, + enforces ``max_attempts``, builds a failure context from the + downloaded artifacts, dispatches to the configured handler, and on + success creates a commit recording the attempt. + """ + config = app.config.workflows.get(workflow) + if config is None: + click.echo(f"No configuration for workflow '{workflow}'; nothing to do.") + return + + if not config.enabled: + click.echo(f"Workflow '{workflow}' handling is disabled; nothing to do.") + return + + prior_attempts = app.note.get_ci_attempts(workflow) + attempt_number = len(prior_attempts) + 1 + if attempt_number > config.max_attempts: + click.echo( + f"Max attempts ({config.max_attempts}) reached for '{workflow}'; " + f"giving up." + ) + _post_max_attempts_comment(app, pr, workflow, config) + _record_attempt( + app, + workflow=workflow, + attempt_number=attempt_number, + run_id=run_id, + pr_number=pr, + action_type=config.action_type, + summary="(skipped — max attempts reached)", + files_affected=[], + success=False, + give_up=True, + ) + return + + click.echo( + f"Handling '{workflow}' failure (attempt {attempt_number} of " + f"{config.max_attempts})" + ) + + builder = get_context_builder(app, config.context.type) + context = builder.build_context( + config.context, + workflow_name=workflow, + run_id=run_id, + pr_number=pr, + artifacts_dir=artifacts_dir, + ) + click.echo(f"Context: {context.summary}") + + handler = get_handler(app, config) + success = handler.execute(context) + + _record_attempt( + app, + workflow=workflow, + attempt_number=attempt_number, + run_id=run_id, + pr_number=pr, + action_type=config.action_type, + summary=context.summary, + files_affected=context.files_affected, + success=success, + give_up=False, + ) + + if not success: + click.echo( + f"Handler did not produce any changes for '{workflow}'. " + f"Will retry on the next workflow run." + ) + return + + _commit_fix(app, workflow=workflow, attempt_number=attempt_number) + + +def _post_max_attempts_comment( + app: AppContext, pr_number: int, workflow: str, config: WorkflowConfig +) -> None: + """Post a PR comment when the per-workflow max-attempts cap is hit.""" + if app.gh is None: + log.warning("GitHub auth not available; skipping max-attempts PR comment.") + return + body = ( + f"mergai gave up auto-fixing the **{workflow}** workflow after " + f"{config.max_attempts} attempts. Manual intervention required." + ) + try: + app.gh_repo.get_pull(pr_number).create_issue_comment(body) + except Exception as e: # noqa: BLE001 — best-effort notification + log.warning("Failed to post PR comment on #%s: %s", pr_number, e) + + +def _record_attempt( + app: AppContext, + *, + workflow: str, + attempt_number: int, + run_id: str, + pr_number: int, + action_type: str, + summary: str, + files_affected: list[str], + success: bool, + give_up: bool, +) -> None: + """Append the attempt to the note's ``ci_fix_history`` and persist it.""" + attempt = { + "workflow": workflow, + "attempt_number": attempt_number, + "timestamp": datetime.now(timezone.utc).isoformat(), + "run_id": run_id, + "pr_number": pr_number, + "action_type": action_type, + "context_summary": summary, + "files_affected": list(files_affected), + "success": success, + "give_up": give_up, + } + app.note.add_ci_attempt(attempt) + app.save_note(app.note) + + +def _commit_fix(app: AppContext, workflow: str, attempt_number: int) -> None: + """Stage all changes and commit the CI fix. + + The corresponding mergai note is attached via + ``add_selective_note(..., ["ci_fix_history"])``, recording only the + just-added attempt entry. + """ + repo = app.repo + work_dir = repo.working_tree_dir + if work_dir is None: + raise click.ClickException("Repo has no working tree; cannot commit fix") + + repo.git.add("-A", str(Path(work_dir))) + + if not repo.index.diff("HEAD"): + # Should not happen — handler returned success because tree was + # dirty — but guard against subtle edge cases (e.g. all changes + # in submodules, ignored paths) so we don't create empty commits. + click.echo("No staged changes after `git add`; skipping commit.") + return + + message = f"fix({workflow}): automated fix attempt {attempt_number}\n" + if app.config.commit.footer: + message += "\n" + app.config.commit.footer + + repo.git.commit("-m", message) + + commit_sha = repo.head.commit.hexsha + app.add_selective_note(commit_sha, ["ci_fix_history"]) + click.echo(f"Committed CI fix as {commit_sha[:11]}.") diff --git a/src/mergai/commands/pr.py b/src/mergai/commands/pr.py index 74805d8..3510dc7 100644 --- a/src/mergai/commands/pr.py +++ b/src/mergai/commands/pr.py @@ -137,18 +137,53 @@ def _create_pr( raise click.ClickException( "GitHub rejected the PR: branch(es) not found on remote. Push your branches first." ) from e - msg = e.data.get("message", str(e)) if isinstance(e.data, dict) else str(e) - raise click.ClickException(f"GitHub API error: {msg}") from e + raise click.ClickException(_format_github_error(e)) from e -def _build_solutions_pr_body(app: AppContext) -> str: +def _format_github_error(e: GithubException) -> str: + """Format a GithubException with details from response payload. + + GitHub's 422 validation errors carry the actual reason in + `errors[*].message` (or `errors[*].code`), not in the top-level + `message` field. Surface those so callers see *why* validation failed + (e.g. "Body is too long (maximum is 65536 characters)") instead of the + generic "Validation Failed". + """ + data = e.data if isinstance(e.data, dict) else {} + top = data.get("message") or str(e) + parts: list[str] = [] + for err in data.get("errors") or []: + if not isinstance(err, dict): + continue + # Prefer human message; fall back to field+code. + msg = err.get("message") + if not msg: + field = err.get("field") + code = err.get("code") + if field and code: + msg = f"{field}: {code}" + elif code: + msg = code + elif field: + msg = field + if msg: + parts.append(msg) + detail = "; ".join(parts) + if detail: + return f"GitHub API error ({e.status}): {top} — {detail}" + return f"GitHub API error ({e.status}): {top}" + + +def _build_solutions_pr_body(app: AppContext, skip_commit_list: bool = False) -> str: markdown_config = MarkdownConfig.for_pr(app.repo) body = formatters.merge_info_to_markdown(app.note.merge_info, markdown_config) body += "\n\n" if app.note.has_merge_context and app.note.merge_context is not None: body += formatters.merge_context_to_markdown( - app.note.merge_context, markdown_config + app.note.merge_context, + markdown_config, + include_commit_list=not skip_commit_list, ) body += "\n\n" if app.note.has_merge_description and app.note.merge_description is not None: @@ -167,14 +202,16 @@ def _build_solutions_pr_body(app: AppContext) -> str: return body -def _build_merge_pr_body(app: AppContext) -> str: +def _build_merge_pr_body(app: AppContext, skip_commit_list: bool = False) -> str: markdown_config = MarkdownConfig.for_pr(app.repo) body = formatters.merge_info_to_markdown(app.note.merge_info, markdown_config) body += "\n\n" if app.note.has_merge_context and app.note.merge_context is not None: body += formatters.merge_context_to_markdown( - app.note.merge_context, markdown_config + app.note.merge_context, + markdown_config, + include_commit_list=not skip_commit_list, ) body += "\n\n" if app.note.has_merge_description and app.note.merge_description is not None: @@ -190,6 +227,7 @@ def _create_solution_pr( dry_run: bool, url_only: bool = False, skip_body: bool = False, + skip_commit_list: bool = False, labels: list[str] | None = None, ) -> None: """Create a PR from the current branch (with existing solution commits) to the conflict branch.""" @@ -208,7 +246,11 @@ def _create_solution_pr( title = app.pr_titles.solution_title - body = "" if skip_body else _build_solutions_pr_body(app) + body = ( + "" + if skip_body + else _build_solutions_pr_body(app, skip_commit_list=skip_commit_list) + ) _create_pr( app, @@ -222,15 +264,15 @@ def _create_solution_pr( ) -def _build_main_pr_body(app: AppContext) -> str: +def _build_main_pr_body(app: AppContext, skip_commit_list: bool = False) -> str: """Build PR body for main PR from merge_context or conflict resolution data.""" # If we have solutions (from any source - AI, human, or synced), include them if app.note.has_solutions: - return _build_solutions_pr_body(app) + return _build_solutions_pr_body(app, skip_commit_list=skip_commit_list) # No solutions - use merge PR body if we have merge_context if app.note.has_merge_context: - return _build_merge_pr_body(app) + return _build_merge_pr_body(app, skip_commit_list=skip_commit_list) raise click.ClickException( "No merge_context or solutions found. " @@ -244,13 +286,16 @@ def _create_main_pr( dry_run: bool, url_only: bool = False, skip_body: bool = False, + skip_commit_list: bool = False, labels: list[str] | None = None, ) -> None: """Create a PR from the main branch to target_branch (merge or conflict resolution).""" title = app.pr_titles.main_title - body = "" if skip_body else _build_main_pr_body(app) + body = ( + "" if skip_body else _build_main_pr_body(app, skip_commit_list=skip_commit_list) + ) _create_pr( app, @@ -299,6 +344,16 @@ def pr(app: AppContext, repo: str | None): default=False, help="Skip creating a body for the PR (create with empty body).", ) +@click.option( + "--skip-commit-list", + is_flag=True, + default=False, + help=( + "Omit the per-merged-commit table from the PR body. " + "Use this when the body would otherwise exceed GitHub's 65,536 " + "character limit (typical for merges with hundreds of commits)." + ), +) @click.option( "--labels", "labels_arg", @@ -325,6 +380,7 @@ def create( dry_run: bool, url_only: bool, skip_body: bool, + skip_commit_list: bool, labels_arg: str | None, no_labels: bool, ): @@ -358,6 +414,10 @@ def create( fields pre-filled (title, body, branches). You can review and edit everything before submitting. --skip-body Skip creating a body for the PR (create with empty body). + --skip-commit-list + Omit the per-merged-commit table from the PR body. Use + this when a normal `pr create` fails GitHub validation + because the body exceeds the 65,536 character limit. --labels Labels to apply to the PR. By default, uses labels from config. Specifying labels without +/- prefix overrides config labels. Use +label to add, -label to remove from config labels. @@ -369,6 +429,7 @@ def create( mergai pr create solution # Create PR from solution branch to conflict branch mergai pr create main --url-only # Get URL to create PR manually on GitHub mergai pr create main --skip-body # Create PR with empty body + mergai pr create main --skip-commit-list # Drop merged-commits table from body mergai pr create main --labels=urgent,review # Override config labels mergai pr create main --labels=+urgent,-auto # Add/remove from config labels mergai pr create main --no-labels # Create PR without any labels @@ -381,9 +442,17 @@ def create( # Get config labels based on PR type if pr_type.lower() == "solution": - config_labels = app.config.pr.solution.labels + pr_type_config = app.config.pr.solution else: - config_labels = app.config.pr.main.labels + pr_type_config = app.config.pr.main + + config_labels = list(pr_type_config.labels) + if app.note.has_unresolved_conflicts: + config_labels.extend( + lbl + for lbl in pr_type_config.labels_on_unresolved + if lbl not in config_labels + ) # Compute final labels if no_labels: @@ -392,9 +461,13 @@ def create( final_labels = _parse_labels_option(labels_arg, config_labels) if pr_type.lower() == "solution": - _create_solution_pr(app, dry_run, url_only, skip_body, final_labels) + _create_solution_pr( + app, dry_run, url_only, skip_body, skip_commit_list, final_labels + ) else: - _create_main_pr(app, dry_run, url_only, skip_body, final_labels) + _create_main_pr( + app, dry_run, url_only, skip_body, skip_commit_list, final_labels + ) def get_prs_for_current_branch(app: AppContext) -> list[GithubPullRequest.PullRequest]: diff --git a/src/mergai/commands/rebase.py b/src/mergai/commands/rebase.py index a7bb6ee..50043f0 100644 --- a/src/mergai/commands/rebase.py +++ b/src/mergai/commands/rebase.py @@ -588,20 +588,6 @@ def _get_rebase_commit_message(app: AppContext) -> str | None: return None -def _content_has_conflict_markers(content: str) -> bool: - """Check if content contains git conflict markers. - - Args: - content: File content to check. - - Returns: - True if conflict markers are found. - """ - has_start = bool(re.search(r"^<{7}", content, re.MULTILINE)) - has_end = bool(re.search(r"^>{7}", content, re.MULTILINE)) - return has_start and has_end - - def _apply_file_resolution( app: AppContext, file_path: str, @@ -629,7 +615,7 @@ def _apply_file_resolution( return False # Verify no conflict markers in the resolved content - if _content_has_conflict_markers(content): + if git_utils.content_has_conflict_markers(content): return False # Write to working directory diff --git a/src/mergai/config.py b/src/mergai/config.py index 885c89c..2ae8de2 100644 --- a/src/mergai/config.py +++ b/src/mergai/config.py @@ -135,7 +135,11 @@ class PRTypeConfig: Attributes: title_format: Format string for PR titles. - labels: List of labels to apply to the PR when created. + labels: Labels always applied to the PR when created. + labels_on_unresolved: Labels applied only when the PR's note contains + at least one solution with unresolved conflicts (any file left with + conflict markers). Typically used to attach skip labels like + ``ci-skip-format`` so CI only runs on fully-resolved PRs. Available tokens for title_format: - %(target_branch) - The target branch name @@ -145,6 +149,7 @@ class PRTypeConfig: title_format: str = "" labels: list[str] = field(default_factory=list) + labels_on_unresolved: list[str] = field(default_factory=list) @classmethod def from_dict(cls, data: dict, default_title_format: str = "") -> "PRTypeConfig": @@ -160,6 +165,7 @@ def from_dict(cls, data: dict, default_title_format: str = "") -> "PRTypeConfig" return cls( title_format=data.get("title_format", default_title_format), labels=data.get("labels", []), + labels_on_unresolved=data.get("labels_on_unresolved", []), ) @@ -794,6 +800,173 @@ def from_dict(cls, data: dict) -> "InitConfig": ) +# Valid values for workflow action_type config +WORKFLOW_ACTION_COMMAND = "command" +WORKFLOW_ACTION_RESOLVE = "resolve" +VALID_WORKFLOW_ACTION_TYPES = [WORKFLOW_ACTION_COMMAND, WORKFLOW_ACTION_RESOLVE] + + +@dataclass +class WorkflowContextConfig: + """Configuration for extracting failure context from a CI workflow run. + + Resolved at runtime by the ``mergai.ci.context_builders`` factory, which + maps ``type`` to a concrete builder. Unknown types are rejected there, + not here, so new builders can be added without touching config parsing. + + Attributes: + type: Context type (e.g. ``"diff"``, ``"sarif"``, ``"logs"``). + source: Where to read the context from. Currently ``"artifact"`` + (downloaded workflow artifact) or ``"code-scanning"`` (GitHub + Code Scanning API). + artifact_name: Name of the artifact to download when ``source`` is + ``"artifact"``. + extract_pattern: Regex pattern used by log-style context builders to + extract relevant lines. + + Example YAML config:: + + context: + type: diff + source: artifact + artifact_name: format-results + """ + + type: str = "logs" + source: str = "artifact" + artifact_name: str | None = None + extract_pattern: str | None = None + + @classmethod + def from_dict(cls, data: dict) -> "WorkflowContextConfig": + """Create a WorkflowContextConfig from a dictionary.""" + return cls( + type=data.get("type", cls.type), + source=data.get("source", cls.source), + artifact_name=data.get("artifact_name"), + extract_pattern=data.get("extract_pattern"), + ) + + +@dataclass +class WorkflowConfig: + """Configuration for handling failures of a specific CI workflow. + + Attributes: + enabled: Whether mergai will attempt fixes for this workflow. + max_attempts: Maximum number of fix attempts before giving up and + posting a PR comment. + action_type: How to attempt the fix. Either ``"command"`` (run a + shell command) or ``"resolve"`` (invoke the AI agent via the + existing ``AgentExecutor`` retry loop). + command: Shell command to run when ``action_type`` is ``"command"``. + Receives ``TARGET_BRANCH``, ``PR_NUMBER``, and ``WORKFLOW_NAME`` + as environment variables. + context: Configuration for extracting failure context for this + workflow (used to build the AI prompt and/or for logging). + + Example YAML config:: + + format: + enabled: true + max_attempts: 3 + action_type: command + command: "bazel run format -- --origin-branch=origin/${TARGET_BRANCH}" + context: + type: diff + source: artifact + artifact_name: format-results + """ + + enabled: bool = False + max_attempts: int = 3 + action_type: str = WORKFLOW_ACTION_RESOLVE + command: str | None = None + context: WorkflowContextConfig = field(default_factory=WorkflowContextConfig) + + @classmethod + def from_dict(cls, data: dict) -> "WorkflowConfig": + """Create a WorkflowConfig from a dictionary. + + Raises: + ValueError: If ``action_type`` is not one of + ``VALID_WORKFLOW_ACTION_TYPES``, or if ``action_type`` is + ``"command"`` without a non-empty ``command``. + """ + action_type = data.get("action_type", cls.action_type) + if action_type not in VALID_WORKFLOW_ACTION_TYPES: + raise ValueError( + f"Invalid workflow action_type: '{action_type}'. " + f"Valid values are: {', '.join(VALID_WORKFLOW_ACTION_TYPES)}" + ) + + command = data.get("command") + if action_type == WORKFLOW_ACTION_COMMAND and not command: + raise ValueError( + "Workflow action_type 'command' requires a non-empty 'command' field" + ) + + context_data = data.get("context", {}) + context = ( + WorkflowContextConfig.from_dict(context_data) + if context_data + else WorkflowContextConfig() + ) + + return cls( + enabled=data.get("enabled", cls.enabled), + max_attempts=data.get("max_attempts", cls.max_attempts), + action_type=action_type, + command=command, + context=context, + ) + + +@dataclass +class WorkflowsConfig: + """Configuration for CI workflow failure handlers, keyed by workflow name. + + The workflow name is the value of ``name:`` in the GitHub Actions + workflow file (e.g. ``"format"``, ``"clang-tidy"``). When a ``workflow_run`` + event fires, mergai looks up the matching ``WorkflowConfig`` here. + + Example YAML config:: + + workflows: + format: + enabled: true + max_attempts: 3 + action_type: command + command: "bazel run format" + context: + type: diff + source: artifact + artifact_name: format-results + clang-tidy: + enabled: true + max_attempts: 2 + action_type: resolve + context: + type: sarif + source: artifact + artifact_name: clang-tidy-results + """ + + workflows: dict[str, WorkflowConfig] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: dict) -> "WorkflowsConfig": + """Create a WorkflowsConfig from a dictionary of workflow-name → config.""" + workflows: dict[str, WorkflowConfig] = {} + for name, wf_data in data.items(): + workflows[name] = WorkflowConfig.from_dict(wf_data or {}) + return cls(workflows=workflows) + + def get(self, name: str) -> WorkflowConfig | None: + """Look up a workflow's config by name. Returns None if absent.""" + return self.workflows.get(name) + + @dataclass class MergaiConfig: """Configuration settings for MergAI. @@ -811,6 +984,7 @@ class MergaiConfig: merge: Configuration for the merge command. context: Configuration for context creation (conflict context, etc.). config: Configuration for the 'mergai config' command. + workflows: Configuration for CI workflow failure handlers. _raw: Raw dictionary data for accessing arbitrary sections. """ @@ -824,6 +998,7 @@ class MergaiConfig: merge: MergeConfig = field(default_factory=MergeConfig) context: ContextConfig = field(default_factory=ContextConfig) config: InitConfig = field(default_factory=InitConfig) + workflows: WorkflowsConfig = field(default_factory=WorkflowsConfig) _raw: dict[str, Any] = field(default_factory=dict) @property @@ -923,6 +1098,14 @@ def from_dict(cls, data: dict) -> "MergaiConfig": else InitConfig() ) + # Parse workflows section if present + workflows_data = data.get("workflows", {}) + workflows_config = ( + WorkflowsConfig.from_dict(workflows_data) + if workflows_data + else WorkflowsConfig() + ) + return cls( fork=fork_config, resolve=resolve_config, @@ -934,6 +1117,7 @@ def from_dict(cls, data: dict) -> "MergaiConfig": merge=merge_config, context=context_config, config=config_config, + workflows=workflows_config, _raw=data, ) diff --git a/src/mergai/main.py b/src/mergai/main.py index b563729..615bdde 100644 --- a/src/mergai/main.py +++ b/src/mergai/main.py @@ -71,6 +71,10 @@ def register_commands(cli): cli.add_command(config) + from .commands.ci import ci + + cli.add_command(ci) + @click.group() @click.version_option(version=__version__, prog_name="mergai") diff --git a/src/mergai/models.py b/src/mergai/models.py index c6c123a..5db19f2 100644 --- a/src/mergai/models.py +++ b/src/mergai/models.py @@ -832,6 +832,11 @@ class MergaiNote: user_comment: Optional user-provided comment. merge_description: Optional AI-generated merge description. note_index: Optional index tracking which commits have which fields. + ci_fix_history: Optional list of CI fix attempts. Each entry is a + free-form dict produced by ``mergai ci handle``, typically with + keys ``workflow``, ``attempt_number``, ``timestamp``, + ``action_type``, ``context_summary``, ``files_affected``, and + ``success``. New attempts are appended. """ merge_info: MergeInfo @@ -843,6 +848,7 @@ class MergaiNote: user_comment: dict | None = None # Dict with user, email, date, body merge_description: dict | None = None note_index: list[dict] | None = None + ci_fix_history: list[dict] | None = None # Cached repo reference (not serialized) _repo: Optional["Repo"] = field(default=None, repr=False, compare=False) @@ -884,6 +890,7 @@ def from_dict(cls, data: dict, repo: "Repo | None" = None) -> "MergaiNote": user_comment=data.get("user_comment"), merge_description=data.get("merge_description"), note_index=data.get("note_index"), + ci_fix_history=data.get("ci_fix_history"), _repo=repo, ) return note @@ -974,6 +981,13 @@ def combine_from_dicts( if "user_comment" in git_note and "user_comment" not in combined: combined["user_comment"] = git_note["user_comment"] + # ci_fix_history - combine all into array (preserves attempt order) + if "ci_fix_history" in git_note: + if "ci_fix_history" not in combined: + combined["ci_fix_history"] = [] + for attempt in git_note["ci_fix_history"]: + combined["ci_fix_history"].append(attempt) + # Cast the result since from_dict returns MergaiNote, but cls is type[T] # where T is bound to MergaiNote note = cls.from_dict(combined, repo) @@ -996,6 +1010,18 @@ def has_solutions(self) -> bool: """Check if solutions are present.""" return self.solutions is not None and len(self.solutions) > 0 + @property + def has_unresolved_conflicts(self) -> bool: + """Check if any solution reports files left with conflict markers. + + A solution's ``response.unresolved`` dict maps file paths to reasons. + Any non-empty ``unresolved`` on any solution means the agent could not + fully resolve conflicts and the PR still contains conflict markers. + """ + if not self.has_solutions or self.solutions is None: + return False + return any(s.get("response", {}).get("unresolved") for s in self.solutions) + @property def has_pr_comments(self) -> bool: """Check if pr_comments are present.""" @@ -1016,6 +1042,36 @@ def has_note_index(self) -> bool: """Check if note_index is present.""" return self.note_index is not None and len(self.note_index) > 0 + @property + def has_ci_fix_history(self) -> bool: + """Check if ci_fix_history has at least one attempt recorded.""" + return self.ci_fix_history is not None and len(self.ci_fix_history) > 0 + + def get_ci_attempts(self, workflow: str) -> list[dict]: + """Return CI fix attempts recorded for a given workflow name. + + Args: + workflow: Workflow name as it appears in the ``workflow`` key of + each attempt dict (e.g. ``"format"``). + + Returns: + Attempts matching ``workflow``, in insertion order. Empty list + if none. + """ + if not self.ci_fix_history: + return [] + return [a for a in self.ci_fix_history if a.get("workflow") == workflow] + + def add_ci_attempt(self, attempt: dict) -> "MergaiNote": + """Append a CI fix attempt dict to ``ci_fix_history``. + + Initialises the list if it is None. Returns self for chaining. + """ + if self.ci_fix_history is None: + self.ci_fix_history = [] + self.ci_fix_history.append(attempt) + return self + # --- Repo Binding --- def bind_repo(self, repo: "Repo") -> "MergaiNote": @@ -1408,4 +1464,6 @@ def to_dict(self) -> dict: result["merge_description"] = self.merge_description if self.note_index: result["note_index"] = self.note_index + if self.ci_fix_history: + result["ci_fix_history"] = self.ci_fix_history return result diff --git a/src/mergai/templates/markdown/merge_context.jinja2 b/src/mergai/templates/markdown/merge_context.jinja2 index 53585f4..bb378b3 100644 --- a/src/mergai/templates/markdown/merge_context.jinja2 +++ b/src/mergai/templates/markdown/merge_context.jinja2 @@ -23,6 +23,7 @@ No files were auto-merged. **Merged with strategy:** {{ merge_context.auto_merged.strategy }} {% endif %} {% endif %} +{% if include_commit_list | default(true) %}
Show details @@ -34,3 +35,4 @@ No files were auto-merged. {%- endfor %}
+{% endif %} diff --git a/src/mergai/utils/formatters.py b/src/mergai/utils/formatters.py index b46a85e..246277a 100644 --- a/src/mergai/utils/formatters.py +++ b/src/mergai/utils/formatters.py @@ -286,13 +286,18 @@ def merge_context_to_text(merge_context: MergeContext) -> str: def merge_context_to_markdown( - merge_context: MergeContext, markdown_config: MarkdownConfig | None = None + merge_context: MergeContext, + markdown_config: MarkdownConfig | None = None, + include_commit_list: bool = True, ) -> str: """Convert MergeContext to markdown. Args: merge_context: MergeContext object. markdown_config: Optional MarkdownConfig for PR-style formatting with links. + include_commit_list: If False, omit the per-merged-commit table. + Useful when the rendered output must stay below GitHub's 65,536 + character limit on PR/issue bodies for merges with many commits. Returns: Markdown formatted string. @@ -305,7 +310,11 @@ def merge_context_to_markdown( format_sha = _create_format_sha_func(markdown_config) return render_template( - "markdown", "merge_context", merge_context=merge_context, format_sha=format_sha + "markdown", + "merge_context", + merge_context=merge_context, + format_sha=format_sha, + include_commit_list=include_commit_list, ) diff --git a/src/mergai/utils/git_utils.py b/src/mergai/utils/git_utils.py index 2e03bea..27ca366 100644 --- a/src/mergai/utils/git_utils.py +++ b/src/mergai/utils/git_utils.py @@ -385,7 +385,12 @@ def get_conflict_context( blobs_map = repo.index.unmerged_blobs() ours_commit = repo.head.commit - theirs_commit = repo.commit("MERGE_HEAD") + # Read MERGE_HEAD from the worktree-local gitdir directly. GitPython's + # ref resolver only treats HEAD/ORIG_HEAD/FETCH_HEAD/index/logs as + # per-worktree, so repo.commit("MERGE_HEAD") looks in common_dir and + # fails inside a linked worktree. + merge_head_sha = (Path(repo.git_dir) / "MERGE_HEAD").read_text().strip() + theirs_commit = repo.commit(merge_head_sha) base_commit = repo.merge_base(ours_commit, theirs_commit)[0] context = { @@ -1069,14 +1074,26 @@ def get_file_content_at_commit( return None +def content_has_conflict_markers(content: str) -> bool: + """Check if content contains git conflict markers. + + A conflict is recognized only when both a start marker (`<<<<<<<`) and + an end marker (`>>>>>>>`) are present at the beginning of a line. + + Args: + content: File content to check. + + Returns: + True if conflict markers are found. + """ + has_start = bool(re.search(r"^<{7}", content, re.MULTILINE)) + has_end = bool(re.search(r"^>{7}", content, re.MULTILINE)) + return has_start and has_end + + def file_has_conflict_markers(repo: Repo, commit_sha: str, file_path: str) -> bool: """Check if a file at a specific commit contains unresolved conflict markers. - Scans the file content for standard git conflict marker patterns: - - <<<<<<< (start marker) - - ======= (middle marker) - - >>>>>>> (end marker) - Args: repo: GitPython Repo instance. commit_sha: The commit SHA to check the file at. @@ -1089,13 +1106,7 @@ def file_has_conflict_markers(repo: Repo, commit_sha: str, file_path: str) -> bo content = get_file_content_at_commit(repo, commit_sha, file_path) if content is None: return False - - # Check for conflict marker patterns at the start of lines - # We need at least one start marker and one end marker to consider it a conflict - has_start = bool(re.search(r"^<{7}", content, re.MULTILINE)) - has_end = bool(re.search(r"^>{7}", content, re.MULTILINE)) - - return has_start and has_end + return content_has_conflict_markers(content) def file_has_conflict_markers_in_workdir(file_path: str) -> bool: @@ -1113,10 +1124,7 @@ def file_has_conflict_markers_in_workdir(file_path: str) -> bool: return False try: - content = path.read_text() - has_start = bool(re.search(r"^<{7}", content, re.MULTILINE)) - has_end = bool(re.search(r"^>{7}", content, re.MULTILINE)) - return has_start and has_end + return content_has_conflict_markers(path.read_text()) except Exception: return False