Merge branch 'issue/CG-62' - block merge when verification output indicates failure

Adds _detect_verification_result() to parse QA agent stdout for failure markers. Even if the QA agent exits with code 0, the merge is blocked when the output contains patterns such as "verification failed", "requirements not met", or "not actually implemented". Merge failures (e.g., conflicts) also move the issue to Triage.

Resolves: CG-62

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 08:00:44 -07:00
parent 876b8789b8 fb99626e7e
commit 70c3c847a9
1 changed files with 76 additions and 3 deletions
--- a/runner.py
+++ b/runner.py
@@ -29,6 +29,7 @@ from gitea_client import GiteaClient, load_gitea_config, git_push, git_merge_to_
 from agent import AgentPool, AgentTask, build_prompt
 from webhook_server import WebhookServer, WebhookEvent, load_webhook_config
 from woodpecker_client import WoodpeckerClient, BuildInfo
+import re

 # Constants
 DEFAULT_WEBHOOK_QUEUE_SIZE = 1000  # Maximum queued webhook events
@@ -166,6 +167,59 @@ class ConfigurationError(Exception):
    pass


+def _detect_verification_result(stdout: str) -> tuple[bool, str]:
+    """
+    Parse agent stdout for explicit verification pass/fail markers.
+
+    The QA agent may exit with code 0 but still report verification failure
+    in its output. This function scans stdout for failure markers to provide
+    a secondary check. Failure markers take precedence: if the output
+    contains both a failure marker and a pass marker, the result is a failure.
+
+    Args:
+        stdout: The agent's stdout output
+
+    Returns:
+        Tuple of (passed: bool, reason: str) where:
+        - passed: False if any failure marker is found; True otherwise
+          (including empty output and output with no markers at all)
+        - reason: Human-readable explanation of the detection result
+    """
+    if not stdout:
+        return True, "no stdout to analyze"
+
+    # Patterns are written in lowercase; lowering the text once makes every
+    # search below case-insensitive.
+    stdout_lower = stdout.lower()
+
+    # Explicit failure markers, checked in order; the first hit decides the
+    # reported reason. Markdown headers such as "## Verification Failed" need
+    # no entry of their own: re.search already finds the plain phrase in them.
+    failure_patterns = [
+        (r"verification\s+failed", "found 'verification failed'"),
+        (r"requirements?\s+not\s+met", "found 'requirements not met'"),
+        (r"was\s+not\s+implemented", "found 'was not implemented'"),
+        (r"were\s+not\s+implemented", "found 'were not implemented'"),
+        (r"not\s+actually\s+implemented", "found 'not actually implemented'"),
+        # The words between "the" and "changes" are optional so that both
+        # "none of the changes were implemented" and
+        # "none of the requested changes were actually implemented" match.
+        # '.' does not match newlines, so the filler stays on a single line.
+        (
+            r"none\s+of\s+the\s+(?:.*\s+)?changes\s+were\s+(?:actually\s+)?implemented",
+            "found 'none of the changes were implemented'",
+        ),
+        (r"acceptance\s+criteria\s+not\s+met", "found 'acceptance criteria not met'"),
+    ]
+
+    for pattern, reason in failure_patterns:
+        if re.search(pattern, stdout_lower):
+            return False, reason
+
+    # Explicit pass markers only refine the reason string; the outcome is a
+    # pass either way once no failure marker was found.
+    pass_patterns = [
+        (r"verification\s+passed", "found 'verification passed'"),
+        (r"verification\s+succeeded", "found 'verification succeeded'"),
+        (r"all\s+acceptance\s+criteria\s+met", "found 'all acceptance criteria met'"),
+    ]
+
+    for pattern, reason in pass_patterns:
+        if re.search(pattern, stdout_lower):
+            return True, reason
+
+    # No explicit markers found - assume pass (rely on exit code)
+    return True, "no explicit pass/fail markers found"
+
+
 class Runner:
    def __init__(self, config_path: str):
        self.config = self._load_config(config_path)
@@ -374,9 +428,28 @@ class Runner:

            # Merge to main after verification succeeds
            if task.task_type == "verification":
-                if not self._merge_feature_branch(task):
-                    # Merge failed - move to Triage for human intervention
-                    logger.warning(f"Merge failed for {task.issue_id}, moving to {triage_state}")
+                # Secondary check: parse stdout for explicit failure markers
+                # The QA agent may exit with code 0 but still report verification failure
+                stdout_passed, detection_reason = _detect_verification_result(task.stdout or "")
+                logger.info(f"Verification result for {task.issue_id}: exit_code=0, stdout_check={stdout_passed} ({detection_reason})")
+
+                if stdout_passed:
+                    if not self._merge_feature_branch(task):
+                        # Merge failed - move to Triage for human intervention
+                        logger.warning(f"Merge failed for {task.issue_id}, moving to {triage_state}")
+                        self.youtrack.update_issue_state(task.issue_id, triage_state)
+                else:
+                    logger.warning(f"Blocking merge for {task.issue_id}: stdout indicates failure despite exit code 0")
+                    # Add failure comment and move to triage
+                    self.youtrack_qa.add_issue_comment(
+                        task.issue_id,
+                        f"## Merge Blocked\n\n"
+                        f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
+                        f"**Reason:** Verification output indicates failure despite exit code 0\n"
+                        f"**Detection:** {detection_reason}\n\n"
+                        f"The QA agent's output contains failure markers. Please review the verification "
+                        f"output and address the issues before the branch can be merged."
+                    )
                    self.youtrack.update_issue_state(task.issue_id, triage_state)

        elif task.returncode == 2: