Merge branch 'issue/CG-62' - block merge when verification output indicates failure
Adds _detect_verification_result() to parse QA agent stdout for failure markers. Even if the QA agent exits with code 0, the merge will be blocked if the output contains patterns like "verification failed", "requirements not met", "not actually implemented", etc. Also handles merge failures (e.g., conflicts) by moving to Triage. Resolves: CG-62 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
79
runner.py
79
runner.py
@@ -29,6 +29,7 @@ from gitea_client import GiteaClient, load_gitea_config, git_push, git_merge_to_
|
||||
from agent import AgentPool, AgentTask, build_prompt
|
||||
from webhook_server import WebhookServer, WebhookEvent, load_webhook_config
|
||||
from woodpecker_client import WoodpeckerClient, BuildInfo
|
||||
import re
|
||||
|
||||
# Constants
|
||||
DEFAULT_WEBHOOK_QUEUE_SIZE = 1000 # Maximum queued webhook events
|
||||
@@ -166,6 +167,59 @@ class ConfigurationError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _detect_verification_result(stdout: str) -> tuple[bool, str]:
|
||||
"""
|
||||
Parse agent stdout for explicit verification pass/fail markers.
|
||||
|
||||
The QA agent may exit with code 0 but still report verification failure
|
||||
in its output. This function detects failure markers in stdout to provide
|
||||
a secondary check.
|
||||
|
||||
Args:
|
||||
stdout: The agent's stdout output
|
||||
|
||||
Returns:
|
||||
Tuple of (passed: bool, reason: str) where:
|
||||
- passed: True if verification passed, False if failure markers found
|
||||
- reason: Human-readable explanation of the detection result
|
||||
"""
|
||||
if not stdout:
|
||||
return True, "no stdout to analyze"
|
||||
|
||||
stdout_lower = stdout.lower()
|
||||
|
||||
# Check for explicit failure markers (case-insensitive)
|
||||
failure_patterns = [
|
||||
(r"verification\s+failed", "found 'verification failed'"),
|
||||
(r"requirements?\s+not\s+met", "found 'requirements not met'"),
|
||||
(r"was\s+not\s+implemented", "found 'was not implemented'"),
|
||||
(r"were\s+not\s+implemented", "found 'were not implemented'"),
|
||||
(r"not\s+actually\s+implemented", "found 'not actually implemented'"),
|
||||
(r"none\s+of\s+the\s+.*\s+changes\s+were\s+(actually\s+)?implemented", "found 'none of the changes were implemented'"),
|
||||
(r"acceptance\s+criteria\s+not\s+met", "found 'acceptance criteria not met'"),
|
||||
(r"##\s*verification\s+failed", "found 'Verification Failed' header"),
|
||||
]
|
||||
|
||||
for pattern, reason in failure_patterns:
|
||||
if re.search(pattern, stdout_lower):
|
||||
return False, reason
|
||||
|
||||
# Check for explicit pass markers
|
||||
pass_patterns = [
|
||||
(r"verification\s+passed", "found 'verification passed'"),
|
||||
(r"verification\s+succeeded", "found 'verification succeeded'"),
|
||||
(r"all\s+acceptance\s+criteria\s+met", "found 'all acceptance criteria met'"),
|
||||
(r"##\s*verification\s+passed", "found 'Verification Passed' header"),
|
||||
]
|
||||
|
||||
for pattern, reason in pass_patterns:
|
||||
if re.search(pattern, stdout_lower):
|
||||
return True, reason
|
||||
|
||||
# No explicit markers found - assume pass (rely on exit code)
|
||||
return True, "no explicit pass/fail markers found"
|
||||
|
||||
|
||||
class Runner:
|
||||
def __init__(self, config_path: str):
|
||||
self.config = self._load_config(config_path)
|
||||
@@ -374,9 +428,28 @@ class Runner:
|
||||
|
||||
# Merge to main after verification succeeds
|
||||
if task.task_type == "verification":
|
||||
if not self._merge_feature_branch(task):
|
||||
# Merge failed - move to Triage for human intervention
|
||||
logger.warning(f"Merge failed for {task.issue_id}, moving to {triage_state}")
|
||||
# Secondary check: parse stdout for explicit failure markers
|
||||
# The QA agent may exit with code 0 but still report verification failure
|
||||
stdout_passed, detection_reason = _detect_verification_result(task.stdout or "")
|
||||
logger.info(f"Verification result for {task.issue_id}: exit_code=0, stdout_check={stdout_passed} ({detection_reason})")
|
||||
|
||||
if stdout_passed:
|
||||
if not self._merge_feature_branch(task):
|
||||
# Merge failed - move to Triage for human intervention
|
||||
logger.warning(f"Merge failed for {task.issue_id}, moving to {triage_state}")
|
||||
self.youtrack.update_issue_state(task.issue_id, triage_state)
|
||||
else:
|
||||
logger.warning(f"Blocking merge for {task.issue_id}: stdout indicates failure despite exit code 0")
|
||||
# Add failure comment and move to triage
|
||||
self.youtrack_qa.add_issue_comment(
|
||||
task.issue_id,
|
||||
f"## Merge Blocked\n\n"
|
||||
f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
|
||||
f"**Reason:** Verification output indicates failure despite exit code 0\n"
|
||||
f"**Detection:** {detection_reason}\n\n"
|
||||
f"The QA agent's output contains failure markers. Please review the verification "
|
||||
f"output and address the issues before the branch can be merged."
|
||||
)
|
||||
self.youtrack.update_issue_state(task.issue_id, triage_state)
|
||||
|
||||
elif task.returncode == 2:
|
||||
|
||||
Reference in New Issue
Block a user