{"eval":{"version":"openagentskill-skill-eval-v1","slug":"visual-regression-tracker-visual-regression-tracker","name":"Visual Regression Tracker","generated_at":"2026-07-03T22:55:50.067Z","task_input":"Evaluate Visual Regression Tracker before installing it in an AI agent workflow","status":"review","score":73,"risk_level":"medium","decision":{"recommendation":"manual_review","reason":"Test manually in an isolated workspace and compare against safer alternatives.","auto_install_allowed":false,"policy":"review","human_review_required":true},"task_fit":{"score":84,"suited_tasks":["Browser automation workflows","Claude Code teams","teams that value GitHub adoption signals","Navigate pages","Click and type safely","Check visual and DOM state","Run test suites","Capture failures"],"suited_agents":["Shell","Playwright","Codex","Claude Code","Cursor","OpenAgentSkill CLI","Browser agents","CLI"]},"install":{"command":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker","ready":true,"policy":"review","safety_label":"Avoid automatic install","targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Visual Regression Tracker\" agent skill from https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Backend and Frontend application for tracking differences via image comparison"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Visual Regression Tracker\" as a Claude Code skill from https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Backend and Frontend application for tracking differences via image comparison"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Visual Regression Tracker\" from https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Backend and Frontend application for tracking differences via image comparison"}]},"trust":{"score":74,"label":"Strong shortlist","version":"trust-score-v4","evidence":{"stars":"695 GitHub stars","repoActivity":"695 stars, 65 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker","install":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker","installSafety":"standard package or runtime install path","permissionSurface":"shell or command execution, network or browser access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"}},"audit":{"score":79,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Dependency or permission surface needs review","Quality score needs review","Dependency/runtime risk: command execution surface, external package install surface"]},"safety_gate":{"score":51,"tier":"experimental","label":"Experimental","auto_install_policy":"review","blocked":false,"permission_hints":[{"id":"shell","label":"Shell or command execution","reason":"Skill metadata references terminal, CLI, shell, subprocess, or command execution workflows.","severity":"high"},{"id":"browser","label":"Browser automation","reason":"Skill may drive a browser or interact with web pages.","severity":"medium"},{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"}],"policy_warnings":["High-risk permission hints: Shell or command execution","Dependency or permission surface needs review"]},"checks":[{"id":"task_fit","label":"Task fit","status":"pass","score":84,"required_for_auto_install":true,"detail":"Task wording matches this skill metadata.","evidence":["Evaluate Visual Regression Tracker before installing it in an AI agent workflow","browser-automation","Browser automation workflows; Claude Code teams; teams that value GitHub adoption signals"]},{"id":"install_path","label":"Install path","status":"pass","score":92,"required_for_auto_install":true,"detail":"Install handoff is available.","evidence":["npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker"]},{"id":"install_safety","label":"Install command safety","status":"pass","score":92,"required_for_auto_install":true,"detail":"standard package or runtime install path","evidence":["npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker"]},{"id":"trust_score","label":"Trust score","status":"warn","score":74,"required_for_auto_install":true,"detail":"Good trust signals with a few areas worth checking before rollout.","evidence":["Strong shortlist","695 GitHub stars","Apache-2.0"]},{"id":"audit_score","label":"Audit score","status":"warn","score":79,"required_for_auto_install":true,"detail":"Needs review","evidence":["Dependency or permission surface needs review"]},{"id":"agent_safety_gate","label":"Agent safety gate","status":"warn","score":51,"required_for_auto_install":true,"detail":"Sparse or mixed signals. Useful for discovery, but not for autonomous installation.","evidence":["Test manually in an isolated workspace and compare against safer alternatives.","High-risk permission hints: Shell or command execution"]},{"id":"readme_skillmd_completeness","label":"README/SKILL.md completeness","status":"warn","score":74,"required_for_auto_install":false,"detail":"Public metadata needs stronger README/SKILL.md context","evidence":["Usable metadata, review docs"]},{"id":"license_clarity","label":"License clarity","status":"pass","score":86,"required_for_auto_install":true,"detail":"Apache-2.0","evidence":["Apache-2.0"]},{"id":"recent_maintenance","label":"Recent maintenance","status":"warn","score":76,"required_for_auto_install":false,"detail":"3mo since push","evidence":["3mo since push"]},{"id":"permission_surface","label":"Permission surface","status":"warn","score":62,"required_for_auto_install":true,"detail":"shell or command execution, network or browser access","evidence":["Shell or command execution: high","Browser automation: medium","Network access: medium"]},{"id":"alternatives","label":"Alternatives available","status":"pass","score":82,"required_for_auto_install":false,"detail":"Alternative skills are available for comparison.","evidence":["microsoft-playwright","apify-crawlee","microsoft-playwright-python","flaresolverr-flaresolverr"]}],"blockers":[],"warnings":["Trust score: Good trust signals with a few areas worth checking before rollout.","Audit score: Needs review","Agent safety gate: Sparse or mixed signals. Useful for discovery, but not for autonomous installation.","README/SKILL.md completeness: Public metadata needs stronger README/SKILL.md context","Recent maintenance: 3mo since push","Permission surface: shell or command execution, network or browser access","High-risk permission hints: Shell or command execution","Dependency or permission surface needs review","Quality score needs review","Dependency/runtime risk: command execution surface, external package install surface"],"validation_plan":["Inspect repository, README/SKILL.md, license, and recent commits before production use.","Install in an isolated workspace or sandbox with no production secrets available.","Run the smallest representative task and record files touched, commands run, network access, and outputs.","Compare the selected skill against at least one alternative when the eval status is review or failed.","Promote only after the agent reports a successful verification result and unresolved warnings are accepted."],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No major risk signals from current metadata","High-risk permission hints: Shell or command execution","Dependency or permission surface needs review","Quality score needs review","Dependency/runtime risk: command execution surface, external package install surface","Production credentials, payments, or irreversible account changes without explicit human review"],"alternatives":[{"slug":"microsoft-playwright","name":"Playwright","url":"https://www.openagentskill.com/skills/microsoft-playwright","stars":91270,"install_command":"npx skills add microsoft/playwright","trust_score":93,"audit_score":95},{"slug":"apify-crawlee","name":"Crawlee","url":"https://www.openagentskill.com/skills/apify-crawlee","stars":24036,"install_command":"npx skills add apify/crawlee","trust_score":92,"audit_score":94},{"slug":"microsoft-playwright-python","name":"Playwright Python","url":"https://www.openagentskill.com/skills/microsoft-playwright-python","stars":14791,"install_command":"npx skills add microsoft/playwright-python","trust_score":91,"audit_score":94},{"slug":"flaresolverr-flaresolverr","name":"FlareSolverr","url":"https://www.openagentskill.com/skills/flaresolverr-flaresolverr","stars":14496,"install_command":"npx skills add FlareSolverr/FlareSolverr","trust_score":89,"audit_score":93}],"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"visual-regression-tracker-visual-regression-tracker","name":"Visual Regression Tracker","description":"Backend and Frontend application for tracking differences via image comparison","category":"browser-automation","url":"https://www.openagentskill.com/skills/visual-regression-tracker-visual-regression-tracker","repository":"https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker","github_repo":"Visual-Regression-Tracker/Visual-Regression-Tracker"},"suited_tasks":["Browser automation workflows","Claude Code teams","teams that value GitHub adoption signals","Navigate pages","Click and type safely","Check visual and DOM state","Run test suites","Capture failures"],"suited_agents":["Shell","Playwright","Codex","Claude Code","Cursor","OpenAgentSkill CLI","Browser agents","CLI"],"install":{"command":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Visual Regression Tracker\" agent skill from https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Backend and Frontend application for tracking differences via image comparison"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Visual Regression Tracker\" as a Claude Code skill from https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Backend and Frontend application for tracking differences via image comparison"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Visual Regression Tracker\" from https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Backend and Frontend application for tracking differences via image comparison"}],"handoff_url":"https://www.openagentskill.com/api/skills/visual-regression-tracker-visual-regression-tracker/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/visual-regression-tracker-visual-regression-tracker"},"trust":{"score":74,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"695 GitHub stars","repoActivity":"695 stars, 65 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/Visual-Regression-Tracker/Visual-Regression-Tracker","install":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker","installSafety":"standard package or runtime install path","permissionSurface":"shell or command execution, network or browser access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["browser-automation","browser","testing","automation","cypress","docker"],"known_risks":["Quality score needs review","Dependency/runtime risk: command execution surface, external package install surface"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":79,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Dependency or permission surface needs review","Quality score needs review","Dependency/runtime risk: command execution surface, external package install surface"]},"safety_gate":{"tier":"experimental","label":"Experimental","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Test manually in an isolated workspace and compare against safer alternatives."},"quality":{"score":80,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Testing and QA","maintenance":"3mo since push","risk":"Needs review"},"alternative_skills":[{"slug":"microsoft-playwright","name":"Playwright","url":"https://www.openagentskill.com/skills/microsoft-playwright","stars":91270,"install_command":"npx skills add microsoft/playwright","trust_score":93,"audit_score":95},{"slug":"apify-crawlee","name":"Crawlee","url":"https://www.openagentskill.com/skills/apify-crawlee","stars":24036,"install_command":"npx skills add apify/crawlee","trust_score":92,"audit_score":94},{"slug":"microsoft-playwright-python","name":"Playwright Python","url":"https://www.openagentskill.com/skills/microsoft-playwright-python","stars":14791,"install_command":"npx skills add microsoft/playwright-python","trust_score":91,"audit_score":94},{"slug":"flaresolverr-flaresolverr","name":"FlareSolverr","url":"https://www.openagentskill.com/skills/flaresolverr-flaresolverr","stars":14496,"install_command":"npx skills add FlareSolverr/FlareSolverr","trust_score":89,"audit_score":93}],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No major risk signals from current metadata","High-risk permission hints: Shell or command execution","Dependency or permission surface needs review","Quality score needs review","Dependency/runtime risk: command execution surface, external package install surface","Production credentials, payments, or irreversible account changes without explicit human review"],"agent_contract":{"task_input":"Evaluate Visual Regression Tracker before installing it in an AI agent workflow","recommended_action":"Test manually in an isolated workspace and compare against safer alternatives.","install_policy":"review","minimum_review_before_use":["Trust: 74/100 Strong shortlist","Audit: 79/100 Needs review","Safety: 51/100 Avoid automatic install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"visual-regression-tracker-visual-regression-tracker (Visual Regression Tracker)","install_command":"npx skills add Visual-Regression-Tracker/Visual-Regression-Tracker","risk_summary":"Needs review; Experimental; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"visual-regression-tracker-visual-regression-tracker","task":"Evaluate Visual Regression Tracker before installing it in an AI agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/visual-regression-tracker-visual-regression-tracker","api":"https://www.openagentskill.com/api/agent/skills/visual-regression-tracker-visual-regression-tracker","audit":"https://www.openagentskill.com/skills/visual-regression-tracker-visual-regression-tracker/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=visual-regression-tracker-visual-regression-tracker&task=Evaluate%20Visual%20Regression%20Tracker%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Evaluate%20Visual%20Regression%20Tracker%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Evaluate%20Visual%20Regression%20Tracker%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/visual-regression-tracker-visual-regression-tracker/install","manifest":"https://www.openagentskill.com/api/registry/manifest/visual-regression-tracker-visual-regression-tracker"}},"endpoints":{"web":"https://www.openagentskill.com/skills/visual-regression-tracker-visual-regression-tracker","api":"https://www.openagentskill.com/api/agent/skills/visual-regression-tracker-visual-regression-tracker","eval":"https://www.openagentskill.com/api/agent/evals?slug=visual-regression-tracker-visual-regression-tracker","audit":"https://www.openagentskill.com/skills/visual-regression-tracker-visual-regression-tracker/audit","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Evaluate%20Visual%20Regression%20Tracker%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&agent=codex&max_risk=medium"}},"meta":{"endpoint":"/api/agent/evals","mode":"skill_eval","purpose":"Pre-install eval contract for a single skill. Agents should read this before installing a reusable skill.","generated_at":"2026-07-03T22:55:50.067Z"}}