{"query":"Bench","filters":{"category":null,"platform":null,"track":null,"safety":null,"include_blocked":false,"min_stars":0},"total":10,"skills":[{"rank":1,"match_score":99,"raw_match_score":958.4,"slug":"arthur-ai-bench","name":"Bench","description":"A tool for evaluating LLMs","tagline":"A tool for evaluating LLMs","category":"ml-automation","tags":["mlops","machine-learning","ml-media","llm","typescript","github"],"author":{"name":"arthur-ai","verified":false,"url":"https://github.com/arthur-ai"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"arthur-ai/bench","creatorName":"arthur-ai","creatorUrl":"https://github.com/arthur-ai","sourceUrl":"https://github.com/arthur-ai/bench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/arthur-ai-bench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":428,"forks":42,"downloads":0,"rating":0,"review_count":0,"quality_score":45.13},"quality":{"score":74,"tier":"strong","label":"Strong","summary":"Solid option that is likely worth shortlisting for production workflows.","signals":[{"label":"GitHub stars","value":"428","tone":"neutral"},{"label":"Freshness","value":"4mo ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"MIT","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":77,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":62,"weight":0.13,"status":"info","detail":"428 GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":57,"weight":0.08,"status":"warn","detail":"428 stars, 42 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":76,"weight":0.14,"status":"info","detail":"4mo since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"MIT"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add arthur-ai/bench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/arthur-ai/bench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"info","label":"GitHub adoption","detail":"428 GitHub stars"},{"status":"warn","label":"Stars/forks activity","detail":"428 stars, 42 forks; issue activity unavailable in current metadata"},{"status":"info","label":"Recent maintenance","detail":"4mo since push"},{"status":"pass","label":"License clarity","detail":"MIT"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add arthur-ai/bench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/arthur-ai/bench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"warn","label":"Ownership","detail":"No approved owner claim yet"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["AI review approved","Install path is available","Repository evidence is available","Install command has no obvious high-risk pattern"],"warnings":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"],"evidence":{"stars":"428 GitHub stars","repoActivity":"428 stars, 42 forks","lastPushed":"4mo since push","license":"MIT","repository":"https://github.com/arthur-ai/bench","install":"npx skills add arthur-ai/bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add arthur-ai/bench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","4mo since push"]},"agentCompatibility":["TypeScript","MLOps","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["ml-automation","mlops","machine-learning","ml-media","llm","typescript"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"knownRisks":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"safety":{"score":64,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","summary":"Usable candidate, but the agent should surface permission and audit notes before installation.","recommended_action":"Require human approval before installing into a real workspace.","auto_install_policy":"review","reasons":["Quality score needs review","64/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"needs_review","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":["Quality score needs review"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace.","reasons":["Quality score needs review","64/100 agent safety score"]},"supply_profile":{"track":{"slug":"coding","label":"Coding and developer agents","shortLabel":"Coding","description":"Code review, repo analysis, testing, CI, GitHub, DevOps, and developer workflow skills."},"scenario":{"label":"Coding agents","description":"I need a coding agent that can understand a repository, edit code, and review pull requests.","useCases":[{"slug":"coding-agents","title":"Coding agents"},{"slug":"rag-knowledge","title":"RAG and knowledge"},{"slug":"workflow-automation","title":"Workflow automation"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","TypeScript"],"install":{"ready":true,"command":"npx skills add arthur-ai/bench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":428,"starsLabel":"428","forks":42,"license":"MIT","qualityScore":74,"trustScore":77,"auditScore":80},"maintenance":{"status":"active","label":"4mo since push","daysSincePush":109,"lastPushedAt":"2026-03-15T23:29:33+00:00"},"risk":{"level":"needs_review","label":"Needs review","requiresReview":true,"notes":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata","Needs review"]},"coverageTags":["Coding","Coding agents","ml-automation","mlops","machine-learning","ml-media","llm","typescript"]},"audit":{"audit_score":80,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"decision":{"readiness_score":73,"readiness_label":"Strong shortlist","headline":"Companion skill for Coding agents","role":"Companion skill","primary_fit":"Coding agents","best_for":["Coding agents workflows","Claude Code teams","builders willing to evaluate younger projects"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one Coding agents task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"arthur-ai-bench","name":"Bench","description":"A tool for evaluating LLMs","category":"ml-automation","url":"https://www.openagentskill.com/skills/arthur-ai-bench","repository":"https://github.com/arthur-ai/bench","github_repo":"arthur-ai/bench"},"suited_tasks":["Coding agents workflows","Claude Code teams","builders willing to evaluate younger projects","Inspect source files","Explain architecture","Patch bugs and verify changes","Chunk documents","Create embeddings"],"suited_agents":["TypeScript","MLOps","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add arthur-ai/bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add arthur-ai/bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Bench\" agent skill from https://github.com/arthur-ai/bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: A tool for evaluating LLMs"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Bench\" as a Claude Code skill from https://github.com/arthur-ai/bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: A tool for evaluating LLMs"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Bench\" from https://github.com/arthur-ai/bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: A tool for evaluating LLMs"}],"handoff_url":"https://www.openagentskill.com/api/skills/arthur-ai-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/arthur-ai-bench"},"trust":{"score":77,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"428 GitHub stars","repoActivity":"428 stars, 42 forks","lastPushed":"4mo since push","license":"MIT","repository":"https://github.com/arthur-ai/bench","install":"npx skills add arthur-ai/bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["ml-automation","mlops","machine-learning","ml-media","llm","typescript"],"known_risks":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":80,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":74,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Coding agents","maintenance":"4mo since push","risk":"Needs review"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use Bench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 77/100 Strong shortlist","Audit: 80/100 Needs review","Safety: 64/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"arthur-ai-bench (Bench)","install_command":"npx skills add arthur-ai/bench","risk_summary":"Needs review; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"arthur-ai-bench","task":"Use Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/arthur-ai-bench","api":"https://www.openagentskill.com/api/agent/skills/arthur-ai-bench","audit":"https://www.openagentskill.com/skills/arthur-ai-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=arthur-ai-bench&task=Use%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/arthur-ai-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/arthur-ai-bench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"arthur-ai-bench","name":"Bench","description":"A tool for evaluating LLMs","category":"ml-automation","url":"https://www.openagentskill.com/skills/arthur-ai-bench","repository":"https://github.com/arthur-ai/bench","github_repo":"arthur-ai/bench"},"suited_tasks":["Coding agents workflows","Claude Code teams","builders willing to evaluate younger projects","Inspect source files","Explain architecture","Patch bugs and verify changes","Chunk documents","Create embeddings"],"suited_agents":["TypeScript","MLOps","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add arthur-ai/bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add arthur-ai/bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Bench\" agent skill from https://github.com/arthur-ai/bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: A tool for evaluating LLMs"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Bench\" as a Claude Code skill from https://github.com/arthur-ai/bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: A tool for evaluating LLMs"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Bench\" from https://github.com/arthur-ai/bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: A tool for evaluating LLMs"}],"handoff_url":"https://www.openagentskill.com/api/skills/arthur-ai-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/arthur-ai-bench"},"trust":{"score":77,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"428 GitHub stars","repoActivity":"428 stars, 42 forks","lastPushed":"4mo since push","license":"MIT","repository":"https://github.com/arthur-ai/bench","install":"npx skills add arthur-ai/bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["ml-automation","mlops","machine-learning","ml-media","llm","typescript"],"known_risks":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":80,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":74,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Coding agents","maintenance":"4mo since push","risk":"Needs review"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Documentation summary is thin","Stars/forks activity: 428 stars, 42 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use Bench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 77/100 Strong shortlist","Audit: 80/100 Needs review","Safety: 64/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"arthur-ai-bench (Bench)","install_command":"npx skills add arthur-ai/bench","risk_summary":"Needs review; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"arthur-ai-bench","task":"Use Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/arthur-ai-bench","api":"https://www.openagentskill.com/api/agent/skills/arthur-ai-bench","audit":"https://www.openagentskill.com/skills/arthur-ai-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=arthur-ai-bench&task=Use%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/arthur-ai-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/arthur-ai-bench"}},"platforms":["TypeScript","MLOps","Claude Code"],"use_cases":[{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"},{"slug":"sports-analytics","title":"Sports analytics","url":"https://www.openagentskill.com/use-cases/sports-analytics"}],"install":"npx skills add arthur-ai/bench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add arthur-ai/bench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"Bench\" agent skill from https://github.com/arthur-ai/bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: A tool for evaluating LLMs","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"Bench\" as a Claude Code skill from https://github.com/arthur-ai/bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: A tool for evaluating LLMs","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"Bench\" from https://github.com/arthur-ai/bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: A tool for evaluating LLMs","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/arthur-ai/bench","github_repo":"arthur-ai/bench","version":"1.0.0","license":"MIT","updated_at":"2026-07-03T03:00:54.237943+00:00","canonical_key":"arthur-ai/bench","recommendation_reasons":["Matches task terms: bench","Install handoff is available","Repository freshness signal is available","Registry match score 99"],"urls":{"web":"https://www.openagentskill.com/skills/arthur-ai-bench","api":"https://www.openagentskill.com/api/agent/skills/arthur-ai-bench","install_api":"https://www.openagentskill.com/api/skills/arthur-ai-bench/install","audit":"https://www.openagentskill.com/skills/arthur-ai-bench/audit","repository":"https://github.com/arthur-ai/bench"}},{"rank":2,"match_score":57,"raw_match_score":552.8,"slug":"aquasecurity-kube-bench","name":"Kube Bench","description":"Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","tagline":"Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","category":"devops","tags":["kubernetes","devops","cis-benchmark","cis-kubernetes-benchmark","cis-security","hacktoberfest","kube-bench","kubernetes-security","openshift","go"],"author":{"name":"aquasecurity","verified":true,"url":"https://github.com/aquasecurity"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"aquasecurity/kube-bench","creatorName":"aquasecurity","creatorUrl":"https://github.com/aquasecurity","sourceUrl":"https://github.com/aquasecurity/kube-bench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/aquasecurity-kube-bench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":8092,"forks":1328,"downloads":0,"rating":0,"review_count":0,"quality_score":69.06},"quality":{"score":100,"tier":"excellent","label":"Excellent","summary":"High-confidence pick with strong adoption and healthy maintenance signals.","signals":[{"label":"GitHub stars","value":"8.1K","tone":"positive"},{"label":"Freshness","value":"11d ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Apache-2.0","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":92,"tier":"production","label":"Production candidate","summary":"Strong OpenAgentSkill Trust Score across adoption, recent maintenance, license clarity, documentation, dependency/runtime risk, install safety, permission surface, and install availability.","recommendedAction":"Shortlist for production use, then run a normal repository and dependency review.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":94,"weight":0.13,"status":"pass","detail":"8.1K GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":93,"weight":0.08,"status":"pass","detail":"8.1K stars, 1.3K forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":100,"weight":0.14,"status":"pass","detail":"11d since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"Apache-2.0"},{"id":"documentation","label":"README/SKILL.md completeness","score":90,"weight":0.14,"status":"pass","detail":"Metadata includes enough usage and workflow context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add aquasecurity/kube-bench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/aquasecurity/kube-bench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"pass","label":"GitHub adoption","detail":"8.1K GitHub stars"},{"status":"pass","label":"Stars/forks activity","detail":"8.1K stars, 1.3K forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"11d since push"},{"status":"pass","label":"License clarity","detail":"Apache-2.0"},{"status":"pass","label":"README/SKILL.md completeness","detail":"Metadata includes enough usage and workflow context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add aquasecurity/kube-bench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/aquasecurity/kube-bench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"pass","label":"Ownership","detail":"Listing manually verified"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["Manually verified listing","AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Large GitHub adoption signal","Install command has no obvious high-risk pattern"],"warnings":[],"evidence":{"stars":"8.1K GitHub stars","repoActivity":"8.1K stars, 1.3K forks","lastPushed":"11d since push","license":"Apache-2.0","repository":"https://github.com/aquasecurity/kube-bench","install":"npx skills add aquasecurity/kube-bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Strong README/SKILL.md context","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add aquasecurity/kube-bench","policy":"agent_install_candidate","label":"Agent install candidate","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","11d since push"]},"agentCompatibility":["Go","Kubernetes","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"low","label":"Low metadata risk","notes":["No major trust warnings detected from available metadata"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":true,"sandboxRequired":true,"policy":"agent_install_candidate","reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"bestFor":["devops","kubernetes","cis-benchmark","cis-kubernetes-benchmark","cis-security","hacktoberfest"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"knownRisks":[]},"safety":{"score":87,"level":"safe_to_install","label":"Safe to install with normal review","safety_tier":{"tier":"verified","label":"Verified","badge":"VERIFIED","summary":"Strong metadata, audit, install, and review signals. Suitable for agent shortlists after normal workspace review.","recommended_action":"Allow agent install in a sandbox or low-risk workspace, then promote after one successful narrow task.","auto_install_policy":"allow","reasons":["Verified listing","Safe-to-try audit","87/100 agent safety score"]},"auto_install_allowed":true,"human_review_required":false,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":[],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"verified","label":"Verified","badge":"VERIFIED","auto_install_policy":"allow","auto_install_allowed":true,"human_review_required":false,"blocked":false,"recommended_action":"Allow agent install in a sandbox or low-risk workspace, then promote after one successful narrow task.","reasons":["Verified listing","Safe-to-try audit","87/100 agent safety score"]},"supply_profile":{"track":{"slug":"coding","label":"Coding and developer agents","shortLabel":"Coding","description":"Code review, repo analysis, testing, CI, GitHub, DevOps, and developer workflow skills."},"scenario":{"label":"GitHub automation","description":"I need my agent to triage GitHub issues, review pull requests, and summarize repository changes.","useCases":[{"slug":"github-automation","title":"GitHub automation"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"rag-knowledge","title":"RAG and knowledge"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","Go"],"install":{"ready":true,"command":"npx skills add aquasecurity/kube-bench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":8092,"starsLabel":"8.1K","forks":1328,"license":"Apache-2.0","qualityScore":100,"trustScore":92,"auditScore":95},"maintenance":{"status":"fresh","label":"11d since push","daysSincePush":11,"lastPushedAt":"2026-06-22T19:32:26+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":false,"notes":["No major risk signals from available metadata"]},"coverageTags":["Coding","GitHub automation","devops","kubernetes","cis-benchmark","cis-kubernetes-benchmark","cis-security","hacktoberfest"]},"audit":{"audit_score":95,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":[]},"decision":{"readiness_score":100,"readiness_label":"Production-ready","headline":"Primary pick for GitHub automation","role":"Primary pick","primary_fit":"GitHub automation","best_for":["GitHub automation workflows","Claude Code teams","teams that value GitHub adoption signals"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one GitHub automation task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"aquasecurity-kube-bench","name":"Kube Bench","description":"Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","category":"devops","url":"https://www.openagentskill.com/skills/aquasecurity-kube-bench","repository":"https://github.com/aquasecurity/kube-bench","github_repo":"aquasecurity/kube-bench"},"suited_tasks":["GitHub automation workflows","Claude Code teams","teams that value GitHub adoption signals","Inspect repository metadata","Compare code changes","Write concise engineering summaries","Inspect source files","Explain architecture"],"suited_agents":["Go","Kubernetes","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add aquasecurity/kube-bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add aquasecurity/kube-bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Kube Bench\" agent skill from https://github.com/aquasecurity/kube-bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Kube Bench\" as a Claude Code skill from https://github.com/aquasecurity/kube-bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Kube Bench\" from https://github.com/aquasecurity/kube-bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark"}],"handoff_url":"https://www.openagentskill.com/api/skills/aquasecurity-kube-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/aquasecurity-kube-bench"},"trust":{"score":92,"label":"Production candidate","version":"trust-score-v4","install_policy":"agent_install_candidate","evidence":{"stars":"8.1K GitHub stars","repoActivity":"8.1K stars, 1.3K forks","lastPushed":"11d since push","license":"Apache-2.0","repository":"https://github.com/aquasecurity/kube-bench","install":"npx skills add aquasecurity/kube-bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Strong README/SKILL.md context","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":true,"sandbox_required":true,"reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"best_for":["devops","kubernetes","cis-benchmark","cis-kubernetes-benchmark","cis-security","hacktoberfest"],"known_risks":[]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":95,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":[]},"safety_gate":{"tier":"verified","label":"Verified","auto_install_policy":"allow","auto_install_allowed":true,"human_review_required":false,"blocked":false,"recommended_action":"Allow agent install in a sandbox or low-risk workspace, then promote after one successful narrow task."},"quality":{"score":100,"label":"Excellent"},"supply":{"track":"Coding and developer agents","scenario":"GitHub automation","maintenance":"11d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","No major trust warnings detected from available metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use Kube Bench in an agent workflow","recommended_action":"Allow agent install in a sandbox or low-risk workspace, then promote after one successful narrow task.","install_policy":"allow","minimum_review_before_use":["Trust: 92/100 Production candidate","Audit: 95/100 Safe to try","Safety: 87/100 Safe to install with normal review","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"aquasecurity-kube-bench (Kube Bench)","install_command":"npx skills add aquasecurity/kube-bench","risk_summary":"Safe to try; Verified; Low metadata risk","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"aquasecurity-kube-bench","task":"Use Kube Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/aquasecurity-kube-bench","api":"https://www.openagentskill.com/api/agent/skills/aquasecurity-kube-bench","audit":"https://www.openagentskill.com/skills/aquasecurity-kube-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=aquasecurity-kube-bench&task=Use%20Kube%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Kube%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Kube%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/aquasecurity-kube-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/aquasecurity-kube-bench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"aquasecurity-kube-bench","name":"Kube Bench","description":"Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","category":"devops","url":"https://www.openagentskill.com/skills/aquasecurity-kube-bench","repository":"https://github.com/aquasecurity/kube-bench","github_repo":"aquasecurity/kube-bench"},"suited_tasks":["GitHub automation workflows","Claude Code teams","teams that value GitHub adoption signals","Inspect repository metadata","Compare code changes","Write concise engineering summaries","Inspect source files","Explain architecture"],"suited_agents":["Go","Kubernetes","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add aquasecurity/kube-bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add aquasecurity/kube-bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Kube Bench\" agent skill from https://github.com/aquasecurity/kube-bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Kube Bench\" as a Claude Code skill from https://github.com/aquasecurity/kube-bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Kube Bench\" from https://github.com/aquasecurity/kube-bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark"}],"handoff_url":"https://www.openagentskill.com/api/skills/aquasecurity-kube-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/aquasecurity-kube-bench"},"trust":{"score":92,"label":"Production candidate","version":"trust-score-v4","install_policy":"agent_install_candidate","evidence":{"stars":"8.1K GitHub stars","repoActivity":"8.1K stars, 1.3K forks","lastPushed":"11d since push","license":"Apache-2.0","repository":"https://github.com/aquasecurity/kube-bench","install":"npx skills add aquasecurity/kube-bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Strong README/SKILL.md context","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":true,"sandbox_required":true,"reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"best_for":["devops","kubernetes","cis-benchmark","cis-kubernetes-benchmark","cis-security","hacktoberfest"],"known_risks":[]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":95,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":[]},"safety_gate":{"tier":"verified","label":"Verified","auto_install_policy":"allow","auto_install_allowed":true,"human_review_required":false,"blocked":false,"recommended_action":"Allow agent install in a sandbox or low-risk workspace, then promote after one successful narrow task."},"quality":{"score":100,"label":"Excellent"},"supply":{"track":"Coding and developer agents","scenario":"GitHub automation","maintenance":"11d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","No major trust warnings detected from available metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use Kube Bench in an agent workflow","recommended_action":"Allow agent install in a sandbox or low-risk workspace, then promote after one successful narrow task.","install_policy":"allow","minimum_review_before_use":["Trust: 92/100 Production candidate","Audit: 95/100 Safe to try","Safety: 87/100 Safe to install with normal review","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"aquasecurity-kube-bench (Kube Bench)","install_command":"npx skills add aquasecurity/kube-bench","risk_summary":"Safe to try; Verified; Low metadata risk","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"aquasecurity-kube-bench","task":"Use Kube Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/aquasecurity-kube-bench","api":"https://www.openagentskill.com/api/agent/skills/aquasecurity-kube-bench","audit":"https://www.openagentskill.com/skills/aquasecurity-kube-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=aquasecurity-kube-bench&task=Use%20Kube%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Kube%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Kube%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/aquasecurity-kube-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/aquasecurity-kube-bench"}},"platforms":["Go","Kubernetes","Claude Code"],"use_cases":[{"slug":"github-automation","title":"GitHub automation","url":"https://www.openagentskill.com/use-cases/github-automation"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add aquasecurity/kube-bench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add aquasecurity/kube-bench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"Kube Bench\" agent skill from https://github.com/aquasecurity/kube-bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"Kube Bench\" as a Claude Code skill from https://github.com/aquasecurity/kube-bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"Kube Bench\" from https://github.com/aquasecurity/kube-bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Checks whether Kubernetes is deployed according to security best practices as defined in the CIS Kubernetes Benchmark","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/aquasecurity/kube-bench","github_repo":"aquasecurity/kube-bench","version":"1.0.0","license":"Apache-2.0","updated_at":"2026-07-03T03:01:15.992827+00:00","canonical_key":"aquasecurity/kube-bench","recommendation_reasons":["Matches task terms: bench","Useful GitHub adoption: 8,092 stars","Install handoff is available","Repository freshness signal is available","Registry match score 57"],"urls":{"web":"https://www.openagentskill.com/skills/aquasecurity-kube-bench","api":"https://www.openagentskill.com/api/agent/skills/aquasecurity-kube-bench","install_api":"https://www.openagentskill.com/api/skills/aquasecurity-kube-bench/install","audit":"https://www.openagentskill.com/skills/aquasecurity-kube-bench/audit","repository":"https://github.com/aquasecurity/kube-bench"}},{"rank":3,"match_score":57,"raw_match_score":547.4,"slug":"vchitect-vbench","name":"VBench","description":"[CVPR2024 Highlight] VBench - We Evaluate Video Generation","tagline":"[CVPR2024 Highlight] VBench - We Evaluate Video Generation","category":"media-automation","tags":["video-generation","creative","media","design","aigc","benchmark","dataset","evaluation-kit","gen-ai","stable-diffusion"],"author":{"name":"Vchitect","verified":true,"url":"https://github.com/Vchitect"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"Vchitect/VBench","creatorName":"Vchitect","creatorUrl":"https://github.com/Vchitect","sourceUrl":"https://github.com/Vchitect/VBench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/vchitect-vbench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":1654,"forks":126,"downloads":0,"rating":0,"review_count":0,"quality_score":61.23},"quality":{"score":98,"tier":"excellent","label":"Excellent","summary":"High-confidence pick with strong adoption and healthy maintenance signals.","signals":[{"label":"GitHub stars","value":"1.7K","tone":"positive"},{"label":"Freshness","value":"3mo ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Apache-2.0","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":86,"tier":"production","label":"Production candidate","summary":"Strong OpenAgentSkill Trust Score across adoption, recent maintenance, license clarity, documentation, dependency/runtime risk, install safety, permission surface, and install availability.","recommendedAction":"Shortlist for production use, then run a normal repository and dependency review.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":86,"weight":0.13,"status":"pass","detail":"1.7K GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":77,"weight":0.08,"status":"info","detail":"1.7K stars, 126 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":76,"weight":0.14,"status":"info","detail":"3mo since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"Apache-2.0"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add Vchitect/VBench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/Vchitect/VBench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"pass","label":"GitHub adoption","detail":"1.7K GitHub stars"},{"status":"info","label":"Stars/forks activity","detail":"1.7K stars, 126 forks; issue activity unavailable in current metadata"},{"status":"info","label":"Recent maintenance","detail":"3mo since push"},{"status":"pass","label":"License clarity","detail":"Apache-2.0"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add Vchitect/VBench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/Vchitect/VBench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"pass","label":"Ownership","detail":"Listing manually verified"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["Manually verified listing","AI review approved","Install path is available","Repository evidence is available","Meaningful GitHub adoption signal","Install command has no obvious high-risk pattern"],"warnings":[],"evidence":{"stars":"1.7K GitHub stars","repoActivity":"1.7K stars, 126 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/Vchitect/VBench","install":"npx skills add Vchitect/VBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add Vchitect/VBench","policy":"agent_install_candidate","label":"Agent install candidate","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","3mo since push"]},"agentCompatibility":["Python","Video Generation","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"low","label":"Low metadata risk","notes":["No major trust warnings detected from available metadata"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":true,"sandboxRequired":true,"policy":"agent_install_candidate","reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"bestFor":["media-automation","video-generation","creative","media","design","aigc"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"knownRisks":[]},"safety":{"score":81,"level":"safe_to_install","label":"Safe to install with normal review","safety_tier":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","summary":"Good audit and safety signals with no high-risk permission hints in public metadata.","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","auto_install_policy":"allow","reasons":["Safe-to-try audit","81/100 agent safety score"]},"auto_install_allowed":true,"human_review_required":false,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":[],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","auto_install_policy":"allow","auto_install_allowed":true,"human_review_required":false,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","reasons":["Safe-to-try audit","81/100 agent safety score"]},"supply_profile":{"track":{"slug":"design","label":"Design and creative production","shortLabel":"Design","description":"Design assets, images, video, audio, multimodal media, presentation, and creative production skills."},"scenario":{"label":"Design and creative","description":"I need my agent to produce design assets, UI directions, presentations, or creative media workflows.","useCases":[{"slug":"design-creative","title":"Design and creative"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"rag-knowledge","title":"RAG and knowledge"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","Python"],"install":{"ready":true,"command":"npx skills add Vchitect/VBench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":1654,"starsLabel":"1.7K","forks":126,"license":"Apache-2.0","qualityScore":98,"trustScore":86,"auditScore":89},"maintenance":{"status":"active","label":"3mo since push","daysSincePush":102,"lastPushedAt":"2026-03-23T08:32:27+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":false,"notes":["No major risk signals from available metadata"]},"coverageTags":["Design","Design and creative","media-automation","video-generation","creative","media","aigc","benchmark"]},"audit":{"audit_score":89,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":[]},"decision":{"readiness_score":100,"readiness_label":"Production-ready","headline":"Primary pick for Design and creative","role":"Primary pick","primary_fit":"Design and creative","best_for":["Design and creative workflows","Claude Code teams","teams that value GitHub adoption signals"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one Design and creative task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"vchitect-vbench","name":"VBench","description":"[CVPR2024 Highlight] VBench - We Evaluate Video Generation","category":"media-automation","url":"https://www.openagentskill.com/skills/vchitect-vbench","repository":"https://github.com/Vchitect/VBench","github_repo":"Vchitect/VBench"},"suited_tasks":["Design and creative workflows","Claude Code teams","teams that value GitHub adoption signals","Inspect visual requirements","Generate reusable assets","Package output for review","Inspect source files","Explain architecture"],"suited_agents":["Python","Video Generation","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add Vchitect/VBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add Vchitect/VBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"VBench\" agent skill from https://github.com/Vchitect/VBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"VBench\" as a Claude Code skill from https://github.com/Vchitect/VBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"VBench\" from https://github.com/Vchitect/VBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation"}],"handoff_url":"https://www.openagentskill.com/api/skills/vchitect-vbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/vchitect-vbench"},"trust":{"score":86,"label":"Production candidate","version":"trust-score-v4","install_policy":"agent_install_candidate","evidence":{"stars":"1.7K GitHub stars","repoActivity":"1.7K stars, 126 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/Vchitect/VBench","install":"npx skills add Vchitect/VBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":true,"sandbox_required":true,"reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"best_for":["media-automation","video-generation","creative","media","design","aigc"],"known_risks":[]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":89,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":[]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"allow","auto_install_allowed":true,"human_review_required":false,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":98,"label":"Excellent"},"supply":{"track":"Design and creative production","scenario":"Design and creative","maintenance":"3mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","No major trust warnings detected from available metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use VBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"allow","minimum_review_before_use":["Trust: 86/100 Production candidate","Audit: 89/100 Safe to try","Safety: 81/100 Safe to install with normal review","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"vchitect-vbench (VBench)","install_command":"npx skills add Vchitect/VBench","risk_summary":"Safe to try; Reviewed; Low metadata risk","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"vchitect-vbench","task":"Use VBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/vchitect-vbench","api":"https://www.openagentskill.com/api/agent/skills/vchitect-vbench","audit":"https://www.openagentskill.com/skills/vchitect-vbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=vchitect-vbench&task=Use%20VBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20VBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20VBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/vchitect-vbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/vchitect-vbench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"vchitect-vbench","name":"VBench","description":"[CVPR2024 Highlight] VBench - We Evaluate Video Generation","category":"media-automation","url":"https://www.openagentskill.com/skills/vchitect-vbench","repository":"https://github.com/Vchitect/VBench","github_repo":"Vchitect/VBench"},"suited_tasks":["Design and creative workflows","Claude Code teams","teams that value GitHub adoption signals","Inspect visual requirements","Generate reusable assets","Package output for review","Inspect source files","Explain architecture"],"suited_agents":["Python","Video Generation","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add Vchitect/VBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add Vchitect/VBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"VBench\" agent skill from https://github.com/Vchitect/VBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"VBench\" as a Claude Code skill from https://github.com/Vchitect/VBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"VBench\" from https://github.com/Vchitect/VBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation"}],"handoff_url":"https://www.openagentskill.com/api/skills/vchitect-vbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/vchitect-vbench"},"trust":{"score":86,"label":"Production candidate","version":"trust-score-v4","install_policy":"agent_install_candidate","evidence":{"stars":"1.7K GitHub stars","repoActivity":"1.7K stars, 126 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/Vchitect/VBench","install":"npx skills add Vchitect/VBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":true,"sandbox_required":true,"reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"best_for":["media-automation","video-generation","creative","media","design","aigc"],"known_risks":[]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":89,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":[]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"allow","auto_install_allowed":true,"human_review_required":false,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":98,"label":"Excellent"},"supply":{"track":"Design and creative production","scenario":"Design and creative","maintenance":"3mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","No major trust warnings detected from available metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use VBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"allow","minimum_review_before_use":["Trust: 86/100 Production candidate","Audit: 89/100 Safe to try","Safety: 81/100 Safe to install with normal review","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"vchitect-vbench (VBench)","install_command":"npx skills add Vchitect/VBench","risk_summary":"Safe to try; Reviewed; Low metadata risk","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"vchitect-vbench","task":"Use VBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/vchitect-vbench","api":"https://www.openagentskill.com/api/agent/skills/vchitect-vbench","audit":"https://www.openagentskill.com/skills/vchitect-vbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=vchitect-vbench&task=Use%20VBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20VBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20VBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/vchitect-vbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/vchitect-vbench"}},"platforms":["Python","Video Generation","Claude Code"],"use_cases":[{"slug":"design-creative","title":"Design and creative","url":"https://www.openagentskill.com/use-cases/design-creative"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add Vchitect/VBench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add Vchitect/VBench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"VBench\" agent skill from https://github.com/Vchitect/VBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"VBench\" as a Claude Code skill from https://github.com/Vchitect/VBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"VBench\" from https://github.com/Vchitect/VBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: [CVPR2024 Highlight] VBench - We Evaluate Video Generation","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/Vchitect/VBench","github_repo":"Vchitect/VBench","version":"1.0.0","license":"Apache-2.0","updated_at":"2026-06-16T10:05:00.547092+00:00","canonical_key":"vchitect/vbench","recommendation_reasons":["Matches task terms: bench","Useful GitHub adoption: 1,654 stars","Install handoff is available","Repository freshness signal is available","Registry match score 57"],"urls":{"web":"https://www.openagentskill.com/skills/vchitect-vbench","api":"https://www.openagentskill.com/api/agent/skills/vchitect-vbench","install_api":"https://www.openagentskill.com/api/skills/vchitect-vbench/install","audit":"https://www.openagentskill.com/skills/vchitect-vbench/audit","repository":"https://github.com/Vchitect/VBench"}},{"rank":4,"match_score":57,"raw_match_score":547,"slug":"zilliztech-vectordbbench","name":"VectorDBBench","description":"Benchmark for vector databases.","tagline":"Benchmark for vector databases.","category":"rag-knowledge","tags":["vector-database","retrieval","knowledge","benchmark","cost-effectiveness","performance","vector-search","vectordb","python","github"],"author":{"name":"zilliztech","verified":true,"url":"https://github.com/zilliztech"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"zilliztech/VectorDBBench","creatorName":"zilliztech","creatorUrl":"https://github.com/zilliztech","sourceUrl":"https://github.com/zilliztech/VectorDBBench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/zilliztech-vectordbbench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":1127,"forks":394,"downloads":0,"rating":0,"review_count":0,"quality_score":63.07},"quality":{"score":100,"tier":"excellent","label":"Excellent","summary":"High-confidence pick with strong adoption and healthy maintenance signals.","signals":[{"label":"GitHub stars","value":"1.1K","tone":"positive"},{"label":"Freshness","value":"16d ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"MIT","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":88,"tier":"production","label":"Production candidate","summary":"Strong OpenAgentSkill Trust Score across adoption, recent maintenance, license clarity, documentation, dependency/runtime risk, install safety, permission surface, and install availability.","recommendedAction":"Shortlist for production use, then run a normal repository and dependency review.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":86,"weight":0.13,"status":"pass","detail":"1.1K GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":83,"weight":0.08,"status":"pass","detail":"1.1K stars, 394 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":100,"weight":0.14,"status":"pass","detail":"16d since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"MIT"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":82,"weight":0.12,"status":"pass","detail":"database surface"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add zilliztech/VectorDBBench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":74,"weight":0.07,"status":"info","detail":"filesystem or document access, database access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/zilliztech/VectorDBBench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"pass","label":"GitHub adoption","detail":"1.1K GitHub stars"},{"status":"pass","label":"Stars/forks activity","detail":"1.1K stars, 394 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"16d since push"},{"status":"pass","label":"License clarity","detail":"MIT"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"database surface"},{"status":"pass","label":"Install availability","detail":"npx skills add zilliztech/VectorDBBench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"info","label":"Permission surface","detail":"filesystem or document access, database access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/zilliztech/VectorDBBench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"pass","label":"Ownership","detail":"Listing manually verified"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["Manually verified listing","AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Meaningful GitHub adoption signal","Install command has no obvious high-risk pattern"],"warnings":["Documentation summary is thin"],"evidence":{"stars":"1.1K GitHub stars","repoActivity":"1.1K stars, 394 forks","lastPushed":"16d since push","license":"MIT","repository":"https://github.com/zilliztech/VectorDBBench","install":"npx skills add zilliztech/VectorDBBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, database access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add zilliztech/VectorDBBench","policy":"agent_install_candidate","label":"Agent install candidate","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","16d since push"]},"agentCompatibility":["Python","Vector Search","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"low","label":"Low metadata risk","notes":["Documentation summary is thin"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":true,"sandboxRequired":true,"policy":"agent_install_candidate","reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"bestFor":["rag-knowledge","vector-database","retrieval","knowledge","benchmark","cost-effectiveness"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"knownRisks":["Documentation summary is thin"]},"safety":{"score":73,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","summary":"Good audit and safety signals with no high-risk permission hints in public metadata.","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","auto_install_policy":"review","reasons":["Safe-to-try audit","73/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"},{"id":"database","label":"Database access","reason":"Skill may inspect schemas, query databases, or work with persistent stores.","severity":"medium"}],"policy_warnings":["Documentation summary is thin"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","reasons":["Safe-to-try audit","73/100 agent safety score"]},"supply_profile":{"track":{"slug":"research","label":"Research and knowledge work","shortLabel":"Research","description":"Deep research, source comparison, literature review, RAG, knowledge search, and reports."},"scenario":{"label":"RAG and knowledge","description":"I need my agent to build a RAG workflow over documents and retrieve reliable context.","useCases":[{"slug":"rag-knowledge","title":"RAG and knowledge"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"browser-automation","title":"Browser automation"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","Python"],"install":{"ready":true,"command":"npx skills add zilliztech/VectorDBBench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":1127,"starsLabel":"1.1K","forks":394,"license":"MIT","qualityScore":100,"trustScore":88,"auditScore":93},"maintenance":{"status":"fresh","label":"16d since push","daysSincePush":16,"lastPushedAt":"2026-06-17T12:09:07+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["Documentation summary is thin"]},"coverageTags":["Research","RAG and knowledge","rag-knowledge","vector-database","retrieval","knowledge","benchmark","cost-effectiveness"]},"audit":{"audit_score":93,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Documentation summary is thin"]},"decision":{"readiness_score":100,"readiness_label":"Production-ready","headline":"Primary pick for RAG and knowledge","role":"Primary pick","primary_fit":"RAG and knowledge","best_for":["RAG and knowledge workflows","Claude Code teams","teams that value GitHub adoption signals"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one RAG and knowledge task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"zilliztech-vectordbbench","name":"VectorDBBench","description":"Benchmark for vector databases.","category":"rag-knowledge","url":"https://www.openagentskill.com/skills/zilliztech-vectordbbench","repository":"https://github.com/zilliztech/VectorDBBench","github_repo":"zilliztech/VectorDBBench"},"suited_tasks":["RAG and knowledge workflows","Claude Code teams","teams that value GitHub adoption signals","Chunk documents","Create embeddings","Retrieve and cite relevant passages","Inspect source files","Explain architecture"],"suited_agents":["Python","Vector Search","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add zilliztech/VectorDBBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add zilliztech/VectorDBBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"VectorDBBench\" agent skill from https://github.com/zilliztech/VectorDBBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Benchmark for vector databases."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"VectorDBBench\" as a Claude Code skill from https://github.com/zilliztech/VectorDBBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Benchmark for vector databases."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"VectorDBBench\" from https://github.com/zilliztech/VectorDBBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Benchmark for vector databases."}],"handoff_url":"https://www.openagentskill.com/api/skills/zilliztech-vectordbbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/zilliztech-vectordbbench"},"trust":{"score":88,"label":"Production candidate","version":"trust-score-v4","install_policy":"agent_install_candidate","evidence":{"stars":"1.1K GitHub stars","repoActivity":"1.1K stars, 394 forks","lastPushed":"16d since push","license":"MIT","repository":"https://github.com/zilliztech/VectorDBBench","install":"npx skills add zilliztech/VectorDBBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, database access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":true,"sandbox_required":true,"reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"best_for":["rag-knowledge","vector-database","retrieval","knowledge","benchmark","cost-effectiveness"],"known_risks":["Documentation summary is thin"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":93,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Documentation summary is thin"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":100,"label":"Excellent"},"supply":{"track":"Research and knowledge work","scenario":"RAG and knowledge","maintenance":"16d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Documentation summary is thin","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use VectorDBBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 88/100 Production candidate","Audit: 93/100 Safe to try","Safety: 73/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"zilliztech-vectordbbench (VectorDBBench)","install_command":"npx skills add zilliztech/VectorDBBench","risk_summary":"Safe to try; Reviewed; Low metadata risk","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"zilliztech-vectordbbench","task":"Use VectorDBBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/zilliztech-vectordbbench","api":"https://www.openagentskill.com/api/agent/skills/zilliztech-vectordbbench","audit":"https://www.openagentskill.com/skills/zilliztech-vectordbbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=zilliztech-vectordbbench&task=Use%20VectorDBBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20VectorDBBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20VectorDBBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/zilliztech-vectordbbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/zilliztech-vectordbbench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"zilliztech-vectordbbench","name":"VectorDBBench","description":"Benchmark for vector databases.","category":"rag-knowledge","url":"https://www.openagentskill.com/skills/zilliztech-vectordbbench","repository":"https://github.com/zilliztech/VectorDBBench","github_repo":"zilliztech/VectorDBBench"},"suited_tasks":["RAG and knowledge workflows","Claude Code teams","teams that value GitHub adoption signals","Chunk documents","Create embeddings","Retrieve and cite relevant passages","Inspect source files","Explain architecture"],"suited_agents":["Python","Vector Search","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add zilliztech/VectorDBBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add zilliztech/VectorDBBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"VectorDBBench\" agent skill from https://github.com/zilliztech/VectorDBBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Benchmark for vector databases."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"VectorDBBench\" as a Claude Code skill from https://github.com/zilliztech/VectorDBBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Benchmark for vector databases."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"VectorDBBench\" from https://github.com/zilliztech/VectorDBBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Benchmark for vector databases."}],"handoff_url":"https://www.openagentskill.com/api/skills/zilliztech-vectordbbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/zilliztech-vectordbbench"},"trust":{"score":88,"label":"Production candidate","version":"trust-score-v4","install_policy":"agent_install_candidate","evidence":{"stars":"1.1K GitHub stars","repoActivity":"1.1K stars, 394 forks","lastPushed":"16d since push","license":"MIT","repository":"https://github.com/zilliztech/VectorDBBench","install":"npx skills add zilliztech/VectorDBBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, database access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":true,"sandbox_required":true,"reason":"Trust Score v4 allows sandbox-first agent installation after normal workspace review."},"best_for":["rag-knowledge","vector-database","retrieval","knowledge","benchmark","cost-effectiveness"],"known_risks":["Documentation summary is thin"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":93,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Documentation summary is thin"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":100,"label":"Excellent"},"supply":{"track":"Research and knowledge work","scenario":"RAG and knowledge","maintenance":"16d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Documentation summary is thin","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface"],"agent_contract":{"task_input":"Use VectorDBBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 88/100 Production candidate","Audit: 93/100 Safe to try","Safety: 73/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"zilliztech-vectordbbench (VectorDBBench)","install_command":"npx skills add zilliztech/VectorDBBench","risk_summary":"Safe to try; Reviewed; Low metadata risk","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"zilliztech-vectordbbench","task":"Use VectorDBBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/zilliztech-vectordbbench","api":"https://www.openagentskill.com/api/agent/skills/zilliztech-vectordbbench","audit":"https://www.openagentskill.com/skills/zilliztech-vectordbbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=zilliztech-vectordbbench&task=Use%20VectorDBBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20VectorDBBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20VectorDBBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/zilliztech-vectordbbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/zilliztech-vectordbbench"}},"platforms":["Python","Vector Search","Claude Code"],"use_cases":[{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"browser-automation","title":"Browser automation","url":"https://www.openagentskill.com/use-cases/browser-automation"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add zilliztech/VectorDBBench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add zilliztech/VectorDBBench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"VectorDBBench\" agent skill from https://github.com/zilliztech/VectorDBBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Benchmark for vector databases.","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"VectorDBBench\" as a Claude Code skill from https://github.com/zilliztech/VectorDBBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Benchmark for vector databases.","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"VectorDBBench\" from https://github.com/zilliztech/VectorDBBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Benchmark for vector databases.","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/zilliztech/VectorDBBench","github_repo":"zilliztech/VectorDBBench","version":"1.0.0","license":"MIT","updated_at":"2026-06-18T03:01:57.748512+00:00","canonical_key":"zilliztech/vectordbbench","recommendation_reasons":["Matches task terms: bench","Useful GitHub adoption: 1,127 stars","Install handoff is available","Repository freshness signal is available","Registry match score 57"],"urls":{"web":"https://www.openagentskill.com/skills/zilliztech-vectordbbench","api":"https://www.openagentskill.com/api/agent/skills/zilliztech-vectordbbench","install_api":"https://www.openagentskill.com/api/skills/zilliztech-vectordbbench/install","audit":"https://www.openagentskill.com/skills/zilliztech-vectordbbench/audit","repository":"https://github.com/zilliztech/VectorDBBench"}},{"rank":5,"match_score":56,"raw_match_score":546.7,"slug":"clickhouse-clickbench","name":"ClickBench","description":"ClickBench: a Benchmark For Analytical Databases","tagline":"ClickBench: a Benchmark For Analytical Databases","category":"data-analysis","tags":["sql","data-analysis","data","analytics","aws","benchmark","big-data","bigquery","chdb","clickhouse"],"author":{"name":"ClickHouse","verified":true,"url":"https://github.com/ClickHouse"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"ClickHouse/ClickBench","creatorName":"ClickHouse","creatorUrl":"https://github.com/ClickHouse","sourceUrl":"https://github.com/ClickHouse/ClickBench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/clickhouse-clickbench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":1020,"forks":284,"downloads":0,"rating":0,"review_count":0,"quality_score":62.76},"quality":{"score":96,"tier":"excellent","label":"Excellent","summary":"High-confidence pick with strong adoption and healthy maintenance signals.","signals":[{"label":"GitHub stars","value":"1.0K","tone":"positive"},{"label":"Freshness","value":"17d ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Unknown","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":84,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":86,"weight":0.13,"status":"pass","detail":"1.0K GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":83,"weight":0.08,"status":"pass","detail":"1.0K stars, 284 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":100,"weight":0.14,"status":"pass","detail":"17d since push"},{"id":"license","label":"License clarity","score":42,"weight":0.09,"status":"warn","detail":"Unknown"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":82,"weight":0.12,"status":"pass","detail":"database surface"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add ClickHouse/ClickBench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":74,"weight":0.07,"status":"info","detail":"filesystem or document access, database access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/ClickHouse/ClickBench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"pass","label":"GitHub adoption","detail":"1.0K GitHub stars"},{"status":"pass","label":"Stars/forks activity","detail":"1.0K stars, 284 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"17d since push"},{"status":"warn","label":"License clarity","detail":"Unknown"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"database surface"},{"status":"pass","label":"Install availability","detail":"npx skills add ClickHouse/ClickBench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"info","label":"Permission surface","detail":"filesystem or document access, database access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/ClickHouse/ClickBench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"pass","label":"Ownership","detail":"Listing manually verified"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["Manually verified listing","AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Meaningful GitHub adoption signal","Install command has no obvious high-risk pattern"],"warnings":["License is unclear","License clarity: Unknown"],"evidence":{"stars":"1.0K GitHub stars","repoActivity":"1.0K stars, 284 forks","lastPushed":"17d since push","license":"Unknown","repository":"https://github.com/ClickHouse/ClickBench","install":"npx skills add ClickHouse/ClickBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, database access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add ClickHouse/ClickBench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is unclear","No Agent Proven outcome evidence yet","17d since push"]},"agentCompatibility":["HTML","SQL","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["License is unclear","License clarity: Unknown"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["data-analysis","sql","data","analytics","aws","benchmark"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace","Commercial reuse before clarifying license terms"],"knownRisks":["License is unclear","License clarity: Unknown"]},"safety":{"score":69,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","summary":"Good audit and safety signals with no high-risk permission hints in public metadata.","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","auto_install_policy":"review","reasons":["Safe-to-try audit","69/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"},{"id":"database","label":"Database access","reason":"Skill may inspect schemas, query databases, or work with persistent stores.","severity":"medium"}],"policy_warnings":["License is unclear"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","reasons":["Safe-to-try audit","69/100 agent safety score"]},"supply_profile":{"track":{"slug":"data","label":"Data, BI, and analytics","shortLabel":"Data","description":"CSV, SQL, notebooks, dashboards, data pipelines, BI, ETL, and spreadsheet analysis."},"scenario":{"label":"Database and SQL","description":"I need my agent to inspect database schemas, write SQL, and explain query results.","useCases":[{"slug":"database-sql","title":"Database and SQL"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"rag-knowledge","title":"RAG and knowledge"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","HTML"],"install":{"ready":true,"command":"npx skills add ClickHouse/ClickBench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":1020,"starsLabel":"1.0K","forks":284,"license":"Unknown","qualityScore":96,"trustScore":84,"auditScore":89},"maintenance":{"status":"fresh","label":"17d since push","daysSincePush":17,"lastPushedAt":"2026-06-16T00:04:58+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["License is unclear","License clarity: Unknown"]},"coverageTags":["Data","Database and SQL","data-analysis","sql","analytics","aws","benchmark","big-data"]},"audit":{"audit_score":89,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["License is unclear","License clarity: Unknown"]},"decision":{"readiness_score":100,"readiness_label":"Production-ready","headline":"Primary pick for Database and SQL","role":"Primary pick","primary_fit":"Database and SQL","best_for":["Database and SQL workflows","Claude Code teams","teams that value GitHub adoption signals"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one Database and SQL task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"clickhouse-clickbench","name":"ClickBench","description":"ClickBench: a Benchmark For Analytical Databases","category":"data-analysis","url":"https://www.openagentskill.com/skills/clickhouse-clickbench","repository":"https://github.com/ClickHouse/ClickBench","github_repo":"ClickHouse/ClickBench"},"suited_tasks":["Database and SQL workflows","Claude Code teams","teams that value GitHub adoption signals","Understand table relationships","Write safer queries","Explain database changes","Inspect source files","Explain architecture"],"suited_agents":["HTML","SQL","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add ClickHouse/ClickBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add ClickHouse/ClickBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"ClickBench\" agent skill from https://github.com/ClickHouse/ClickBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: ClickBench: a Benchmark For Analytical Databases"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"ClickBench\" as a Claude Code skill from https://github.com/ClickHouse/ClickBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: ClickBench: a Benchmark For Analytical Databases"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"ClickBench\" from https://github.com/ClickHouse/ClickBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: ClickBench: a Benchmark For Analytical Databases"}],"handoff_url":"https://www.openagentskill.com/api/skills/clickhouse-clickbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/clickhouse-clickbench"},"trust":{"score":84,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"1.0K GitHub stars","repoActivity":"1.0K stars, 284 forks","lastPushed":"17d since push","license":"Unknown","repository":"https://github.com/ClickHouse/ClickBench","install":"npx skills add ClickHouse/ClickBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, database access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["data-analysis","sql","data","analytics","aws","benchmark"],"known_risks":["License is unclear","License clarity: Unknown"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":89,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["License is unclear","License clarity: Unknown"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":96,"label":"Excellent"},"supply":{"track":"Data, BI, and analytics","scenario":"Database and SQL","maintenance":"17d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","License is unclear","License clarity: Unknown","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use ClickBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 84/100 Strong shortlist","Audit: 89/100 Safe to try","Safety: 69/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"clickhouse-clickbench (ClickBench)","install_command":"npx skills add ClickHouse/ClickBench","risk_summary":"Safe to try; Reviewed; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"clickhouse-clickbench","task":"Use ClickBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/clickhouse-clickbench","api":"https://www.openagentskill.com/api/agent/skills/clickhouse-clickbench","audit":"https://www.openagentskill.com/skills/clickhouse-clickbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=clickhouse-clickbench&task=Use%20ClickBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20ClickBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20ClickBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/clickhouse-clickbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/clickhouse-clickbench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"clickhouse-clickbench","name":"ClickBench","description":"ClickBench: a Benchmark For Analytical Databases","category":"data-analysis","url":"https://www.openagentskill.com/skills/clickhouse-clickbench","repository":"https://github.com/ClickHouse/ClickBench","github_repo":"ClickHouse/ClickBench"},"suited_tasks":["Database and SQL workflows","Claude Code teams","teams that value GitHub adoption signals","Understand table relationships","Write safer queries","Explain database changes","Inspect source files","Explain architecture"],"suited_agents":["HTML","SQL","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add ClickHouse/ClickBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add ClickHouse/ClickBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"ClickBench\" agent skill from https://github.com/ClickHouse/ClickBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: ClickBench: a Benchmark For Analytical Databases"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"ClickBench\" as a Claude Code skill from https://github.com/ClickHouse/ClickBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: ClickBench: a Benchmark For Analytical Databases"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"ClickBench\" from https://github.com/ClickHouse/ClickBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: ClickBench: a Benchmark For Analytical Databases"}],"handoff_url":"https://www.openagentskill.com/api/skills/clickhouse-clickbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/clickhouse-clickbench"},"trust":{"score":84,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"1.0K GitHub stars","repoActivity":"1.0K stars, 284 forks","lastPushed":"17d since push","license":"Unknown","repository":"https://github.com/ClickHouse/ClickBench","install":"npx skills add ClickHouse/ClickBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, database access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["data-analysis","sql","data","analytics","aws","benchmark"],"known_risks":["License is unclear","License clarity: Unknown"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":89,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["License is unclear","License clarity: Unknown"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":96,"label":"Excellent"},"supply":{"track":"Data, BI, and analytics","scenario":"Database and SQL","maintenance":"17d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","License is unclear","License clarity: Unknown","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use ClickBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 84/100 Strong shortlist","Audit: 89/100 Safe to try","Safety: 69/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"clickhouse-clickbench (ClickBench)","install_command":"npx skills add ClickHouse/ClickBench","risk_summary":"Safe to try; Reviewed; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"clickhouse-clickbench","task":"Use ClickBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/clickhouse-clickbench","api":"https://www.openagentskill.com/api/agent/skills/clickhouse-clickbench","audit":"https://www.openagentskill.com/skills/clickhouse-clickbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=clickhouse-clickbench&task=Use%20ClickBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20ClickBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20ClickBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/clickhouse-clickbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/clickhouse-clickbench"}},"platforms":["HTML","SQL","Claude Code"],"use_cases":[{"slug":"database-sql","title":"Database and SQL","url":"https://www.openagentskill.com/use-cases/database-sql"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add ClickHouse/ClickBench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add ClickHouse/ClickBench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"ClickBench\" agent skill from https://github.com/ClickHouse/ClickBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: ClickBench: a Benchmark For Analytical Databases","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"ClickBench\" as a Claude Code skill from https://github.com/ClickHouse/ClickBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: ClickBench: a Benchmark For Analytical Databases","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"ClickBench\" from https://github.com/ClickHouse/ClickBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: ClickBench: a Benchmark For Analytical Databases","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/ClickHouse/ClickBench","github_repo":"ClickHouse/ClickBench","version":"1.0.0","license":"Unknown","updated_at":"2026-06-16T09:14:35.762686+00:00","canonical_key":"clickhouse/clickbench","recommendation_reasons":["Matches task terms: bench","Useful GitHub adoption: 1,020 stars","Install handoff is available","Repository freshness signal is available","Registry match score 56"],"urls":{"web":"https://www.openagentskill.com/skills/clickhouse-clickbench","api":"https://www.openagentskill.com/api/agent/skills/clickhouse-clickbench","install_api":"https://www.openagentskill.com/api/skills/clickhouse-clickbench/install","audit":"https://www.openagentskill.com/skills/clickhouse-clickbench/audit","repository":"https://github.com/ClickHouse/ClickBench"}},{"rank":6,"match_score":55,"raw_match_score":537.1,"slug":"ayanami0730-deep-research-bench","name":"Deep Research Bench","description":"DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","tagline":"DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","category":"research","tags":["deep-research","agent","research","benchmark","deepresearch","nlp","python","github"],"author":{"name":"Ayanami0730","verified":false,"url":"https://github.com/Ayanami0730"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"Ayanami0730/deep_research_bench","creatorName":"Ayanami0730","creatorUrl":"https://github.com/Ayanami0730","sourceUrl":"https://github.com/Ayanami0730/deep_research_bench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":753,"forks":82,"downloads":0,"rating":0,"review_count":0,"quality_score":50.84},"quality":{"score":80,"tier":"strong","label":"Strong","summary":"Solid option that is likely worth shortlisting for production workflows.","signals":[{"label":"GitHub stars","value":"753","tone":"positive"},{"label":"Freshness","value":"2mo ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Apache-2.0","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":81,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":76,"weight":0.13,"status":"info","detail":"753 GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":71,"weight":0.08,"status":"info","detail":"753 stars, 82 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":88,"weight":0.14,"status":"pass","detail":"2mo since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"Apache-2.0"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add Ayanami0730/deep_research_bench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/Ayanami0730/deep_research_bench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"info","label":"GitHub adoption","detail":"753 GitHub stars"},{"status":"info","label":"Stars/forks activity","detail":"753 stars, 82 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"2mo since push"},{"status":"pass","label":"License clarity","detail":"Apache-2.0"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add Ayanami0730/deep_research_bench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/Ayanami0730/deep_research_bench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"warn","label":"Ownership","detail":"No approved owner claim yet"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Meaningful GitHub adoption signal","Install command has no obvious high-risk pattern"],"warnings":["Quality score needs review"],"evidence":{"stars":"753 GitHub stars","repoActivity":"753 stars, 82 forks","lastPushed":"2mo since push","license":"Apache-2.0","repository":"https://github.com/Ayanami0730/deep_research_bench","install":"npx skills add Ayanami0730/deep_research_bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add Ayanami0730/deep_research_bench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","2mo since push"]},"agentCompatibility":["Python","Research Agent","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["Quality score needs review"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["research","deep-research","agent","benchmark","deepresearch","nlp"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"knownRisks":["Quality score needs review"]},"safety":{"score":68,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","summary":"Good audit and safety signals with no high-risk permission hints in public metadata.","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","auto_install_policy":"review","reasons":["Safe-to-try audit","68/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":["Quality score needs review"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","reasons":["Safe-to-try audit","68/100 agent safety score"]},"supply_profile":{"track":{"slug":"research","label":"Research and knowledge work","shortLabel":"Research","description":"Deep research, source comparison, literature review, RAG, knowledge search, and reports."},"scenario":{"label":"RAG and knowledge","description":"I need my agent to build a RAG workflow over documents and retrieve reliable context.","useCases":[{"slug":"rag-knowledge","title":"RAG and knowledge"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"research-agents","title":"Research agents"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","Python"],"install":{"ready":true,"command":"npx skills add Ayanami0730/deep_research_bench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":753,"starsLabel":"753","forks":82,"license":"Apache-2.0","qualityScore":80,"trustScore":81,"auditScore":84},"maintenance":{"status":"active","label":"2mo since push","daysSincePush":53,"lastPushedAt":"2026-05-11T06:14:30+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["Quality score needs review"]},"coverageTags":["Research","RAG and knowledge","deep-research","agent","benchmark","deepresearch","nlp","python"]},"audit":{"audit_score":84,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review"]},"decision":{"readiness_score":91,"readiness_label":"Production-ready","headline":"Primary pick for RAG and knowledge","role":"Primary pick","primary_fit":"RAG and knowledge","best_for":["RAG and knowledge workflows","Claude Code teams","teams that value GitHub adoption signals"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one RAG and knowledge task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"ayanami0730-deep-research-bench","name":"Deep Research Bench","description":"DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","category":"research","url":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench","repository":"https://github.com/Ayanami0730/deep_research_bench","github_repo":"Ayanami0730/deep_research_bench"},"suited_tasks":["RAG and knowledge workflows","Claude Code teams","teams that value GitHub adoption signals","Chunk documents","Create embeddings","Retrieve and cite relevant passages","Inspect source files","Explain architecture"],"suited_agents":["Python","Research Agent","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add Ayanami0730/deep_research_bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add Ayanami0730/deep_research_bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Deep Research Bench\" agent skill from https://github.com/Ayanami0730/deep_research_bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Deep Research Bench\" as a Claude Code skill from https://github.com/Ayanami0730/deep_research_bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Deep Research Bench\" from https://github.com/Ayanami0730/deep_research_bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents"}],"handoff_url":"https://www.openagentskill.com/api/skills/ayanami0730-deep-research-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/ayanami0730-deep-research-bench"},"trust":{"score":81,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"753 GitHub stars","repoActivity":"753 stars, 82 forks","lastPushed":"2mo since push","license":"Apache-2.0","repository":"https://github.com/Ayanami0730/deep_research_bench","install":"npx skills add Ayanami0730/deep_research_bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["research","deep-research","agent","benchmark","deepresearch","nlp"],"known_risks":["Quality score needs review"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":84,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":80,"label":"Strong"},"supply":{"track":"Research and knowledge work","scenario":"RAG and knowledge","maintenance":"2mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use Deep Research Bench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 81/100 Strong shortlist","Audit: 84/100 Safe to try","Safety: 68/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"ayanami0730-deep-research-bench (Deep Research Bench)","install_command":"npx skills add Ayanami0730/deep_research_bench","risk_summary":"Safe to try; Reviewed; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"ayanami0730-deep-research-bench","task":"Use Deep Research Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench","api":"https://www.openagentskill.com/api/agent/skills/ayanami0730-deep-research-bench","audit":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=ayanami0730-deep-research-bench&task=Use%20Deep%20Research%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Deep%20Research%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Deep%20Research%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/ayanami0730-deep-research-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/ayanami0730-deep-research-bench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"ayanami0730-deep-research-bench","name":"Deep Research Bench","description":"DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","category":"research","url":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench","repository":"https://github.com/Ayanami0730/deep_research_bench","github_repo":"Ayanami0730/deep_research_bench"},"suited_tasks":["RAG and knowledge workflows","Claude Code teams","teams that value GitHub adoption signals","Chunk documents","Create embeddings","Retrieve and cite relevant passages","Inspect source files","Explain architecture"],"suited_agents":["Python","Research Agent","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add Ayanami0730/deep_research_bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add Ayanami0730/deep_research_bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Deep Research Bench\" agent skill from https://github.com/Ayanami0730/deep_research_bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Deep Research Bench\" as a Claude Code skill from https://github.com/Ayanami0730/deep_research_bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Deep Research Bench\" from https://github.com/Ayanami0730/deep_research_bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents"}],"handoff_url":"https://www.openagentskill.com/api/skills/ayanami0730-deep-research-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/ayanami0730-deep-research-bench"},"trust":{"score":81,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"753 GitHub stars","repoActivity":"753 stars, 82 forks","lastPushed":"2mo since push","license":"Apache-2.0","repository":"https://github.com/Ayanami0730/deep_research_bench","install":"npx skills add Ayanami0730/deep_research_bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["research","deep-research","agent","benchmark","deepresearch","nlp"],"known_risks":["Quality score needs review"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":84,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":80,"label":"Strong"},"supply":{"track":"Research and knowledge work","scenario":"RAG and knowledge","maintenance":"2mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use Deep Research Bench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 81/100 Strong shortlist","Audit: 84/100 Safe to try","Safety: 68/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"ayanami0730-deep-research-bench (Deep Research Bench)","install_command":"npx skills add Ayanami0730/deep_research_bench","risk_summary":"Safe to try; Reviewed; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"ayanami0730-deep-research-bench","task":"Use Deep Research Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench","api":"https://www.openagentskill.com/api/agent/skills/ayanami0730-deep-research-bench","audit":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=ayanami0730-deep-research-bench&task=Use%20Deep%20Research%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Deep%20Research%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Deep%20Research%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/ayanami0730-deep-research-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/ayanami0730-deep-research-bench"}},"platforms":["Python","Research Agent","Claude Code"],"use_cases":[{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"research-agents","title":"Research agents","url":"https://www.openagentskill.com/use-cases/research-agents"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add Ayanami0730/deep_research_bench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add Ayanami0730/deep_research_bench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"Deep Research Bench\" agent skill from https://github.com/Ayanami0730/deep_research_bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"Deep Research Bench\" as a Claude Code skill from https://github.com/Ayanami0730/deep_research_bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"Deep Research Bench\" from https://github.com/Ayanami0730/deep_research_bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: DeepResearch Bench: A Comprehensive Benchmark for Deep Research Agents","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/Ayanami0730/deep_research_bench","github_repo":"Ayanami0730/deep_research_bench","version":"1.0.0","license":"Apache-2.0","updated_at":"2026-06-14T13:01:04.200426+00:00","canonical_key":"ayanami0730/deep_research_bench","recommendation_reasons":["Matches task terms: bench","Useful GitHub adoption: 753 stars","Install handoff is available","Repository freshness signal is available","Registry match score 55"],"urls":{"web":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench","api":"https://www.openagentskill.com/api/agent/skills/ayanami0730-deep-research-bench","install_api":"https://www.openagentskill.com/api/skills/ayanami0730-deep-research-bench/install","audit":"https://www.openagentskill.com/skills/ayanami0730-deep-research-bench/audit","repository":"https://github.com/Ayanami0730/deep_research_bench"}},{"rank":7,"match_score":55,"raw_match_score":536.6,"slug":"run-llama-parsebench","name":"ParseBench","description":"ParseBench - A Document Parsing Benchmark for AI Agents","tagline":"ParseBench - A Document Parsing Benchmark for AI Agents","category":"document-processing","tags":["document-ai","documents","extraction","benchmark","document-parsing","evaluation","llamaindex","llm","machine-learning","ocr"],"author":{"name":"run-llama","verified":false,"url":"https://github.com/run-llama"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"run-llama/ParseBench","creatorName":"run-llama","creatorUrl":"https://github.com/run-llama","sourceUrl":"https://github.com/run-llama/ParseBench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/run-llama-parsebench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":497,"forks":62,"downloads":0,"rating":0,"review_count":0,"quality_score":52.58},"quality":{"score":84,"tier":"strong","label":"Strong","summary":"Solid option that is likely worth shortlisting for production workflows.","signals":[{"label":"GitHub stars","value":"497","tone":"neutral"},{"label":"Freshness","value":"18d ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Apache-2.0","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":80,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":62,"weight":0.13,"status":"info","detail":"497 GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":62,"weight":0.08,"status":"info","detail":"497 stars, 62 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":100,"weight":0.14,"status":"pass","detail":"18d since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"Apache-2.0"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add run-llama/ParseBench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/run-llama/ParseBench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"info","label":"GitHub adoption","detail":"497 GitHub stars"},{"status":"info","label":"Stars/forks activity","detail":"497 stars, 62 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"18d since push"},{"status":"pass","label":"License clarity","detail":"Apache-2.0"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add run-llama/ParseBench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/run-llama/ParseBench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"warn","label":"Ownership","detail":"No approved owner claim yet"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Install command has no obvious high-risk pattern"],"warnings":["Quality score needs review"],"evidence":{"stars":"497 GitHub stars","repoActivity":"497 stars, 62 forks","lastPushed":"18d since push","license":"Apache-2.0","repository":"https://github.com/run-llama/ParseBench","install":"npx skills add run-llama/ParseBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add run-llama/ParseBench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","18d since push"]},"agentCompatibility":["Python","Document AI","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["Quality score needs review"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["document-processing","document-ai","documents","extraction","benchmark","document-parsing"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"knownRisks":["Quality score needs review"]},"safety":{"score":71,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","summary":"Good audit and safety signals with no high-risk permission hints in public metadata.","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","auto_install_policy":"review","reasons":["Safe-to-try audit","71/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":["Quality score needs review"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","reasons":["Safe-to-try audit","71/100 agent safety score"]},"supply_profile":{"track":{"slug":"research","label":"Research and knowledge work","shortLabel":"Research","description":"Deep research, source comparison, literature review, RAG, knowledge search, and reports."},"scenario":{"label":"Document processing","description":"I need my agent to read PDFs, extract tables, and turn documents into structured data.","useCases":[{"slug":"document-processing","title":"Document processing"},{"slug":"rag-knowledge","title":"RAG and knowledge"},{"slug":"coding-agents","title":"Coding agents"}]},"applicableAgents":["Claude Code","LlamaIndex","CLI","Codex","Cursor"],"install":{"ready":true,"command":"npx skills add run-llama/ParseBench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":497,"starsLabel":"497","forks":62,"license":"Apache-2.0","qualityScore":84,"trustScore":80,"auditScore":87},"maintenance":{"status":"fresh","label":"18d since push","daysSincePush":18,"lastPushedAt":"2026-06-15T15:11:09+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["Quality score needs review"]},"coverageTags":["Research","Document processing","document-processing","document-ai","documents","extraction","benchmark","document-parsing"]},"audit":{"audit_score":87,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review"]},"decision":{"readiness_score":83,"readiness_label":"Strong shortlist","headline":"Companion skill for Document processing","role":"Companion skill","primary_fit":"Document processing","best_for":["Document processing workflows","Claude Code teams","builders willing to evaluate younger projects"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one Document processing task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"run-llama-parsebench","name":"ParseBench","description":"ParseBench - A Document Parsing Benchmark for AI Agents","category":"document-processing","url":"https://www.openagentskill.com/skills/run-llama-parsebench","repository":"https://github.com/run-llama/ParseBench","github_repo":"run-llama/ParseBench"},"suited_tasks":["Document processing workflows","Claude Code teams","builders willing to evaluate younger projects","Read uploaded files","Extract structured fields","Prepare clean context for downstream agents","Chunk documents","Create embeddings"],"suited_agents":["Python","Document AI","Codex","Claude Code","Cursor","OpenAgentSkill CLI","LlamaIndex","CLI"],"install":{"command":"npx skills add run-llama/ParseBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add run-llama/ParseBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"ParseBench\" agent skill from https://github.com/run-llama/ParseBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"ParseBench\" as a Claude Code skill from https://github.com/run-llama/ParseBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"ParseBench\" from https://github.com/run-llama/ParseBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents"}],"handoff_url":"https://www.openagentskill.com/api/skills/run-llama-parsebench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/run-llama-parsebench"},"trust":{"score":80,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"497 GitHub stars","repoActivity":"497 stars, 62 forks","lastPushed":"18d since push","license":"Apache-2.0","repository":"https://github.com/run-llama/ParseBench","install":"npx skills add run-llama/ParseBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["document-processing","document-ai","documents","extraction","benchmark","document-parsing"],"known_risks":["Quality score needs review"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":87,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":84,"label":"Strong"},"supply":{"track":"Research and knowledge work","scenario":"Document processing","maintenance":"18d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use ParseBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 80/100 Strong shortlist","Audit: 87/100 Safe to try","Safety: 71/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"run-llama-parsebench (ParseBench)","install_command":"npx skills add run-llama/ParseBench","risk_summary":"Safe to try; Reviewed; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"run-llama-parsebench","task":"Use ParseBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/run-llama-parsebench","api":"https://www.openagentskill.com/api/agent/skills/run-llama-parsebench","audit":"https://www.openagentskill.com/skills/run-llama-parsebench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=run-llama-parsebench&task=Use%20ParseBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20ParseBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20ParseBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/run-llama-parsebench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/run-llama-parsebench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"run-llama-parsebench","name":"ParseBench","description":"ParseBench - A Document Parsing Benchmark for AI Agents","category":"document-processing","url":"https://www.openagentskill.com/skills/run-llama-parsebench","repository":"https://github.com/run-llama/ParseBench","github_repo":"run-llama/ParseBench"},"suited_tasks":["Document processing workflows","Claude Code teams","builders willing to evaluate younger projects","Read uploaded files","Extract structured fields","Prepare clean context for downstream agents","Chunk documents","Create embeddings"],"suited_agents":["Python","Document AI","Codex","Claude Code","Cursor","OpenAgentSkill CLI","LlamaIndex","CLI"],"install":{"command":"npx skills add run-llama/ParseBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add run-llama/ParseBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"ParseBench\" agent skill from https://github.com/run-llama/ParseBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents"},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"ParseBench\" as a Claude Code skill from https://github.com/run-llama/ParseBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents"},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"ParseBench\" from https://github.com/run-llama/ParseBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents"}],"handoff_url":"https://www.openagentskill.com/api/skills/run-llama-parsebench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/run-llama-parsebench"},"trust":{"score":80,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"497 GitHub stars","repoActivity":"497 stars, 62 forks","lastPushed":"18d since push","license":"Apache-2.0","repository":"https://github.com/run-llama/ParseBench","install":"npx skills add run-llama/ParseBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["document-processing","document-ai","documents","extraction","benchmark","document-parsing"],"known_risks":["Quality score needs review"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":87,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review"]},"safety_gate":{"tier":"reviewed","label":"Reviewed","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow."},"quality":{"score":84,"label":"Strong"},"supply":{"track":"Research and knowledge work","scenario":"Document processing","maintenance":"18d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use ParseBench in an agent workflow","recommended_action":"Review the audit page, then allow agent install in a sandboxed workflow.","install_policy":"review","minimum_review_before_use":["Trust: 80/100 Strong shortlist","Audit: 87/100 Safe to try","Safety: 71/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"run-llama-parsebench (ParseBench)","install_command":"npx skills add run-llama/ParseBench","risk_summary":"Safe to try; Reviewed; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"run-llama-parsebench","task":"Use ParseBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/run-llama-parsebench","api":"https://www.openagentskill.com/api/agent/skills/run-llama-parsebench","audit":"https://www.openagentskill.com/skills/run-llama-parsebench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=run-llama-parsebench&task=Use%20ParseBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20ParseBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20ParseBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/run-llama-parsebench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/run-llama-parsebench"}},"platforms":["Python","Document AI","Claude Code","LlamaIndex"],"use_cases":[{"slug":"document-processing","title":"Document processing","url":"https://www.openagentskill.com/use-cases/document-processing"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add run-llama/ParseBench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add run-llama/ParseBench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"ParseBench\" agent skill from https://github.com/run-llama/ParseBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"ParseBench\" as a Claude Code skill from https://github.com/run-llama/ParseBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"ParseBench\" from https://github.com/run-llama/ParseBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: ParseBench - A Document Parsing Benchmark for AI Agents","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/run-llama/ParseBench","github_repo":"run-llama/ParseBench","version":"1.0.0","license":"Apache-2.0","updated_at":"2026-06-16T09:51:15.112123+00:00","canonical_key":"run-llama/parsebench","recommendation_reasons":["Matches task terms: bench","Install handoff is available","Repository freshness signal is available","Registry match score 55"],"urls":{"web":"https://www.openagentskill.com/skills/run-llama-parsebench","api":"https://www.openagentskill.com/api/agent/skills/run-llama-parsebench","install_api":"https://www.openagentskill.com/api/skills/run-llama-parsebench/install","audit":"https://www.openagentskill.com/skills/run-llama-parsebench/audit","repository":"https://github.com/run-llama/ParseBench"}},{"rank":8,"match_score":55,"raw_match_score":536,"slug":"tiger-ai-lab-clawbench","name":"ClawBench","description":"Open-source benchmark for browser AI agents on daily tasks.","tagline":"Open-source benchmark for browser AI agents on daily tasks.","category":"web-automation","tags":["browser","automation","agent-evaluation","agentic-ai","ai-agent-benchmark","ai-agents","benchmark","browser-agent","browser-automation","browser-use"],"author":{"name":"TIGER-AI-Lab","verified":false,"url":"https://github.com/TIGER-AI-Lab"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"TIGER-AI-Lab/ClawBench","creatorName":"TIGER-AI-Lab","creatorUrl":"https://github.com/TIGER-AI-Lab","sourceUrl":"https://github.com/TIGER-AI-Lab/ClawBench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":397,"forks":22,"downloads":0,"rating":0,"review_count":0,"quality_score":51.9},"quality":{"score":83,"tier":"strong","label":"Strong","summary":"Solid option that is likely worth shortlisting for production workflows.","signals":[{"label":"GitHub stars","value":"397","tone":"neutral"},{"label":"Freshness","value":"19d ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Apache-2.0","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":78,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":62,"weight":0.13,"status":"info","detail":"397 GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":57,"weight":0.08,"status":"warn","detail":"397 stars, 22 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":100,"weight":0.14,"status":"pass","detail":"19d since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"Apache-2.0"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":82,"weight":0.12,"status":"pass","detail":"network or browser surface"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add TIGER-AI-Lab/ClawBench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":72,"weight":0.07,"status":"info","detail":"filesystem or document access, network or browser access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/TIGER-AI-Lab/ClawBench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"info","label":"GitHub adoption","detail":"397 GitHub stars"},{"status":"warn","label":"Stars/forks activity","detail":"397 stars, 22 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"19d since push"},{"status":"pass","label":"License clarity","detail":"Apache-2.0"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"network or browser surface"},{"status":"pass","label":"Install availability","detail":"npx skills add TIGER-AI-Lab/ClawBench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"info","label":"Permission surface","detail":"filesystem or document access, network or browser access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/TIGER-AI-Lab/ClawBench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"warn","label":"Ownership","detail":"No approved owner claim yet"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Install command has no obvious high-risk pattern"],"warnings":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"],"evidence":{"stars":"397 GitHub stars","repoActivity":"397 stars, 22 forks","lastPushed":"19d since push","license":"Apache-2.0","repository":"https://github.com/TIGER-AI-Lab/ClawBench","install":"npx skills add TIGER-AI-Lab/ClawBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, network or browser access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add TIGER-AI-Lab/ClawBench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","19d since push"]},"agentCompatibility":["Python","Browser Automation","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["web-automation","browser","automation","agent-evaluation","agentic-ai","ai-agent-benchmark"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"knownRisks":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"safety":{"score":65,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","summary":"Usable candidate, but the agent should surface permission and audit notes before installation.","recommended_action":"Require human approval before installing into a real workspace.","auto_install_policy":"review","reasons":["Quality score needs review","65/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"browser","label":"Browser automation","reason":"Skill may drive a browser or interact with web pages.","severity":"medium"},{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":["Quality score needs review"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace.","reasons":["Quality score needs review","65/100 agent safety score"]},"supply_profile":{"track":{"slug":"coding","label":"Coding and developer agents","shortLabel":"Coding","description":"Code review, repo analysis, testing, CI, GitHub, DevOps, and developer workflow skills."},"scenario":{"label":"Coding agents","description":"I need a coding agent that can understand a repository, edit code, and review pull requests.","useCases":[{"slug":"workflow-automation","title":"Workflow automation"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"rag-knowledge","title":"RAG and knowledge"}]},"applicableAgents":["Claude Code","Browser agents","CLI","Codex","Cursor"],"install":{"ready":true,"command":"npx skills add TIGER-AI-Lab/ClawBench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":397,"starsLabel":"397","forks":22,"license":"Apache-2.0","qualityScore":83,"trustScore":78,"auditScore":85},"maintenance":{"status":"fresh","label":"19d since push","daysSincePush":19,"lastPushedAt":"2026-06-14T05:50:59+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"coverageTags":["Coding","Coding agents","web-automation","browser","automation","agent-evaluation","agentic-ai","ai-agent-benchmark"]},"audit":{"audit_score":85,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"decision":{"readiness_score":82,"readiness_label":"Strong shortlist","headline":"Companion skill for Workflow automation","role":"Companion skill","primary_fit":"Workflow automation","best_for":["Workflow automation workflows","Claude Code teams","builders willing to evaluate younger projects"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one Workflow automation task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"tiger-ai-lab-clawbench","name":"ClawBench","description":"Open-source benchmark for browser AI agents on daily tasks.","category":"web-automation","url":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench","repository":"https://github.com/TIGER-AI-Lab/ClawBench","github_repo":"TIGER-AI-Lab/ClawBench"},"suited_tasks":["Workflow automation workflows","Claude Code teams","builders willing to evaluate younger projects","Move data between tools","Transform files","Trigger repeatable actions","Inspect source files","Explain architecture"],"suited_agents":["Python","Browser Automation","Codex","Claude Code","Cursor","OpenAgentSkill CLI","Browser agents","CLI"],"install":{"command":"npx skills add TIGER-AI-Lab/ClawBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add TIGER-AI-Lab/ClawBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"ClawBench\" agent skill from https://github.com/TIGER-AI-Lab/ClawBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Open-source benchmark for browser AI agents on daily tasks."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"ClawBench\" as a Claude Code skill from https://github.com/TIGER-AI-Lab/ClawBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Open-source benchmark for browser AI agents on daily tasks."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"ClawBench\" from https://github.com/TIGER-AI-Lab/ClawBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Open-source benchmark for browser AI agents on daily tasks."}],"handoff_url":"https://www.openagentskill.com/api/skills/tiger-ai-lab-clawbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/tiger-ai-lab-clawbench"},"trust":{"score":78,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"397 GitHub stars","repoActivity":"397 stars, 22 forks","lastPushed":"19d since push","license":"Apache-2.0","repository":"https://github.com/TIGER-AI-Lab/ClawBench","install":"npx skills add TIGER-AI-Lab/ClawBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, network or browser access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["web-automation","browser","automation","agent-evaluation","agentic-ai","ai-agent-benchmark"],"known_risks":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":85,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":83,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Coding agents","maintenance":"19d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use ClawBench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 78/100 Strong shortlist","Audit: 85/100 Safe to try","Safety: 65/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"tiger-ai-lab-clawbench (ClawBench)","install_command":"npx skills add TIGER-AI-Lab/ClawBench","risk_summary":"Safe to try; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"tiger-ai-lab-clawbench","task":"Use ClawBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench","api":"https://www.openagentskill.com/api/agent/skills/tiger-ai-lab-clawbench","audit":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=tiger-ai-lab-clawbench&task=Use%20ClawBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20ClawBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20ClawBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/tiger-ai-lab-clawbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/tiger-ai-lab-clawbench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"tiger-ai-lab-clawbench","name":"ClawBench","description":"Open-source benchmark for browser AI agents on daily tasks.","category":"web-automation","url":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench","repository":"https://github.com/TIGER-AI-Lab/ClawBench","github_repo":"TIGER-AI-Lab/ClawBench"},"suited_tasks":["Workflow automation workflows","Claude Code teams","builders willing to evaluate younger projects","Move data between tools","Transform files","Trigger repeatable actions","Inspect source files","Explain architecture"],"suited_agents":["Python","Browser Automation","Codex","Claude Code","Cursor","OpenAgentSkill CLI","Browser agents","CLI"],"install":{"command":"npx skills add TIGER-AI-Lab/ClawBench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add TIGER-AI-Lab/ClawBench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"ClawBench\" agent skill from https://github.com/TIGER-AI-Lab/ClawBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Open-source benchmark for browser AI agents on daily tasks."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"ClawBench\" as a Claude Code skill from https://github.com/TIGER-AI-Lab/ClawBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Open-source benchmark for browser AI agents on daily tasks."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"ClawBench\" from https://github.com/TIGER-AI-Lab/ClawBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Open-source benchmark for browser AI agents on daily tasks."}],"handoff_url":"https://www.openagentskill.com/api/skills/tiger-ai-lab-clawbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/tiger-ai-lab-clawbench"},"trust":{"score":78,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"397 GitHub stars","repoActivity":"397 stars, 22 forks","lastPushed":"19d since push","license":"Apache-2.0","repository":"https://github.com/TIGER-AI-Lab/ClawBench","install":"npx skills add TIGER-AI-Lab/ClawBench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access, network or browser access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["web-automation","browser","automation","agent-evaluation","agentic-ai","ai-agent-benchmark"],"known_risks":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":85,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":83,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Coding agents","maintenance":"19d since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Stars/forks activity: 397 stars, 22 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use ClawBench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 78/100 Strong shortlist","Audit: 85/100 Safe to try","Safety: 65/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"tiger-ai-lab-clawbench (ClawBench)","install_command":"npx skills add TIGER-AI-Lab/ClawBench","risk_summary":"Safe to try; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"tiger-ai-lab-clawbench","task":"Use ClawBench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench","api":"https://www.openagentskill.com/api/agent/skills/tiger-ai-lab-clawbench","audit":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=tiger-ai-lab-clawbench&task=Use%20ClawBench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20ClawBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20ClawBench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/tiger-ai-lab-clawbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/tiger-ai-lab-clawbench"}},"platforms":["Python","Browser Automation","Claude Code","Browser agents"],"use_cases":[{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"browser-automation","title":"Browser automation","url":"https://www.openagentskill.com/use-cases/browser-automation"}],"install":"npx skills add TIGER-AI-Lab/ClawBench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add TIGER-AI-Lab/ClawBench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"ClawBench\" agent skill from https://github.com/TIGER-AI-Lab/ClawBench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Open-source benchmark for browser AI agents on daily tasks.","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"ClawBench\" as a Claude Code skill from https://github.com/TIGER-AI-Lab/ClawBench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Open-source benchmark for browser AI agents on daily tasks.","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"ClawBench\" from https://github.com/TIGER-AI-Lab/ClawBench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Open-source benchmark for browser AI agents on daily tasks.","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/TIGER-AI-Lab/ClawBench","github_repo":"TIGER-AI-Lab/ClawBench","version":"1.0.0","license":"Apache-2.0","updated_at":"2026-06-16T09:50:46.059346+00:00","canonical_key":"tiger-ai-lab/clawbench","recommendation_reasons":["Matches task terms: bench","Install handoff is available","Repository freshness signal is available","Registry match score 55"],"urls":{"web":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench","api":"https://www.openagentskill.com/api/agent/skills/tiger-ai-lab-clawbench","install_api":"https://www.openagentskill.com/api/skills/tiger-ai-lab-clawbench/install","audit":"https://www.openagentskill.com/skills/tiger-ai-lab-clawbench/audit","repository":"https://github.com/TIGER-AI-Lab/ClawBench"}},{"rank":9,"match_score":55,"raw_match_score":535.3,"slug":"onyx-dot-app-enterpriserag-bench","name":"EnterpriseRAG Bench","description":"Dataset and benchmark for RAG on company internal documents.","tagline":"Dataset and benchmark for RAG on company internal documents.","category":"rag-knowledge","tags":["semantic-search","retrieval","knowledge","benchmark","dataset","enterprise","enterprise-search","evaluation","generative-ai","information-retrieval"],"author":{"name":"onyx-dot-app","verified":false,"url":"https://github.com/onyx-dot-app"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"onyx-dot-app/EnterpriseRAG-Bench","creatorName":"onyx-dot-app","creatorUrl":"https://github.com/onyx-dot-app","sourceUrl":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":406,"forks":37,"downloads":0,"rating":0,"review_count":0,"quality_score":48.97},"quality":{"score":77,"tier":"strong","label":"Strong","summary":"Solid option that is likely worth shortlisting for production workflows.","signals":[{"label":"GitHub stars","value":"406","tone":"neutral"},{"label":"Freshness","value":"2mo ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"MIT","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":78,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":62,"weight":0.13,"status":"info","detail":"406 GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":57,"weight":0.08,"status":"warn","detail":"406 stars, 37 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":88,"weight":0.14,"status":"pass","detail":"2mo since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"MIT"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add onyx-dot-app/EnterpriseRAG-Bench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"info","label":"GitHub adoption","detail":"406 GitHub stars"},{"status":"warn","label":"Stars/forks activity","detail":"406 stars, 37 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"2mo since push"},{"status":"pass","label":"License clarity","detail":"MIT"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add onyx-dot-app/EnterpriseRAG-Bench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"warn","label":"Ownership","detail":"No approved owner claim yet"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Install command has no obvious high-risk pattern"],"warnings":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"],"evidence":{"stars":"406 GitHub stars","repoActivity":"406 stars, 37 forks","lastPushed":"2mo since push","license":"MIT","repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","install":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","2mo since push"]},"agentCompatibility":["Semantic Search","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["rag-knowledge","semantic-search","retrieval","knowledge","benchmark","dataset"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"knownRisks":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"safety":{"score":66,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","summary":"Usable candidate, but the agent should surface permission and audit notes before installation.","recommended_action":"Require human approval before installing into a real workspace.","auto_install_policy":"review","reasons":["Quality score needs review","66/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":["Quality score needs review"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace.","reasons":["Quality score needs review","66/100 agent safety score"]},"supply_profile":{"track":{"slug":"research","label":"Research and knowledge work","shortLabel":"Research","description":"Deep research, source comparison, literature review, RAG, knowledge search, and reports."},"scenario":{"label":"RAG and knowledge","description":"I need my agent to build a RAG workflow over documents and retrieve reliable context.","useCases":[{"slug":"rag-knowledge","title":"RAG and knowledge"},{"slug":"coding-agents","title":"Coding agents"},{"slug":"browser-automation","title":"Browser automation"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","Semantic Search"],"install":{"ready":true,"command":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":406,"starsLabel":"406","forks":37,"license":"MIT","qualityScore":77,"trustScore":78,"auditScore":82},"maintenance":{"status":"active","label":"2mo since push","daysSincePush":56,"lastPushedAt":"2026-05-08T02:57:41+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"coverageTags":["Research","RAG and knowledge","rag-knowledge","semantic-search","retrieval","knowledge","benchmark","dataset"]},"audit":{"audit_score":82,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"decision":{"readiness_score":76,"readiness_label":"Strong shortlist","headline":"Companion skill for RAG and knowledge","role":"Companion skill","primary_fit":"RAG and knowledge","best_for":["RAG and knowledge workflows","Claude Code teams","builders willing to evaluate younger projects"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one RAG and knowledge task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"onyx-dot-app-enterpriserag-bench","name":"EnterpriseRAG Bench","description":"Dataset and benchmark for RAG on company internal documents.","category":"rag-knowledge","url":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench","repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","github_repo":"onyx-dot-app/EnterpriseRAG-Bench"},"suited_tasks":["RAG and knowledge workflows","Claude Code teams","builders willing to evaluate younger projects","Chunk documents","Create embeddings","Retrieve and cite relevant passages","Inspect source files","Explain architecture"],"suited_agents":["Semantic Search","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add onyx-dot-app/EnterpriseRAG-Bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"EnterpriseRAG Bench\" agent skill from https://github.com/onyx-dot-app/EnterpriseRAG-Bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Dataset and benchmark for RAG on company internal documents."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"EnterpriseRAG Bench\" as a Claude Code skill from https://github.com/onyx-dot-app/EnterpriseRAG-Bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Dataset and benchmark for RAG on company internal documents."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"EnterpriseRAG Bench\" from https://github.com/onyx-dot-app/EnterpriseRAG-Bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Dataset and benchmark for RAG on company internal documents."}],"handoff_url":"https://www.openagentskill.com/api/skills/onyx-dot-app-enterpriserag-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/onyx-dot-app-enterpriserag-bench"},"trust":{"score":78,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"406 GitHub stars","repoActivity":"406 stars, 37 forks","lastPushed":"2mo since push","license":"MIT","repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","install":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["rag-knowledge","semantic-search","retrieval","knowledge","benchmark","dataset"],"known_risks":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":82,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":77,"label":"Strong"},"supply":{"track":"Research and knowledge work","scenario":"RAG and knowledge","maintenance":"2mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use EnterpriseRAG Bench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 78/100 Strong shortlist","Audit: 82/100 Safe to try","Safety: 66/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"onyx-dot-app-enterpriserag-bench (EnterpriseRAG Bench)","install_command":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","risk_summary":"Safe to try; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"onyx-dot-app-enterpriserag-bench","task":"Use EnterpriseRAG Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench","api":"https://www.openagentskill.com/api/agent/skills/onyx-dot-app-enterpriserag-bench","audit":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=onyx-dot-app-enterpriserag-bench&task=Use%20EnterpriseRAG%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20EnterpriseRAG%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20EnterpriseRAG%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/onyx-dot-app-enterpriserag-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/onyx-dot-app-enterpriserag-bench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"onyx-dot-app-enterpriserag-bench","name":"EnterpriseRAG Bench","description":"Dataset and benchmark for RAG on company internal documents.","category":"rag-knowledge","url":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench","repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","github_repo":"onyx-dot-app/EnterpriseRAG-Bench"},"suited_tasks":["RAG and knowledge workflows","Claude Code teams","builders willing to evaluate younger projects","Chunk documents","Create embeddings","Retrieve and cite relevant passages","Inspect source files","Explain architecture"],"suited_agents":["Semantic Search","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add onyx-dot-app/EnterpriseRAG-Bench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"EnterpriseRAG Bench\" agent skill from https://github.com/onyx-dot-app/EnterpriseRAG-Bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Dataset and benchmark for RAG on company internal documents."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"EnterpriseRAG Bench\" as a Claude Code skill from https://github.com/onyx-dot-app/EnterpriseRAG-Bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Dataset and benchmark for RAG on company internal documents."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"EnterpriseRAG Bench\" from https://github.com/onyx-dot-app/EnterpriseRAG-Bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Dataset and benchmark for RAG on company internal documents."}],"handoff_url":"https://www.openagentskill.com/api/skills/onyx-dot-app-enterpriserag-bench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/onyx-dot-app-enterpriserag-bench"},"trust":{"score":78,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"406 GitHub stars","repoActivity":"406 stars, 37 forks","lastPushed":"2mo since push","license":"MIT","repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","install":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["rag-knowledge","semantic-search","retrieval","knowledge","benchmark","dataset"],"known_risks":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":82,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":77,"label":"Strong"},"supply":{"track":"Research and knowledge work","scenario":"RAG and knowledge","maintenance":"2mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Stars/forks activity: 406 stars, 37 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use EnterpriseRAG Bench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 78/100 Strong shortlist","Audit: 82/100 Safe to try","Safety: 66/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"onyx-dot-app-enterpriserag-bench (EnterpriseRAG Bench)","install_command":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","risk_summary":"Safe to try; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"onyx-dot-app-enterpriserag-bench","task":"Use EnterpriseRAG Bench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench","api":"https://www.openagentskill.com/api/agent/skills/onyx-dot-app-enterpriserag-bench","audit":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=onyx-dot-app-enterpriserag-bench&task=Use%20EnterpriseRAG%20Bench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20EnterpriseRAG%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20EnterpriseRAG%20Bench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/onyx-dot-app-enterpriserag-bench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/onyx-dot-app-enterpriserag-bench"}},"platforms":["Semantic Search","Claude Code"],"use_cases":[{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"browser-automation","title":"Browser automation","url":"https://www.openagentskill.com/use-cases/browser-automation"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"}],"install":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add onyx-dot-app/EnterpriseRAG-Bench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"EnterpriseRAG Bench\" agent skill from https://github.com/onyx-dot-app/EnterpriseRAG-Bench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Dataset and benchmark for RAG on company internal documents.","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"EnterpriseRAG Bench\" as a Claude Code skill from https://github.com/onyx-dot-app/EnterpriseRAG-Bench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Dataset and benchmark for RAG on company internal documents.","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"EnterpriseRAG Bench\" from https://github.com/onyx-dot-app/EnterpriseRAG-Bench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Dataset and benchmark for RAG on company internal documents.","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench","github_repo":"onyx-dot-app/EnterpriseRAG-Bench","version":"1.0.0","license":"MIT","updated_at":"2026-06-16T09:51:09.938027+00:00","canonical_key":"onyx-dot-app/enterpriserag-bench","recommendation_reasons":["Matches task terms: bench","Install handoff is available","Repository freshness signal is available","Registry match score 55"],"urls":{"web":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench","api":"https://www.openagentskill.com/api/agent/skills/onyx-dot-app-enterpriserag-bench","install_api":"https://www.openagentskill.com/api/skills/onyx-dot-app-enterpriserag-bench/install","audit":"https://www.openagentskill.com/skills/onyx-dot-app-enterpriserag-bench/audit","repository":"https://github.com/onyx-dot-app/EnterpriseRAG-Bench"}},{"rank":10,"match_score":55,"raw_match_score":533.6,"slug":"longhorn-kbench","name":"Kbench","description":"Benchmark your Kubernetes storage.","tagline":"Benchmark your Kubernetes storage.","category":"devops","tags":["kubernetes","devops","benchmark","storage","go","github"],"author":{"name":"longhorn","verified":false,"url":"https://github.com/longhorn"},"attribution":{"status":"community_indexed","statusLabel":"Community indexed","shortLabel":"COMMUNITY INDEXED","sourceLabel":"GitHub star discovery","sourceDetail":"longhorn/kbench","creatorName":"longhorn","creatorUrl":"https://github.com/longhorn","sourceUrl":"https://github.com/longhorn/kbench","indexedBy":"OpenAgentSkill community index","claimUrl":"https://www.openagentskill.com/skills/longhorn-kbench#claim-this-skill","claimCta":"Claim this skill","trustNote":"This listing was indexed from public sources and is not marked official until a maintainer claim is approved.","publicNote":"Attribution links to the public repository or creator profile. Creators can claim the listing to update ownership signals."},"stats":{"stars":229,"forks":41,"downloads":0,"rating":0,"review_count":0,"quality_score":47.23},"quality":{"score":75,"tier":"strong","label":"Strong","summary":"Solid option that is likely worth shortlisting for production workflows.","signals":[{"label":"GitHub stars","value":"229","tone":"neutral"},{"label":"Freshness","value":"3mo ago","tone":"positive"},{"label":"Install ready","value":"Yes","tone":"positive"},{"label":"License","value":"Apache-2.0","tone":"neutral"}],"warnings":[]},"trust":{"version":"trust-score-v4","score":78,"tier":"strong","label":"Strong shortlist","summary":"Good trust signals with a few areas worth checking before rollout.","recommendedAction":"Test in a sandbox workflow and compare its install path with close alternatives.","dimensions":[{"id":"github_adoption","label":"GitHub adoption","score":62,"weight":0.13,"status":"info","detail":"229 GitHub stars"},{"id":"repo_activity","label":"Stars/forks activity","score":57,"weight":0.08,"status":"warn","detail":"229 stars, 41 forks; issue activity unavailable in current metadata"},{"id":"maintenance","label":"Recent maintenance","score":88,"weight":0.14,"status":"pass","detail":"3mo since push"},{"id":"license","label":"License clarity","score":86,"weight":0.09,"status":"pass","detail":"Apache-2.0"},{"id":"documentation","label":"README/SKILL.md completeness","score":74,"weight":0.14,"status":"info","detail":"Public metadata needs stronger README/SKILL.md context"},{"id":"dependency_risk","label":"Dependency/runtime risk","score":90,"weight":0.12,"status":"pass","detail":"no major dependency risk hints in public metadata"},{"id":"installability","label":"Install availability","score":92,"weight":0.1,"status":"pass","detail":"npx skills add longhorn/kbench"},{"id":"install_safety","label":"Install command safety","score":92,"weight":0.1,"status":"pass","detail":"standard package or runtime install path"},{"id":"permission_surface","label":"Permission surface","score":86,"weight":0.07,"status":"pass","detail":"filesystem or document access"},{"id":"repository","label":"Repository evidence","score":86,"weight":0.04,"status":"pass","detail":"https://github.com/longhorn/kbench"},{"id":"review_status","label":"Review status","score":88,"weight":0.05,"status":"pass","detail":"AI review data available"},{"id":"agent_outcomes","label":"Agent Proven outcomes","score":54,"weight":0.13,"status":"info","detail":"No agent outcome data yet"}],"checks":[{"status":"info","label":"GitHub adoption","detail":"229 GitHub stars"},{"status":"warn","label":"Stars/forks activity","detail":"229 stars, 41 forks; issue activity unavailable in current metadata"},{"status":"pass","label":"Recent maintenance","detail":"3mo since push"},{"status":"pass","label":"License clarity","detail":"Apache-2.0"},{"status":"info","label":"README/SKILL.md completeness","detail":"Public metadata needs stronger README/SKILL.md context"},{"status":"pass","label":"Dependency/runtime risk","detail":"no major dependency risk hints in public metadata"},{"status":"pass","label":"Install availability","detail":"npx skills add longhorn/kbench"},{"status":"pass","label":"Install command safety","detail":"standard package or runtime install path"},{"status":"pass","label":"Permission surface","detail":"filesystem or document access"},{"status":"pass","label":"Repository evidence","detail":"https://github.com/longhorn/kbench"},{"status":"pass","label":"Review status","detail":"AI review data available"},{"status":"info","label":"Agent Proven outcomes","detail":"No agent outcome data yet"},{"status":"warn","label":"Ownership","detail":"No approved owner claim yet"},{"status":"info","label":"OpenAgentSkill usage","detail":"No local usage activity yet"},{"status":"info","label":"Agent outcomes","detail":"No agent outcome data yet"}],"strengths":["AI review approved","Install path is available","Repository evidence is available","Recently maintained repository","Install command has no obvious high-risk pattern"],"warnings":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"],"evidence":{"stars":"229 GitHub stars","repoActivity":"229 stars, 41 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/longhorn/kbench","install":"npx skills add longhorn/kbench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"installReadiness":{"ready":true,"command":"npx skills add longhorn/kbench","policy":"human_review_before_install","label":"Human review before install","notes":["Install path is available","Repository evidence is available","License is declared","No Agent Proven outcome evidence yet","3mo since push"]},"agentCompatibility":["Go","Kubernetes","Codex","Claude Code","Cursor","OpenAgentSkill CLI"],"riskSummary":{"level":"medium","label":"Review before production","notes":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"outcomeEvidence":{"total":0,"successes":0,"failures":0,"notRelevant":0,"successRate":null,"installAttempts":0,"riskBlocked":0,"setupRequired":0,"installSuccessRate":null,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"recentSuccessRate":null,"recentFailureRate":null,"uniqueAgents":0,"agentProvenScore":0,"agentProvenLabel":"Needs first agent run","lastOutcomeAt":null,"label":"No agent outcome data yet"},"autoInstall":{"allowed":false,"sandboxRequired":true,"policy":"human_review_before_install","reason":"Human review or sandbox validation is required before automatic installation."},"bestFor":["devops","kubernetes","benchmark","storage","go","github"],"doNotUseFor":["Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"knownRisks":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"safety":{"score":66,"level":"review_before_install","label":"Review before install","safety_tier":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","summary":"Usable candidate, but the agent should surface permission and audit notes before installation.","recommended_action":"Require human approval before installing into a real workspace.","auto_install_policy":"review","reasons":["Quality score needs review","66/100 agent safety score"]},"auto_install_allowed":false,"human_review_required":true,"blocked":false,"audit_risk":"safe_to_try","permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"},{"id":"filesystem","label":"Filesystem access","reason":"Skill may read or write project files, documents, generated artifacts, or local workspace state.","severity":"medium"}],"policy_warnings":["Quality score needs review"],"constraints_applied":{"max_risk":"medium","needs_install_command":true,"min_stars":0}},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","badge":"REVIEWED","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace.","reasons":["Quality score needs review","66/100 agent safety score"]},"supply_profile":{"track":{"slug":"coding","label":"Coding and developer agents","shortLabel":"Coding","description":"Code review, repo analysis, testing, CI, GitHub, DevOps, and developer workflow skills."},"scenario":{"label":"Coding agents","description":"I need a coding agent that can understand a repository, edit code, and review pull requests.","useCases":[{"slug":"coding-agents","title":"Coding agents"},{"slug":"rag-knowledge","title":"RAG and knowledge"},{"slug":"workflow-automation","title":"Workflow automation"}]},"applicableAgents":["Claude Code","CLI","Codex","Cursor","Go"],"install":{"ready":true,"command":"npx skills add longhorn/kbench","primaryTarget":"CLI","targetCount":4},"githubQuality":{"stars":229,"starsLabel":"229","forks":41,"license":"Apache-2.0","qualityScore":75,"trustScore":78,"auditScore":82},"maintenance":{"status":"active","label":"3mo since push","daysSincePush":81,"lastPushedAt":"2026-04-13T07:49:11+00:00"},"risk":{"level":"safe_to_try","label":"Safe to try","requiresReview":true,"notes":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"coverageTags":["Coding","Coding agents","devops","kubernetes","benchmark","storage","go","github"]},"audit":{"audit_score":82,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"decision":{"readiness_score":74,"readiness_label":"Strong shortlist","headline":"Companion skill for Coding agents","role":"Companion skill","primary_fit":"Coding agents","best_for":["Coding agents workflows","Claude Code teams","builders willing to evaluate younger projects"],"risks":["No OpenAgentSkill engagement data yet"],"next_steps":["Install it in a sandbox agent and run one Coding agents task end to end.","Compare output quality, latency, and failure behavior against at least one alternative.","Promote it into production only after reviewing repository permissions, license, and maintenance signals."]},"agent_readable_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"longhorn-kbench","name":"Kbench","description":"Benchmark your Kubernetes storage.","category":"devops","url":"https://www.openagentskill.com/skills/longhorn-kbench","repository":"https://github.com/longhorn/kbench","github_repo":"longhorn/kbench"},"suited_tasks":["Coding agents workflows","Claude Code teams","builders willing to evaluate younger projects","Inspect source files","Explain architecture","Patch bugs and verify changes","Chunk documents","Create embeddings"],"suited_agents":["Go","Kubernetes","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add longhorn/kbench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add longhorn/kbench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Kbench\" agent skill from https://github.com/longhorn/kbench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Benchmark your Kubernetes storage."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Kbench\" as a Claude Code skill from https://github.com/longhorn/kbench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Benchmark your Kubernetes storage."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Kbench\" from https://github.com/longhorn/kbench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Benchmark your Kubernetes storage."}],"handoff_url":"https://www.openagentskill.com/api/skills/longhorn-kbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/longhorn-kbench"},"trust":{"score":78,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"229 GitHub stars","repoActivity":"229 stars, 41 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/longhorn/kbench","install":"npx skills add longhorn/kbench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["devops","kubernetes","benchmark","storage","go","github"],"known_risks":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":82,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":75,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Coding agents","maintenance":"3mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use Kbench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 78/100 Strong shortlist","Audit: 82/100 Safe to try","Safety: 66/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"longhorn-kbench (Kbench)","install_command":"npx skills add longhorn/kbench","risk_summary":"Safe to try; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"longhorn-kbench","task":"Use Kbench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/longhorn-kbench","api":"https://www.openagentskill.com/api/agent/skills/longhorn-kbench","audit":"https://www.openagentskill.com/skills/longhorn-kbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=longhorn-kbench&task=Use%20Kbench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Kbench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Kbench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/longhorn-kbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/longhorn-kbench"}},"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"longhorn-kbench","name":"Kbench","description":"Benchmark your Kubernetes storage.","category":"devops","url":"https://www.openagentskill.com/skills/longhorn-kbench","repository":"https://github.com/longhorn/kbench","github_repo":"longhorn/kbench"},"suited_tasks":["Coding agents workflows","Claude Code teams","builders willing to evaluate younger projects","Inspect source files","Explain architecture","Patch bugs and verify changes","Chunk documents","Create embeddings"],"suited_agents":["Go","Kubernetes","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add longhorn/kbench","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add longhorn/kbench"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"Kbench\" agent skill from https://github.com/longhorn/kbench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Benchmark your Kubernetes storage."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"Kbench\" as a Claude Code skill from https://github.com/longhorn/kbench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Benchmark your Kubernetes storage."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"Kbench\" from https://github.com/longhorn/kbench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Benchmark your Kubernetes storage."}],"handoff_url":"https://www.openagentskill.com/api/skills/longhorn-kbench/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/longhorn-kbench"},"trust":{"score":78,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"229 GitHub stars","repoActivity":"229 stars, 41 forks","lastPushed":"3mo since push","license":"Apache-2.0","repository":"https://github.com/longhorn/kbench","install":"npx skills add longhorn/kbench","installSafety":"standard package or runtime install path","permissionSurface":"filesystem or document access","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["devops","kubernetes","benchmark","storage","go","github"],"known_risks":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":82,"risk_level":"safe_to_try","risk_label":"Safe to try","warnings":["Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":75,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"Coding agents","maintenance":"3mo since push","risk":"Safe to try"},"alternative_skills":[],"do_not_use_when":["teams that need a vendor-supported SLA","high-compliance environments without internal security review","No OpenAgentSkill engagement data yet","Quality score needs review","Stars/forks activity: 229 stars, 41 forks; issue activity unavailable in current metadata","Production credentials, payments, or irreversible account changes without explicit human review","Sensitive private data before reviewing repository code, license, and permission surface","Automatic installation in a production workspace"],"agent_contract":{"task_input":"Use Kbench in an agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 78/100 Strong shortlist","Audit: 82/100 Safe to try","Safety: 66/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"longhorn-kbench (Kbench)","install_command":"npx skills add longhorn/kbench","risk_summary":"Safe to try; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"longhorn-kbench","task":"Use Kbench in an agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/longhorn-kbench","api":"https://www.openagentskill.com/api/agent/skills/longhorn-kbench","audit":"https://www.openagentskill.com/skills/longhorn-kbench/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=longhorn-kbench&task=Use%20Kbench%20in%20an%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Use%20Kbench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Use%20Kbench%20in%20an%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/longhorn-kbench/install","manifest":"https://www.openagentskill.com/api/registry/manifest/longhorn-kbench"}},"platforms":["Go","Kubernetes","Claude Code"],"use_cases":[{"slug":"coding-agents","title":"Coding agents","url":"https://www.openagentskill.com/use-cases/coding-agents"},{"slug":"rag-knowledge","title":"RAG and knowledge","url":"https://www.openagentskill.com/use-cases/rag-knowledge"},{"slug":"workflow-automation","title":"Workflow automation","url":"https://www.openagentskill.com/use-cases/workflow-automation"},{"slug":"sports-analytics","title":"Sports analytics","url":"https://www.openagentskill.com/use-cases/sports-analytics"}],"install":"npx skills add longhorn/kbench","install_targets":[{"id":"openagentskill-cli","label":"CLI","title":"OpenAgentSkill CLI","kind":"command","value":"npx skills add longhorn/kbench","description":"Use the registry command when your workflow supports the OpenAgentSkill installer.","copyLabel":"Copy command"},{"id":"codex","label":"Codex","title":"Codex install prompt","kind":"agent-prompt","value":"Install the \"Kbench\" agent skill from https://github.com/longhorn/kbench. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: Benchmark your Kubernetes storage.","description":"Give Codex a repo-aware install prompt when the skill is not available through a local CLI.","copyLabel":"Copy prompt"},{"id":"claude-code","label":"Claude Code","title":"Claude Code skill prompt","kind":"agent-prompt","value":"Add \"Kbench\" as a Claude Code skill from https://github.com/longhorn/kbench. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: Benchmark your Kubernetes storage.","description":"Use this prompt to ask Claude Code to add the skill and explain the local activation steps.","copyLabel":"Copy prompt"},{"id":"cursor","label":"Cursor","title":"Cursor rule prompt","kind":"agent-prompt","value":"Turn \"Kbench\" from https://github.com/longhorn/kbench into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: Benchmark your Kubernetes storage.","description":"Use this when installing as Cursor project rules or reusable agent instructions.","copyLabel":"Copy prompt"}],"repository":"https://github.com/longhorn/kbench","github_repo":"longhorn/kbench","version":"1.0.0","license":"Apache-2.0","updated_at":"2026-06-23T03:00:51.033442+00:00","canonical_key":"longhorn/kbench","recommendation_reasons":["Matches task terms: bench","Install handoff is available","Repository freshness signal is available","Registry match score 55"],"urls":{"web":"https://www.openagentskill.com/skills/longhorn-kbench","api":"https://www.openagentskill.com/api/agent/skills/longhorn-kbench","install_api":"https://www.openagentskill.com/api/skills/longhorn-kbench/install","audit":"https://www.openagentskill.com/skills/longhorn-kbench/audit","repository":"https://github.com/longhorn/kbench"}}],"meta":{"endpoint":"/api/skills/search","canonical_agent_endpoint":"/api/agent/resolve","safety_policy":"Blocked candidates are excluded by default. Pass include_blocked=true only for manual audit workflows.","agent_friendly":true,"api_version":"1.0","generated_at":"2026-07-03T21:16:33.981Z"}}