{"eval":{"version":"openagentskill-skill-eval-v1","slug":"marlbenchmark-on-policy","name":"On Policy","generated_at":"2026-07-03T22:03:14.118Z","task_input":"Evaluate On Policy before installing it in an AI agent workflow","status":"review","score":75,"risk_level":"medium","decision":{"recommendation":"manual_review","reason":"Require human approval before installing into a real workspace.","auto_install_allowed":false,"policy":"review","human_review_required":true},"task_fit":{"score":84,"suited_tasks":["GitHub automation workflows","Claude Code teams","teams that value GitHub adoption signals","Inspect repository metadata","Compare code changes","Write concise engineering summaries","Inspect source files","Explain architecture"],"suited_agents":["Python","Multi-Agent","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"]},"install":{"command":"npx skills add marlbenchmark/on-policy","ready":true,"policy":"review","safety_label":"Review before install","targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add marlbenchmark/on-policy"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"On Policy\" agent skill from https://github.com/marlbenchmark/on-policy. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: This is the official implementation of Multi-Agent PPO (MAPPO)."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"On Policy\" as a Claude Code skill from https://github.com/marlbenchmark/on-policy. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: This is the official implementation of Multi-Agent PPO (MAPPO)."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"On Policy\" from https://github.com/marlbenchmark/on-policy into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: This is the official implementation of Multi-Agent PPO (MAPPO)."}]},"trust":{"score":83,"label":"Strong shortlist","version":"trust-score-v4","evidence":{"stars":"2.0K GitHub stars","repoActivity":"2.0K stars, 378 forks","lastPushed":"2y since push","license":"MIT","repository":"https://github.com/marlbenchmark/on-policy","install":"npx skills add marlbenchmark/on-policy","installSafety":"standard package or runtime install path","permissionSurface":"no high-risk permission surface in public metadata","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"}},"audit":{"score":76,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Repository appears stale","Repository looks stale","Quality score needs review","Documentation summary is thin","Recent maintenance: 2y since push"]},"safety_gate":{"score":64,"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","blocked":false,"permission_hints":[{"id":"network","label":"Network access","reason":"Skill likely fetches remote pages, APIs, repositories, or external services.","severity":"medium"}],"policy_warnings":["Repository appears stale"]},"checks":[{"id":"task_fit","label":"Task fit","status":"pass","score":84,"required_for_auto_install":true,"detail":"Task wording matches this skill metadata.","evidence":["Evaluate On Policy before installing it in an AI agent workflow","agent-frameworks","GitHub automation workflows; Claude Code teams; teams that value GitHub adoption signals"]},{"id":"install_path","label":"Install path","status":"pass","score":92,"required_for_auto_install":true,"detail":"Install handoff is available.","evidence":["npx skills add marlbenchmark/on-policy"]},{"id":"install_safety","label":"Install command safety","status":"pass","score":92,"required_for_auto_install":true,"detail":"standard package or runtime install path","evidence":["npx skills add marlbenchmark/on-policy"]},{"id":"trust_score","label":"Trust score","status":"pass","score":83,"required_for_auto_install":true,"detail":"Good trust signals with a few areas worth checking before rollout.","evidence":["Strong shortlist","2.0K GitHub stars","MIT"]},{"id":"audit_score","label":"Audit score","status":"warn","score":76,"required_for_auto_install":true,"detail":"Needs review","evidence":["Repository appears stale"]},{"id":"agent_safety_gate","label":"Agent safety gate","status":"warn","score":64,"required_for_auto_install":true,"detail":"Usable candidate, but the agent should surface permission and audit notes before installation.","evidence":["Require human approval before installing into a real workspace.","Repository appears stale"]},{"id":"readme_skillmd_completeness","label":"README/SKILL.md completeness","status":"warn","score":74,"required_for_auto_install":false,"detail":"Public metadata needs stronger README/SKILL.md context","evidence":["Usable metadata, review docs"]},{"id":"license_clarity","label":"License clarity","status":"pass","score":86,"required_for_auto_install":true,"detail":"MIT","evidence":["MIT"]},{"id":"recent_maintenance","label":"Recent maintenance","status":"fail","score":38,"required_for_auto_install":false,"detail":"2y since push","evidence":["2y since push"]},{"id":"permission_surface","label":"Permission surface","status":"pass","score":100,"required_for_auto_install":true,"detail":"no high-risk permission surface in public metadata","evidence":["Network access: medium"]},{"id":"alternatives","label":"Alternatives available","status":"pass","score":82,"required_for_auto_install":false,"detail":"Alternative skills are available for comparison.","evidence":["significant-gravitas-autogpt","langchain-ai-langchain","nousresearch-hermes-agent","firecrawl-firecrawl"]}],"blockers":[],"warnings":["Audit score: Needs review","Agent safety gate: Usable candidate, but the agent should surface permission and audit notes before installation.","README/SKILL.md completeness: Public metadata needs stronger README/SKILL.md context","Repository appears stale","Repository looks stale","Quality score needs review","Documentation summary is thin","Recent maintenance: 2y since push"],"validation_plan":["Inspect repository, README/SKILL.md, license, and recent commits before production use.","Install in an isolated workspace or sandbox with no production secrets available.","Run the smallest representative task and record files touched, commands run, network access, and outputs.","Compare the selected skill against at least one alternative when the eval status is review or failed.","Promote only after the agent reports a successful verification result and unresolved warnings are accepted."],"do_not_use_when":["teams that require actively maintained dependencies","production agents without a repository review","Repository looks stale","Repository appears stale","Quality score needs review","Documentation summary is thin","Recent maintenance: 2y since push","Production credentials, payments, or irreversible account changes without explicit human review"],"alternatives":[{"slug":"significant-gravitas-autogpt","name":"AutoGPT","url":"https://www.openagentskill.com/skills/significant-gravitas-autogpt","stars":185244,"install_command":"npx skills add Significant-Gravitas/AutoGPT","trust_score":86,"audit_score":92},{"slug":"langchain-ai-langchain","name":"Langchain","url":"https://www.openagentskill.com/skills/langchain-ai-langchain","stars":140782,"install_command":"npx skills add langchain-ai/langchain","trust_score":92,"audit_score":95},{"slug":"nousresearch-hermes-agent","name":"Hermes Agent","url":"https://www.openagentskill.com/skills/nousresearch-hermes-agent","stars":205451,"install_command":"npx skills add NousResearch/hermes-agent","trust_score":92,"audit_score":95},{"slug":"firecrawl-firecrawl","name":"Firecrawl","url":"https://www.openagentskill.com/skills/firecrawl-firecrawl","stars":139273,"install_command":"npx skills add firecrawl/firecrawl","trust_score":91,"audit_score":94}],"machine_metadata":{"version":"openagentskill-agent-metadata-v2","skill":{"slug":"marlbenchmark-on-policy","name":"On Policy","description":"This is the official implementation of Multi-Agent PPO (MAPPO).","category":"agent-frameworks","url":"https://www.openagentskill.com/skills/marlbenchmark-on-policy","repository":"https://github.com/marlbenchmark/on-policy","github_repo":"marlbenchmark/on-policy"},"suited_tasks":["GitHub automation workflows","Claude Code teams","teams that value GitHub adoption signals","Inspect repository metadata","Compare code changes","Write concise engineering summaries","Inspect source files","Explain architecture"],"suited_agents":["Python","Multi-Agent","Codex","Claude Code","Cursor","OpenAgentSkill CLI","CLI"],"install":{"command":"npx skills add marlbenchmark/on-policy","ready":true,"targets":[{"id":"openagentskill-cli","label":"CLI","kind":"command","value":"npx skills add marlbenchmark/on-policy"},{"id":"codex","label":"Codex","kind":"agent-prompt","value":"Install the \"On Policy\" agent skill from https://github.com/marlbenchmark/on-policy. Read its SKILL.md or equivalent instructions first, install only the files needed for this workspace, and summarize any required setup before using it. Skill purpose: This is the official implementation of Multi-Agent PPO (MAPPO)."},{"id":"claude-code","label":"Claude Code","kind":"agent-prompt","value":"Add \"On Policy\" as a Claude Code skill from https://github.com/marlbenchmark/on-policy. Inspect the skill instructions, place the reusable skill files in the appropriate local skills location for this project, and report the activation steps. Skill purpose: This is the official implementation of Multi-Agent PPO (MAPPO)."},{"id":"cursor","label":"Cursor","kind":"agent-prompt","value":"Turn \"On Policy\" from https://github.com/marlbenchmark/on-policy into a reusable Cursor project rule or agent instruction. Preserve the core workflow, adapt paths to this repo, and keep the rule scoped to tasks where it is relevant. Skill purpose: This is the official implementation of Multi-Agent PPO (MAPPO)."}],"handoff_url":"https://www.openagentskill.com/api/skills/marlbenchmark-on-policy/install","manifest_url":"https://www.openagentskill.com/api/registry/manifest/marlbenchmark-on-policy"},"trust":{"score":83,"label":"Strong shortlist","version":"trust-score-v4","install_policy":"human_review_before_install","evidence":{"stars":"2.0K GitHub stars","repoActivity":"2.0K stars, 378 forks","lastPushed":"2y since push","license":"MIT","repository":"https://github.com/marlbenchmark/on-policy","install":"npx skills add marlbenchmark/on-policy","installSafety":"standard package or runtime install path","permissionSurface":"no high-risk permission surface in public metadata","documentation":"Usable metadata, review docs","agentOutcomes":"No agent outcome data yet"},"outcome_evidence":{"total":0,"successes":0,"failures":0,"not_relevant":0,"success_rate":null,"recent_success_rate":null,"recent_failure_rate":null,"install_attempts":0,"install_success_rate":null,"risk_blocked":0,"setup_required":0,"avg_output_quality":null,"production_outcomes":0,"last_outcome_at":null,"label":"No agent outcome data yet"},"auto_install":{"allowed":false,"sandbox_required":true,"reason":"Human review or sandbox validation is required before automatic installation."},"best_for":["agent-frameworks","multi-agent","orchestration","algorithms","hanabi","mappo"],"known_risks":["Repository looks stale","Quality score needs review","Documentation summary is thin","Recent maintenance: 2y since push"]},"agent_proven":{"version":"agent-proven-v1","score":0,"tier":"unproven","label":"Needs first agent run","summary":"No agent outcome reports yet. Use Resolve, run one narrow sandbox task, then report the result.","metrics":{"totalOutcomes":0,"successfulOutcomes":0,"failedOutcomes":0,"installAttempts":0,"installSuccessRate":null,"successRate":null,"recentSuccessRate":null,"recentFailureRate":null,"riskBlocked":0,"setupRequired":0,"notRelevant":0,"avgOutputQuality":null,"avgTimeToUsefulMs":null,"productionOutcomes":0,"humanReviewRequired":0,"uniqueAgents":0,"lastOutcomeAt":null},"signals":[],"penalties":["No real agent outcome evidence yet"]},"audit":{"score":76,"risk_level":"needs_review","risk_label":"Needs review","warnings":["Repository appears stale","Repository looks stale","Quality score needs review","Documentation summary is thin","Recent maintenance: 2y since push"]},"safety_gate":{"tier":"reviewed","label":"Reviewed with permission notes","auto_install_policy":"review","auto_install_allowed":false,"human_review_required":true,"blocked":false,"recommended_action":"Require human approval before installing into a real workspace."},"quality":{"score":73,"label":"Strong"},"supply":{"track":"Coding and developer agents","scenario":"GitHub automation","maintenance":"2y since push","risk":"Needs review"},"alternative_skills":[{"slug":"significant-gravitas-autogpt","name":"AutoGPT","url":"https://www.openagentskill.com/skills/significant-gravitas-autogpt","stars":185244,"install_command":"npx skills add Significant-Gravitas/AutoGPT","trust_score":86,"audit_score":92},{"slug":"langchain-ai-langchain","name":"Langchain","url":"https://www.openagentskill.com/skills/langchain-ai-langchain","stars":140782,"install_command":"npx skills add langchain-ai/langchain","trust_score":92,"audit_score":95},{"slug":"nousresearch-hermes-agent","name":"Hermes Agent","url":"https://www.openagentskill.com/skills/nousresearch-hermes-agent","stars":205451,"install_command":"npx skills add NousResearch/hermes-agent","trust_score":92,"audit_score":95},{"slug":"firecrawl-firecrawl","name":"Firecrawl","url":"https://www.openagentskill.com/skills/firecrawl-firecrawl","stars":139273,"install_command":"npx skills add firecrawl/firecrawl","trust_score":91,"audit_score":94}],"do_not_use_when":["teams that require actively maintained dependencies","production agents without a repository review","Repository looks stale","Repository appears stale","Quality score needs review","Documentation summary is thin","Recent maintenance: 2y since push","Production credentials, payments, or irreversible account changes without explicit human review"],"agent_contract":{"task_input":"Evaluate On Policy before installing it in an AI agent workflow","recommended_action":"Require human approval before installing into a real workspace.","install_policy":"review","minimum_review_before_use":["Trust: 83/100 Strong shortlist","Audit: 76/100 Needs review","Safety: 64/100 Review before install","Review repository, license, install command, and permission surface before production use."],"expected_agent_output":{"selected_skill":"marlbenchmark-on-policy (On Policy)","install_command":"npx skills add marlbenchmark/on-policy","risk_summary":"Needs review; Reviewed with permission notes; Review before production","verification_result":"Report the smallest successful task, files touched, warnings, and any missing setup."}},"outcome_feedback":{"endpoint":"https://www.openagentskill.com/api/agent/outcome","method":"POST","requires_resolve_event_id":true,"event_id_source":"Use install_receipt.outcome_feedback.event_id or feedback.event_id returned by /api/agent/resolve for the current task.","expected_outcomes":["success","failed","not_relevant","blocked_by_risk","setup_required"],"payload_template":{"event_id":"<install_receipt.outcome_feedback.event_id or feedback.event_id from /api/agent/resolve>","skill_slug":"marlbenchmark-on-policy","task":"Evaluate On Policy before installing it in an AI agent workflow","agent":"codex","outcome":"success","install_used":true,"risk_blocked":false,"setup_required":false,"task_success":true,"output_quality":4,"error_type":null,"human_review_required":false,"workspace":"sandbox","time_to_useful_ms":120000,"notes":"Report the smallest successful task, setup friction, files touched, and risk notes."}},"endpoints":{"web":"https://www.openagentskill.com/skills/marlbenchmark-on-policy","api":"https://www.openagentskill.com/api/agent/skills/marlbenchmark-on-policy","audit":"https://www.openagentskill.com/skills/marlbenchmark-on-policy/audit","eval":"https://www.openagentskill.com/api/agent/evals?slug=marlbenchmark-on-policy&task=Evaluate%20On%20Policy%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&max_risk=medium","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Evaluate%20On%20Policy%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&agent=codex&max_risk=medium","receipt":"https://www.openagentskill.com/api/agent/receipt?task=Evaluate%20On%20Policy%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&agent=codex&max_risk=medium&format=text","install":"https://www.openagentskill.com/api/skills/marlbenchmark-on-policy/install","manifest":"https://www.openagentskill.com/api/registry/manifest/marlbenchmark-on-policy"}},"endpoints":{"web":"https://www.openagentskill.com/skills/marlbenchmark-on-policy","api":"https://www.openagentskill.com/api/agent/skills/marlbenchmark-on-policy","eval":"https://www.openagentskill.com/api/agent/evals?slug=marlbenchmark-on-policy","audit":"https://www.openagentskill.com/skills/marlbenchmark-on-policy/audit","resolve":"https://www.openagentskill.com/api/agent/resolve?task=Evaluate%20On%20Policy%20before%20installing%20it%20in%20an%20AI%20agent%20workflow&agent=codex&max_risk=medium"}},"meta":{"endpoint":"/api/agent/evals","mode":"skill_eval","purpose":"Pre-install eval contract for a single skill. Agents should read this before installing a reusable skill.","generated_at":"2026-07-03T22:03:14.118Z"}}