{"generated_at":"2026-05-20","links":{"dataset":"https://huggingface.co/datasets/TIGER-Lab/ClawBench","github":"https://github.com/TIGER-AI-Lab/ClawBench","leaderboard_space":"https://huggingface.co/spaces/TIGER-Lab/ClawBench","paper_arxiv":"https://arxiv.org/abs/2604.08523","results_csv":"https://huggingface.co/datasets/TIGER-Lab/ClawBench/resolve/main/leaderboard/results.csv","scoring_md":"https://github.com/TIGER-AI-Lab/ClawBench/blob/main/eval/scoring.md","website":"https://claw-bench.com/"},"rows_v1":[{"corpus":"v1","harness":"hermes","int_rate":0.6144,"intercepted":94,"matched":94,"model":"claude-opus-4-6","n":153,"partial":false,"reward":0.6144,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.5686,"intercepted":87,"matched":87,"model":"claude-sonnet-4-6","n":153,"partial":false,"reward":0.5686,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.3007,"intercepted":46,"matched":46,"model":"claude-haiku-4-5-20251001","n":153,"partial":false,"reward":0.3007,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.2549,"intercepted":39,"matched":39,"model":"gpt-5.4-2026-03-05","n":153,"partial":false,"reward":0.2549,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.2484,"intercepted":38,"matched":38,"model":"gpt-5.4-mini-2026-03-17","n":153,"partial":false,"reward":0.2484,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.1765,"intercepted":27,"matched":27,"model":"kimi-k2.5","n":153,"partial":false,"reward":0.1765,"wall_h":null}],"rows_v1_hermes":[{"corpus":"v1","harness":"hermes","int_rate":0.6144,"intercepted":94,"matched":94,"model":"claude-opus-4-6","n":153,"partial":false,"reward":0.6144,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.5686,"intercepted":87,"matched":87,"model":"claude-sonnet-4-6","n":153,"partial":false,"reward":0.5686,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.3007,"intercepted":46,"matched":46,"model":"claude-haiku-4-5-20251001","n":153,"partial":false,"reward":0.3007,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.2549,"intercepted":39,"matched":39,"model":"gpt-5.4-2026-03-05","n":153,"partial":false,"reward":0.2549,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.2484,"intercepted":38,"matched":38,"model":"gpt-5.4-mini-2026-03-17","n":153,"partial":false,"reward":0.2484,"wall_h":null},{"corpus":"v1","harness":"hermes","int_rate":0.1765,"intercepted":27,"matched":27,"model":"kimi-k2.5","n":153,"partial":false,"reward":0.1765,"wall_h":null}],"rows_v1_openclaw":[],"rows_v2":[{"corpus":"v2","cost_per_task_usd":4.4425,"harness":"hermes","int_rate":0.5462,"intercepted":71,"matched":58,"matched_strict":32,"model":"claude-opus-4-7","n":130,"partial":false,"reward":0.4462,"reward_strict":0.2462,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.1935,"harness":"hermes","int_rate":0.4846,"intercepted":63,"matched":45,"matched_strict":23,"model":"glm-5.1","n":130,"partial":false,"reward":0.3462,"reward_strict":0.1769,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.3325,"harness":"hermes","int_rate":0.4538,"intercepted":59,"matched":46,"matched_strict":24,"model":"gpt-5.5","n":130,"partial":false,"reward":0.3538,"reward_strict":0.1846,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.07207,"harness":"hermes","int_rate":0.4385,"intercepted":57,"matched":44,"matched_strict":16,"model":"deepseek-v4-pro","n":130,"partial":false,"reward":0.3385,"reward_strict":0.1231,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.3704,"harness":"hermes","int_rate":0.1462,"intercepted":19,"matched":0,"matched_strict":0,"model":"openrouter-owl-alpha","n":130,"partial":false,"reward":0.0,"reward_strict":0.0,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.0,"harness":"hermes","int_rate":0.0462,"intercepted":6,"matched":3,"matched_strict":1,"model":"z-ai/glm-4.5-air:free","n":130,"partial":false,"reward":0.0231,"reward_strict":0.0077,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.0,"harness":"hermes","int_rate":0.031,"intercepted":4,"matched":3,"matched_strict":0,"model":"deepseek-v4-flash:free","n":130,"partial":false,"reward":0.0233,"reward_strict":0.0,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.0,"harness":"hermes","int_rate":0.0231,"intercepted":3,"matched":2,"matched_strict":0,"model":"minimax-m2.5:free","n":130,"partial":false,"reward":0.0154,"reward_strict":0.0,"wall_h":null},{"corpus":"v2","harness":"openclaw","int_rate":0.0,"intercepted":0,"matched":0,"matched_strict":0,"model":"glm-5.1","n":130,"partial":false,"reward":0.0,"reward_strict":0.0,"wall_h":null}],"rows_v2_claude_code":[],"rows_v2_codex":[],"rows_v2_hermes":[{"corpus":"v2","cost_per_task_usd":4.4425,"harness":"hermes","int_rate":0.5462,"intercepted":71,"matched":58,"matched_strict":32,"model":"claude-opus-4-7","n":130,"partial":false,"reward":0.4462,"reward_strict":0.2462,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.3325,"harness":"hermes","int_rate":0.4538,"intercepted":59,"matched":46,"matched_strict":24,"model":"gpt-5.5","n":130,"partial":false,"reward":0.3538,"reward_strict":0.1846,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.1935,"harness":"hermes","int_rate":0.4846,"intercepted":63,"matched":45,"matched_strict":23,"model":"glm-5.1","n":130,"partial":false,"reward":0.3462,"reward_strict":0.1769,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.07207,"harness":"hermes","int_rate":0.4385,"intercepted":57,"matched":44,"matched_strict":16,"model":"deepseek-v4-pro","n":130,"partial":false,"reward":0.3385,"reward_strict":0.1231,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.0,"harness":"hermes","int_rate":0.031,"intercepted":4,"matched":3,"matched_strict":0,"model":"deepseek-v4-flash:free","n":130,"partial":false,"reward":0.0233,"reward_strict":0.0,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.0,"harness":"hermes","int_rate":0.0462,"intercepted":6,"matched":3,"matched_strict":1,"model":"z-ai/glm-4.5-air:free","n":130,"partial":false,"reward":0.0231,"reward_strict":0.0077,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.0,"harness":"hermes","int_rate":0.0231,"intercepted":3,"matched":2,"matched_strict":0,"model":"minimax-m2.5:free","n":130,"partial":false,"reward":0.0154,"reward_strict":0.0,"wall_h":null},{"corpus":"v2","cost_per_task_usd":0.3704,"harness":"hermes","int_rate":0.1462,"intercepted":19,"matched":0,"matched_strict":0,"model":"openrouter-owl-alpha","n":130,"partial":false,"reward":0.0,"reward_strict":0.0,"wall_h":null}],"rows_v2_openclaw":[{"corpus":"v2","harness":"openclaw","int_rate":0.0,"intercepted":0,"matched":0,"matched_strict":0,"model":"glm-5.1","n":130,"partial":false,"reward":0.0,"reward_strict":0.0,"wall_h":null}],"sort_key":"intercepted / corpus_size (V2: 130, V1: 153) DESC, reward DESC"}
