Skip to main content

Documentation Index

Fetch the complete documentation index at: https://allhandsai-burak-2864-agent-based-hook-evaluation.mintlify.app/llms.txt

Use this file to discover all available pages before exploring further.

A ready-to-run example is available here!

Overview

Hooks let you observe and customize key lifecycle moments in the SDK without forking core code. Typical uses include:
  • Logging and analytics
  • Emitting custom metrics
  • Auditing or compliance
  • Tracing and debugging

Hook Types

| Hook | When it runs | Can block? |
| --- | --- | --- |
| PreToolUse | Before tool execution | Yes (exit 2) |
| PostToolUse | After tool execution | No |
| UserPromptSubmit | Before processing user message | Yes (exit 2) |
| Stop | When agent tries to finish | Yes (exit 2) |
| SessionStart | When conversation starts | No |
| SessionEnd | When conversation ends | No |

Exit Codes

Hook scripts signal their result through their exit code. The SDK matches the Claude Code hook contract:
  • 0 — success. The operation proceeds. stdout is parsed as JSON for structured output (decision, reason, additionalContext, continue).
  • 2 — block. The operation is denied. For PreToolUse and UserPromptSubmit this rejects the action; for Stop it prevents the agent from finishing and the conversation continues. stderr / reason is surfaced as feedback.
  • Any other non-zero exit code — non-blocking error. success is set to False and the error is logged via HookExecutionEvent, but the operation still proceeds.
Only exit code 2 blocks. Exit code 1 (the conventional Unix failure code) is treated as a non-blocking error. A hook intended to enforce a policy must exit with 2.

Key Concepts

  • Registration points: subscribe to events or attach pre/post hooks around LLM calls and tool execution
  • Isolation: hooks run outside the agent loop logic, avoiding core modifications
  • Composition: enable or disable hooks per environment (local vs. prod)

Ready-to-run Example

This example is available on GitHub: examples/01_standalone_sdk/33_hooks
examples/01_standalone_sdk/33_hooks/main.py
"""OpenHands Agent SDK — Hooks Example

Demonstrates the OpenHands hooks system.
Hooks are shell scripts that run at key lifecycle events:

- PreToolUse: Block dangerous commands before execution
- PostToolUse: Log tool usage after execution
- UserPromptSubmit: Inject context into user messages
- Stop: Enforce task completion criteria

The hook scripts are in the hook_scripts/ directory alongside this file.
"""

import os
import shlex
import signal
import tempfile
from pathlib import Path

from pydantic import SecretStr

from openhands.sdk import LLM, Conversation
from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher
from openhands.tools.preset.default import get_default_agent


def _raise_keyboard_interrupt(*_):
    """Signal handler that raises KeyboardInterrupt when SIGINT is delivered."""
    raise KeyboardInterrupt()


# Route SIGINT (Ctrl+C) through the handler above.
signal.signal(signal.SIGINT, _raise_keyboard_interrupt)

# Directory holding the hook shell scripts used by this example.
SCRIPT_DIR = Path(__file__).parent / "hook_scripts"

# Configure LLM: the API key is mandatory, model and base URL are optional.
api_key = os.environ.get("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."
base_url = os.environ.get("LLM_BASE_URL")
model = os.environ.get("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")

llm = LLM(
    usage_id="agent",
    model=model,
    base_url=base_url,
    api_key=SecretStr(api_key),
)

# Create temporary workspace with git repo
with tempfile.TemporaryDirectory() as tmpdir:
    workspace = Path(tmpdir)
    # The setup command and the hook commands below use shell syntax
    # (`cd … &&`, `VAR=value script`), so every interpolated path is passed
    # through shlex.quote to survive spaces or shell metacharacters.
    os.system(
        f"cd {shlex.quote(str(workspace))} && git init -q && echo 'test' > file.txt"
    )

    # Files the hook scripts write to / check for inside the workspace.
    log_file = workspace / "tool_usage.log"
    summary_file = workspace / "summary.txt"

    # Configure hooks using the typed approach (recommended)
    # This provides better type safety and IDE support
    hook_config = HookConfig(
        # PreToolUse: runs before "terminal" tool calls and may block them.
        pre_tool_use=[
            HookMatcher(
                matcher="terminal",
                hooks=[
                    HookDefinition(
                        command=shlex.quote(str(SCRIPT_DIR / "block_dangerous.sh")),
                        timeout=10,
                    )
                ],
            )
        ],
        # PostToolUse: runs after every tool call ("*") and appends to log_file.
        post_tool_use=[
            HookMatcher(
                matcher="*",
                hooks=[
                    HookDefinition(
                        command=(
                            f"LOG_FILE={shlex.quote(str(log_file))} "
                            f"{shlex.quote(str(SCRIPT_DIR / 'log_tools.sh'))}"
                        ),
                        timeout=5,
                    )
                ],
            )
        ],
        # UserPromptSubmit: may add git status as extra context to a message.
        user_prompt_submit=[
            HookMatcher(
                hooks=[
                    HookDefinition(
                        command=shlex.quote(
                            str(SCRIPT_DIR / "inject_git_context.sh")
                        ),
                    )
                ],
            )
        ],
        # Stop: keeps the agent from finishing until summary_file exists.
        stop=[
            HookMatcher(
                hooks=[
                    HookDefinition(
                        command=(
                            f"SUMMARY_FILE={shlex.quote(str(summary_file))} "
                            f"{shlex.quote(str(SCRIPT_DIR / 'require_summary.sh'))}"
                        ),
                    )
                ],
            )
        ],
    )

    # Alternative: You can also use .from_dict() for loading from JSON config files
    # Example with a single hook matcher:
    # hook_config = HookConfig.from_dict({
    #     "hooks": {
    #         "PreToolUse": [{
    #             "matcher": "terminal",
    #             "hooks": [{"command": "path/to/script.sh", "timeout": 10}]
    #         }]
    #     }
    # })

    agent = get_default_agent(llm=llm)
    conversation = Conversation(
        agent=agent,
        workspace=str(workspace),
        hook_config=hook_config,
    )

    # Demo 1: Safe command (PostToolUse logs it)
    print("=" * 60)
    print("Demo 1: Safe command - logged by PostToolUse")
    print("=" * 60)
    conversation.send_message("Run: echo 'Hello from hooks!'")
    conversation.run()

    if log_file.exists():
        print(f"\n[Log: {log_file.read_text().strip()}]")

    # Demo 2: Dangerous command (PreToolUse blocks it)
    print("\n" + "=" * 60)
    print("Demo 2: Dangerous command - blocked by PreToolUse")
    print("=" * 60)
    conversation.send_message("Run: rm -rf /tmp/test")
    conversation.run()

    # Demo 3: Context injection + Stop hook enforcement
    print("\n" + "=" * 60)
    print("Demo 3: Context injection + Stop hook")
    print("=" * 60)
    print("UserPromptSubmit injects git status; Stop requires summary.txt\n")
    conversation.send_message(
        "Check what files have changes, then create summary.txt describing the repo."
    )
    conversation.run()

    if summary_file.exists():
        print(f"\n[summary.txt: {summary_file.read_text()[:80]}...]")

    print("\n" + "=" * 60)
    print("Example Complete!")
    print("=" * 60)

    # Report the accumulated LLM cost of the whole conversation.
    cost = conversation.conversation_stats.get_combined_metrics().accumulated_cost
    print(f"\nEXAMPLE_COST: {cost}")
You can run the example code as-is.
The model name should follow the LiteLLM convention: provider/model_name (e.g., anthropic/claude-sonnet-4-5-20250929, openai/gpt-4o). The LLM_API_KEY should be the API key for your chosen provider.
ChatGPT Plus/Pro subscribers: You can use LLM.subscription_login() to authenticate with your ChatGPT account and access Codex models without consuming API credits. See the LLM Subscriptions guide for details.

Hook Scripts

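The example uses four external hook scripts in the hook_scripts/ directory, shown below in this order: block_dangerous.sh (PreToolUse), log_tools.sh (PostToolUse), inject_git_context.sh (UserPromptSubmit), and require_summary.sh (Stop):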
#!/bin/bash
# PreToolUse hook: refuse any terminal command containing "rm -rf".
# The hook event arrives as JSON on stdin; jq pulls out the nested
# tool_input.command field (empty string when the field is absent).

payload=$(cat)
cmd=$(jq -r '.tool_input.command // ""' <<<"$payload")

# Commands without the literal substring "rm -rf" are allowed.
if [[ "$cmd" != *"rm -rf"* ]]; then
    exit 0  # Exit code 0 = allow the operation
fi

# Explain the denial, then block.
echo '{"decision": "deny", "reason": "rm -rf commands are blocked for safety"}'
exit 2  # Exit code 2 = block the operation
#!/bin/bash
# PostToolUse hook: append one timestamped line per tool call to a log file.
# The tool name comes from the OPENHANDS_TOOL_NAME env var, so no JSON
# parsing (jq/python) is required.

# LOG_FILE is normally provided by the caller; fall back to a /tmp default.
: "${LOG_FILE:=/tmp/tool_usage.log}"

printf '[%s] Tool used: %s\n' "$(date)" "$OPENHANDS_TOOL_NAME" >> "$LOG_FILE"
exit 0
#!/bin/bash
# UserPromptSubmit hook: Inject git status when user asks about code changes.
# Reads the hook event from stdin and, when relevant, prints a JSON object
# with an "additionalContext" field on stdout. Always exits 0 (never blocks).

input=$(cat)

# Check if user is asking about changes, diff, or git
if echo "$input" | grep -qiE "(changes|diff|git|commit|modified)"; then
    # Get git status if in a git repo
    if git rev-parse --git-dir > /dev/null 2>&1; then
        # Cap the injected context at the first 10 status lines.
        status=$(git status --short 2>/dev/null | head -10)
        if [ -n "$status" ]; then
            # Escape for JSON. Backslashes must be escaped first: git quotes
            # unusual file names with backslash sequences (e.g. "caf\303\251"),
            # which would otherwise produce invalid JSON. Then escape double
            # quotes and flatten newlines into spaces.
            escaped=$(printf '%s\n' "$status" | sed 's/\\/\\\\/g; s/"/\\"/g' | tr '\n' ' ')
            # printf emits the value verbatim (no echo backslash handling).
            printf '{"additionalContext": "Current git status: %s"}\n' "$escaped"
        fi
    fi
fi
exit 0
#!/bin/bash
# Stop hook: only let the agent finish once the summary file exists.
# SUMMARY_FILE is normally provided by the caller; defaults to ./summary.txt.

SUMMARY_FILE="${SUMMARY_FILE:-./summary.txt}"

# Summary present: allow the agent to stop.
if [ -f "$SUMMARY_FILE" ]; then
    exit 0
fi

# Summary missing: tell the agent what to do and block (exit code 2).
echo '{"decision": "deny", "additionalContext": "Create summary.txt first."}'
exit 2

Agent-based Hooks

Besides shell scripts, a hook can delegate its decision to an LLM-driven sub-agent by setting type="agent". The sub-agent receives the lifecycle event as JSON, reasons about it semantically, and replies with a decision payload:
{"decision": "allow" | "deny", "reason": "<short explanation>"}
This is useful when a syntactic blacklist is not enough — for example, a PreToolUse reviewer that recognises awk '{print}' /etc/passwd as reading a sensitive file even though no obvious keyword (cat, /etc/shadow) appears. Key fields on an agent HookDefinition:
  • system_prompt — the policy the reviewer agent follows.
  • tools — optional tools the reviewer may use (e.g. ["file_editor"] to inspect the workspace before deciding).
  • timeout / max_iterations — bound how long the reviewer runs.
The agent hook runs in an isolated sub-conversation (its own ephemeral state, no nested hooks), and its LLM spend is tracked under an agent-hook:<name> usage bucket that is merged back into the parent conversation’s metrics. If no LLM is available or the reviewer fails to produce a valid decision, the hook fails open (allows) so it never blocks the agent on an internal error.
This example is available on GitHub: examples/01_standalone_sdk/51_agent_hooks
examples/01_standalone_sdk/51_agent_hooks/main.py
"""OpenHands Agent SDK — Agent-based Hooks Example

Demonstrates the `type="agent"` hook, which evaluates lifecycle events with an
LLM-driven sub-agent instead of a shell script. The hook agent receives the
event JSON, reasons about it semantically, and replies with a decision payload:

    {"decision": "allow" | "deny", "reason": "..."}

Two demos:

- PreToolUse (security reviewer): inspects the INTENT of a terminal command,
  not just its syntax. A command like `awk '{print}' /etc/passwd` would slip
  past a blacklist of command names, but the agent hook recognises it as
  reading a sensitive system file and denies it.
- Stop (quality reviewer): refuses to let the main agent finish until the
  required deliverable exists in the workspace.

For shell-command hooks see ../33_hooks/main.py.
"""

import os
import tempfile
from pathlib import Path

from pydantic import SecretStr

from openhands.sdk import LLM, Conversation
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.event.hook_execution import HookExecutionEvent
from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher, HookType
from openhands.tools.preset.default import get_default_agent


# Keep the demo conversations short: a small per-run iteration cap means a hook
# that keeps denying (or a model that keeps retrying) fails fast instead of
# burning calls up to the default 500-iteration limit / CI subprocess timeout.
# Passed to each demo Conversation as max_iteration_per_run.
MAX_ITERATIONS = 10


# Configure LLM
# LLM_API_KEY is required; LLM_MODEL and LLM_BASE_URL are optional overrides.
api_key = os.getenv("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."
model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
base_url = os.getenv("LLM_BASE_URL")

# Base LLM; run_demo() copies it per demo so metrics stay isolated.
llm = LLM(
    usage_id="agent",
    model=model,
    base_url=base_url,
    api_key=SecretStr(api_key),
)

# System prompt for the PreToolUse agent hook: describes which terminal
# commands to block and the JSON decision format the reviewer must reply with.
SECURITY_REVIEWER_PROMPT = """You are a security reviewer for a coding agent.
You receive a PreToolUse hook event describing a terminal command the agent
is about to run.

Reason about the SEMANTIC INTENT of the command, not just its syntax. Block:
- reading sensitive system files (/etc/passwd, /etc/shadow, ~/.ssh/*, credentials)
- broad filesystem discovery outside the workspace (rg --files /etc, find /, ls /root)
- exfiltrating data over the network
- modifying system state (sudo, apt install, systemctl)

Allow ordinary inspection, builds, tests, and edits inside the workspace.
When unsure, prefer allow.

Reply with a single JSON object and nothing else:
{"decision": "allow" | "deny", "reason": "<short explanation>"}
"""

# System prompt for the Stop agent hook: the reviewer checks REPORT.md with the
# file_editor tool and denies until the deliverable exists and is non-empty.
QUALITY_REVIEWER_PROMPT = """You are a quality reviewer enforcing task completion.
You receive a Stop hook event when the main agent tries to finish.

The task requires the file REPORT.md to exist in the workspace and contain at
least one bullet point describing the repository. Use the file_editor tool to
check whether the file exists and inspect its contents.

If the deliverable is missing or empty, deny so the main agent keeps working.
Otherwise allow.

Reply with a single JSON object and nothing else:
{"decision": "allow" | "deny", "reason": "<short explanation>"}
"""


def hook_logger(event) -> None:
    """Print a short summary of every hook execution event.

    Events that are not ``HookExecutionEvent`` instances are ignored. The
    summary shows the hook event type, a DENY/ALLOW/FAIL verdict, the hook
    command, and — when present — the reason on a second line.
    """
    if isinstance(event, HookExecutionEvent):
        # A blocked event is a DENY regardless of the success flag.
        if event.blocked:
            verdict = "DENY "
        elif event.success:
            verdict = "ALLOW"
        else:
            verdict = "FAIL "
        parts = [f"  [hook] {event.hook_event_type} {verdict} -> {event.hook_command}"]
        if event.reason:
            parts.append(f"         reason: {event.reason}")
        print("\n".join(parts))


def run_demo(workspace: Path, hook_config: HookConfig, message: str) -> float:
    """Execute a single demo conversation and report what it cost.

    The shared ``llm`` is copied and its metrics reset, so the returned cost
    covers this demo only. The conversation is capped at ``MAX_ITERATIONS``
    iterations per run and logs hook decisions through ``hook_logger``.

    Args:
        workspace: Directory the conversation works in.
        hook_config: Hooks active for this demo.
        message: User message that starts the conversation.

    Returns:
        The accumulated cost from the conversation's combined metrics.

    Raises:
        RuntimeError: If the conversation ends in the ERROR or STUCK state.
    """
    isolated_llm = llm.model_copy()
    isolated_llm.reset_metrics()
    demo_agent = get_default_agent(llm=isolated_llm)

    conversation = Conversation(
        agent=demo_agent,
        workspace=str(workspace),
        hook_config=hook_config,
        callbacks=[hook_logger],
        max_iteration_per_run=MAX_ITERATIONS,
    )
    conversation.send_message(message)
    conversation.run()

    # Fail loudly rather than reporting a cost for a run that never settled.
    failure_states = (
        ConversationExecutionStatus.ERROR,
        ConversationExecutionStatus.STUCK,
    )
    final_status = conversation.state.execution_status
    if final_status not in failure_states:
        metrics = conversation.conversation_stats.get_combined_metrics()
        return metrics.accumulated_cost

    raise RuntimeError(
        f"Demo conversation ended in {final_status.value} state "
        "before reaching a decision."
    )


# The two demos use separate hook configs on purpose. With one shared config
# the Stop quality gate would also be active during Demo 1, so the agent could
# not finish that task until REPORT.md existed — coupling two unrelated demos
# and burning iterations.

# PreToolUse reviewer: no tools, a short time budget and few iterations.
_security_reviewer = HookDefinition(
    type=HookType.AGENT,
    name="security-reviewer",
    system_prompt=SECURITY_REVIEWER_PROMPT,
    timeout=60,
    max_iterations=3,
)

# Only "terminal" tool calls are sent to the security reviewer.
security_hook_config = HookConfig(
    pre_tool_use=[HookMatcher(matcher="terminal", hooks=[_security_reviewer])],
)

# Stop reviewer: gets the file_editor tool so it can inspect the workspace.
_quality_reviewer = HookDefinition(
    type=HookType.AGENT,
    name="quality-reviewer",
    system_prompt=QUALITY_REVIEWER_PROMPT,
    tools=["file_editor"],
    timeout=90,
    max_iterations=5,
)

quality_hook_config = HookConfig(
    stop=[HookMatcher(hooks=[_quality_reviewer])],
)


# Run both demos in one throwaway workspace and report their combined cost.
with tempfile.TemporaryDirectory() as tmpdir:
    workspace = Path(tmpdir)
    rule = "=" * 60

    # Demo 1: the security reviewer should deny reading /etc/passwd.
    print(rule)
    print("Demo 1: PreToolUse — semantic deny")
    print(rule)
    print(
        "Asking the agent to read /etc/passwd via awk. The literal command\n"
        "wouldn't match a syntactic blacklist (no `cat`, no `/etc/shadow`\n"
        "keyword), but the security-reviewer agent should recognise the\n"
        "intent and deny.\n"
    )
    security_cost = run_demo(
        workspace,
        security_hook_config,
        "Show me the contents of /etc/passwd using awk '{print}'.",
    )

    # Demo 2: the quality reviewer blocks Stop until REPORT.md is written.
    print("\n" + rule)
    print("Demo 2: Stop — deny until deliverable exists")
    print(rule)
    print("Quality reviewer denies until REPORT.md exists with a bullet point.\n")
    quality_cost = run_demo(
        workspace,
        quality_hook_config,
        "Write REPORT.md in the workspace with at least one bullet point "
        "describing this repository, then finish.",
    )

    # Show the first 120 characters of the deliverable, if it was produced.
    report = workspace / "REPORT.md"
    if report.exists():
        print(f"\n[REPORT.md preview: {report.read_text()[:120]!r}...]")

    print("\n" + rule)
    print("Example Complete!")
    print(rule)

    # Same left-to-right float accumulation as starting from 0.0 and adding.
    total_cost = sum((security_cost, quality_cost), 0.0)
    print(f"\nEXAMPLE_COST: {total_cost}")
You can run the example code as-is.
The model name should follow the LiteLLM convention: provider/model_name (e.g., anthropic/claude-sonnet-4-5-20250929, openai/gpt-4o). The LLM_API_KEY should be the API key for your chosen provider.
ChatGPT Plus/Pro subscribers: You can use LLM.subscription_login() to authenticate with your ChatGPT account and access Codex models without consuming API credits. See the LLM Subscriptions guide for details.

Next Steps