Skip to main content
The AgentRunner class is the primary interface for executing individual tasks.

Basic Usage

import asyncio
from helios import AgentRunner

async def main():
    """Run the ``create-hello-file`` task once and print the reward it earned."""
    settings = {
        "task_path": "tasks/create-hello-file",
        "model": "claude-sonnet-4-20250514",
    }
    runner = AgentRunner(**settings)

    # run() is a coroutine, so it has to be awaited from async code.
    result = await runner.run()
    print(f"Result: {result.reward}")

asyncio.run(main())

Constructor

AgentRunner(
    task_path: str | Path,
    model: str = "gemini/gemini-2.5-computer-use-preview-10-2025",
    provider: str = "docker",
    output_dir: str | Path = "output",
    watch: bool = False
)

Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| task_path | str \| Path | Required | Path to task directory |
| model | str | Gemini default | Model identifier |
| provider | str | "docker" | Environment provider |
| output_dir | str \| Path | "output" | Where to save outputs |
| watch | bool | False | Enable web viewer |

Methods

run()

Execute the task and return results.
async def run(self) -> RunResult
Returns: RunResult with execution details.

Example:
result = await runner.run()

if result.success:
    print("Task passed!")
else:
    print(f"Task failed: {result.error}")

RunResult

The result object returned by run().
@dataclass
class RunResult:
    """Result object returned by ``AgentRunner.run()``."""

    success: bool           # Whether the task passed (reward >= 1.0)
    reward: float           # Reward value from verification
    duration: float         # Execution time in seconds
    trajectory: list        # List of execution steps
    error: str | None       # Error message if failed
    output_dir: Path        # Where outputs were saved

Accessing the Trajectory

result = await runner.run()

for step in result.trajectory:
    print(f"Tool: {step.tool_name}")
    print(f"Input: {step.tool_input}")
    print(f"Output: {step.tool_output}")
    print("---")

Examples

Basic Task Execution

import asyncio
from helios import AgentRunner

async def run_task():
    """Execute one task and report whether it passed, its reward and its duration."""
    runner = AgentRunner(
        model="claude-sonnet-4-20250514",
        task_path="tasks/create-hello-file",
    )
    result = await runner.run()

    # One line per headline field of the RunResult.
    report = (
        f"Success: {result.success}",
        f"Reward: {result.reward}",
        f"Duration: {result.duration:.1f}s",
    )
    for line in report:
        print(line)

asyncio.run(run_task())

With Web Viewer

import asyncio
from helios import AgentRunner

async def run_with_viewer():
    """Execute a task with the web viewer enabled."""
    viewer_options = {
        "task_path": "tasks/explore-desktop",
        "model": "claude-sonnet-4-20250514",
        "watch": True,  # Start web viewer
    }
    runner = AgentRunner(**viewer_options)

    # Viewer available at http://localhost:8080
    result = await runner.run()

asyncio.run(run_with_viewer())

Using Daytona

import asyncio
from helios import AgentRunner

async def run_in_cloud():
    """Execute a task with the ``daytona`` provider and print the reward."""
    cloud_options = {
        "task_path": "tasks/gui-task",
        "model": "claude-sonnet-4-20250514",
        "provider": "daytona",  # overrides the default "docker" provider
    }
    runner = AgentRunner(**cloud_options)

    result = await runner.run()
    print(f"Cloud execution result: {result.reward}")

asyncio.run(run_in_cloud())

Error Handling

import asyncio
from helios import AgentRunner

async def run_with_error_handling():
    """Execute a task, reporting both failed runs and exceptions raised while running.

    A run that completes but does not pass is reported through the fields of
    the returned result; an exception raised during execution is reported
    separately by the ``except`` branch.
    """
    runner = AgentRunner(task_path="tasks/my-task", model="claude-sonnet-4-20250514")

    try:
        result = await runner.run()

        # Happy path first; everything below handles a run that did not pass.
        if result.success:
            print("Task completed successfully")
            return

        print(f"Task failed with reward: {result.reward}")
        if result.error:
            print(f"Error: {result.error}")

    except Exception as exc:
        print(f"Execution error: {exc}")

asyncio.run(run_with_error_handling())

Saving Results

import asyncio
import json
from helios import AgentRunner

async def run_and_save():
    """Run a task, then write a compact JSON summary next to its saved outputs.

    The runner saves its own results to ``output_dir``; this example adds a
    ``summary.json`` file in the directory reported by ``result.output_dir``.
    """
    runner = AgentRunner(
        task_path="tasks/my-task",
        model="claude-sonnet-4-20250514",
        output_dir="results/experiment-1"
    )

    result = await runner.run()

    # Results are automatically saved to output_dir
    # Additional custom processing:
    summary = {
        "success": result.success,
        "reward": result.reward,
        "duration": result.duration,
        "steps": len(result.trajectory)
    }

    # Pin the encoding so the file does not depend on the platform's locale default.
    with open(result.output_dir / "summary.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

asyncio.run(run_and_save())

Multiple Models

import asyncio
from helios import AgentRunner

async def compare_models():
    """Run the same task once per model, in order, and print each model's reward."""
    task_path = "tasks/create-hello-file"
    models = [
        "gemini/gemini-2.5-computer-use-preview-10-2025",
        "claude-sonnet-4-20250514",
        "openai/computer-use-preview"
    ]

    rewards_by_model = {}
    for model_id in models:
        # Model identifiers may contain "/", which would otherwise nest directories.
        folder_name = model_id.replace('/', '_')
        runner = AgentRunner(
            task_path=task_path,
            model=model_id,
            output_dir=f"results/{folder_name}"
        )

        outcome = await runner.run()
        rewards_by_model[model_id] = outcome.reward

    print("Results by model:")
    for model_id, reward in rewards_by_model.items():
        print(f"  {model_id}: {reward}")

asyncio.run(compare_models())

Best Practices

AgentRunner is async-only. Use asyncio.run() for scripts:
import asyncio

async def main():
    # Your code here
    pass

asyncio.run(main())
Always check result.success and handle failures:
result = await runner.run()
if not result.success:
    logger.error(f"Task failed: {result.error}")
Organize outputs by experiment or timestamp:
from datetime import datetime

output_dir = f"results/{datetime.now().strftime('%Y%m%d_%H%M%S')}"

Next Steps