Installation
Copy
pip install helios
# or
uv pip install helios
Quick Start
Copy
import asyncio
from helios import AgentRunner
async def main():
    """Run the ``tasks/create-hello-file`` task once and print a summary."""
    runner = AgentRunner(
        task_path="tasks/create-hello-file",
        model="claude-sonnet-4-20250514",
    )
    result = await runner.run()

    # Summarise the run: pass/fail flag, reward, and duration.
    print(f"Status: {'PASS' if result.success else 'FAIL'}")
    print(f"Reward: {result.reward}")
    print(f"Duration: {result.duration:.1f}s")
asyncio.run(main())
Core Classes
Common Patterns
Run a Task and Check Result
Copy
import asyncio
from helios import AgentRunner
async def run_task():
    """Run ``tasks/my-task`` once and report whether it succeeded."""
    runner = AgentRunner(
        task_path="tasks/my-task",
        model="claude-sonnet-4-20250514",
    )
    result = await runner.run()

    # ``result.success`` is the pass/fail flag; the reward is shown on failure.
    if result.success:
        print("Task completed successfully!")
    else:
        print(f"Task failed with reward: {result.reward}")
asyncio.run(run_task())
Run Multiple Tasks in Parallel
Copy
import asyncio
from helios import ParallelRunner, discover_tasks
async def run_benchmark():
    """Run every task found under ``tasks/benchmark/`` and print the totals."""
    task_paths = discover_tasks("tasks/benchmark/")
    runner = ParallelRunner(
        task_paths=task_paths,
        n_concurrent=4,
        model="claude-sonnet-4-20250514",
        output_dir="results/",
    )
    result = await runner.run()

    # Aggregate figures for the whole batch.
    print(f"Passed: {result.passed}/{result.total_tasks}")
    print(f"Mean reward: {result.mean_reward:.3f}")
asyncio.run(run_benchmark())
Custom Task Discovery
Copy
from pathlib import Path
from helios import ParallelRunner
# Select specific tasks by listing their directories explicitly
task_paths = [
    Path(task_dir)
    for task_dir in ("tasks/easy-task", "tasks/medium-task", "tasks/hard-task")
]

# Hand the explicit list to the runner instead of a discovered one.
runner = ParallelRunner(
    model="claude-sonnet-4-20250514",
    n_concurrent=3,
    task_paths=task_paths,
)
Using Different Providers
Copy
from helios import AgentRunner
# Local Docker
runner = AgentRunner(
    provider="docker",
    task_path="tasks/my-task",
    model="claude-sonnet-4-20250514",
)

# Daytona Cloud (same task and model; only the provider changes)
runner = AgentRunner(
    provider="daytona",
    task_path="tasks/my-task",
    model="claude-sonnet-4-20250514",
)
Accessing Trajectory
Copy
import asyncio
from helios import AgentRunner
async def analyze_execution():
    """Run ``tasks/my-task`` and print each step of the resulting trajectory."""
    runner = AgentRunner(
        task_path="tasks/my-task",
        model="claude-sonnet-4-20250514",
        output_dir="output/",
    )
    result = await runner.run()

    # Access the trajectory
    for step in result.trajectory:
        print(f"Step {step.index}: {step.tool_name}")
        print(f"  Input: {step.tool_input}")
        # Only the first 100 characters of the output are shown.
        print(f"  Output: {step.tool_output[:100]}...")
asyncio.run(analyze_execution())
Type Definitions
RunResult
Copy
@dataclass
class RunResult:
success: bool # Whether the task passed
reward: float # Reward value (0.0 - 1.0)
duration: float # Execution time in seconds
trajectory: list # List of execution steps
error: str | None # Error message if failed
BatchResult
Copy
@dataclass
class BatchResult:
    """Aggregate result record for a batch of tasks."""

    total_tasks: int  # Number of tasks run
    passed: int  # Number of tasks passed
    failed: int  # Number of tasks failed
    mean_reward: float  # Average reward
    duration: float  # Total execution time
    # Quoted forward reference: TaskResult is defined after this class.
    tasks: list["TaskResult"]  # Individual task results
TaskResult
Copy
@dataclass
class TaskResult:
name: str # Task name
status: str # "passed" or "failed"
reward: float # Reward value
duration: float # Execution time
error: str | None # Error message if failed