Kestrel client
Caribou includes a client for the Kestrel Postgres API. Use it to register projects, runs, run steps, and datapoints so traces link up in the Kestrel UI.

Setup

kestrel = caribou.get_kestrel_client()

if kestrel.enabled:
    project = kestrel.get_or_create_project(name="my-project")
The client is enabled only when both KESTREL_API_URL and KESTREL_API_KEY are set. Check kestrel.enabled before making any other client calls, as in the snippet above.

Projects

project = kestrel.get_or_create_project(name="my-project")
project_id = project["id"]

Runs

Register runs for eval and train workloads. Production deployments skip run registration and use the project name as run_id.
run = kestrel.create_run(
    project_id=project_id,
    version_hash="abc123",
    env_name="my-env",
    dataloader_name="my-dataloader",
    grader_name="my-grader",
    model_name="gpt-5",
)
run_id = run["id"]
Update run status:
from datetime import datetime, UTC

kestrel.update_run(run_id, status="running")
kestrel.update_run(run_id, status="completed", completed_at=datetime.now(UTC))

Run steps

Create steps within a run to group traces by training/eval step:
kestrel.create_run_step(run_id, step_number=0, phase="eval")
kestrel.create_run_step(run_id, step_number=1, phase="train")

Datapoints

Register datapoints so traces can reference them. Registration is non-blocking and runs in the background.
dp = kestrel.get_or_create_datapoint(
    name="test-case-1",
    problem_input={"question": "What is 2+2?"},
    grader_input={"expected_answer": "4"},
)
datapoint_id = dp["id"]
For bulk registration, pass all of your datapoints in a single call:
kestrel.register_datapoints_batch(datapoints)

Cleanup

caribou.shutdown_kestrel()

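Comprehensive eval example

This example uses await, so run it inside an async function (for example, one started with asyncio.run). The names datapoints, get_completion, and grade are placeholders for your own dataset, model call, and grading function.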

import caribou
from caribou import Message, ToolExecution, Grade
from datetime import datetime, UTC

kestrel = caribou.get_kestrel_client()

if kestrel.enabled:
    project = kestrel.get_or_create_project(name="my-project")
    run = kestrel.create_run(
        project_id=project["id"],
        version_hash="abc123",
        env_name="my-env",
        dataloader_name="my-dataloader",
        grader_name="my-grader",
        model_name="gpt-5",
    )
    run_id = run["id"]
    kestrel.update_run(run_id, status="running")
    kestrel.create_run_step(run_id, step_number=0, phase="eval")
else:
    run_id = "local-run"

caribou.configure(run_id=run_id, phase="eval")

for datapoint in datapoints:
    if kestrel.enabled:
        dp = kestrel.get_or_create_datapoint(
            name=datapoint["name"],
            problem_input=datapoint["input"],
            grader_input=datapoint["expected"],
        )
        datapoint_id = dp["id"]
    else:
        datapoint_id = datapoint["name"]

    with caribou.trace_run({
        "datapoint_id": datapoint_id,
        "phase": "eval",
        "env_name": "my-env",
        "model_name": "gpt-5",
        "grader_name": "my-grader",
    }) as trace_id:
        caribou.log(Message(role="user", content=datapoint["prompt"]))

        completion = await get_completion(datapoint["prompt"])
        caribou.log(Message(
            role="assistant",
            content=completion["content"],
            model="gpt-5",
            input_tokens=completion["usage"]["input"],
            output_tokens=completion["usage"]["output"],
        ))

        score = await grade(completion, datapoint["expected"])
        caribou.log(Grade(score=score, grader_type="MyGrader"))
        caribou.set_status(caribou.RolloutStatus.COMPLETED)

    caribou.flush()

if kestrel.enabled:
    kestrel.update_run(run_id, status="completed", completed_at=datetime.now(UTC))

caribou.shutdown()