Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@
"torchcodec>=0.7.0; python_version < '3.14'", # minimum version to get windows support, torchcodec doesn't have wheels for 3.14 yet
"nibabel>=5.3.1",
"trimesh>=4.10.0",
"teich==0.1.2",
"teich==0.1.5",
]

NUMPY2_INCOMPATIBLE_LIBRARIES = [
Expand Down
17 changes: 14 additions & 3 deletions src/datasets/packaged_modules/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ def _generate_tables(self, base_files, files_iterables, original_files, allow_fu
"claude_code": ["user", "assistant", "system"],
"pi": ["session", "message"],
"codex": ["session_meta", "turn_context", "response_item", "event_msg"],
# droid message events share pi's "message" type, but droid traces always start with a session_start event
"droid": ["session_start"],
}
AGENT_TRACES_TYPE_TO_HARNESS = {}
for _harness, _trace_types in AGENT_TRACES_TYPES_VALUES.items():
Expand Down Expand Up @@ -378,6 +380,14 @@ def _generate_tables(self, base_files, files_iterables, original_files, allow_fu
"messages": lambda f: isinstance(f, (datasets.List, datasets.Json)),
}
),
"droid": datasets.Features(
{
"type": lambda f: f == Value("string"),
"id": lambda f: f == Value("string"),
"version": lambda f: f == Value("int64"),
"cwd": lambda f: f == Value("string"),
}
),
}

AGENT_TRACES_FEATURES = datasets.Features(
Expand Down Expand Up @@ -450,8 +460,8 @@ def get_session_id(trace: dict) -> Optional[str]:
# codex
if isinstance(trace.get("payload"), dict) and isinstance(trace["payload"].get("id"), str):
return trace["payload"]["id"]
# pi / openclaw (openclaw embeds pi-agent; distinguish via cwd)
if trace.get("type") == "session" and isinstance(trace.get("id"), str):
# pi / openclaw on "session" (openclaw embeds pi-agent; distinguish via cwd), droid on "session_start"
if trace.get("type") in ("session", "session_start") and isinstance(trace.get("id"), str):
return trace["id"]
return None

Expand All @@ -465,7 +475,8 @@ def get_user_prompt(trace_event: dict) -> Optional[str]:
if trace_event.get("type") == "message":
if isinstance(trace_event.get("message"), dict):
message = trace_event["message"]
if message.get("role") == "user":
# droid marks injected context as llm_only and local-only notes as user_only; neither is a real user prompt
if message.get("role") == "user" and message.get("visibility") not in ("llm_only", "user_only"):
return get_content_text(message.get("content"))
if trace_event.get("role") == "user":
return get_content_text(trace_event.get("content"))
Expand Down
117 changes: 114 additions & 3 deletions tests/packaged_modules/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,74 @@ def assert_agent_traces_output(tmp_path, filename, rows, expected, num_sessions=
}


# Droid session fixture: one "session_start" header event followed by four
# "message" events linked to each other through "parentId".
DROID_SESSION = [
    # Header event carrying the session id, titles, owner, version and cwd.
    {
        "type": "session_start",
        "id": "droid-session",
        "title": "inspect the project",
        "sessionTitle": "Inspect project files",
        "owner": "caleb",
        "version": 2,
        "cwd": "/workspace/project",
    },
    # User-role message flagged with visibility "llm_only"; it has no parent.
    {
        "type": "message",
        "id": "context-1",
        "timestamp": "2026-06-02T18:55:29.000Z",
        "message": {
            "role": "user",
            "visibility": "llm_only",
            "content": [
                {
                    "type": "text",
                    "text": "<system-reminder>injected context</system-reminder>",
                },
            ],
        },
        "parentId": None,
    },
    # First user-role message that carries no "visibility" key.
    {
        "type": "message",
        "id": "message-1",
        "timestamp": "2026-06-02T18:55:30.274Z",
        "message": {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Inspect the project",
                },
            ],
        },
        "parentId": "context-1",
    },
    # Assistant turn: a thinking part, a text part and one tool_use part.
    {
        "type": "message",
        "id": "message-2",
        "timestamp": "2026-06-02T18:55:35.000Z",
        "message": {
            "role": "assistant",
            "content": [
                {
                    "type": "thinking",
                    "thinking": "I should list the files first.",
                    "signature": "reasoning_content",
                    "signatureProvider": "generic-chat-completion-api",
                    "durationMs": 1200,
                },
                {
                    "type": "text",
                    "text": "I'll list the files.",
                },
                {
                    "type": "tool_use",
                    "id": "LS_0",
                    "name": "LS",
                    "input": {"directory_path": "/workspace/project"},
                },
            ],
            "chatCompletionReasoningField": "reasoning_content",
            "chatCompletionReasoningContent": "I should list the files first.",
        },
        "parentId": "message-1",
    },
    # User-role message whose only content part is the tool_result for LS_0.
    {
        "type": "message",
        "id": "message-3",
        "timestamp": "2026-06-02T18:55:36.000Z",
        "message": {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": "LS_0",
                    "is_error": False,
                    "content": "README.md\nsrc",
                },
            ],
        },
        "parentId": "message-2",
    },
]


def test_config_raises_when_invalid_name() -> None:
with pytest.raises(InvalidConfigName, match="Bad characters"):
_ = JsonConfig(name="name-with-*-invalid-character")
Expand Down Expand Up @@ -653,6 +721,12 @@ def test_json_generate_tables_with_sorted_columns(file_fixture, config_kwargs, r
[HERMES_SESSION] * 2,
("hermes", "20260605_092247_d018ec", "Run pwd and date.", "2026-06-05T13:22:48.307Z", 1, 1),
),
pytest.param(
"droid.jsonl",
DROID_SESSION,
("droid", "droid-session", "Inspect the project", "2026-06-02T18:55:30.274Z", 1, 1),
id="droid",
),
pytest.param(
"missing_prompt.jsonl",
[
Expand All @@ -671,12 +745,26 @@ def test_json_generate_tables_with_sorted_columns(file_fixture, config_kwargs, r
def test_json_generate_tables_with_agent_trace_metadata(tmp_path, filename, rows, expected):
num_sessions = 2 if filename == "hermes_two_sessions.jsonl" else 1
_, out = assert_agent_traces_output(tmp_path, filename, rows, expected, num_sessions=num_sessions)
if filename == "droid.jsonl":
assert out["metadata"][0]["trace_type"] == "droid"
assert "models" not in out


@require_teich
def test_json_load_dataset_with_agent_trace_metadata(tmp_path):
trace_file = write_jsonl(tmp_path / "codex.jsonl", CODEX_AGENT_TRACE_ROWS)
@pytest.mark.parametrize(
"filename, rows, expected",
[
pytest.param("codex.jsonl", CODEX_AGENT_TRACE_ROWS, CODEX_EXPECTED_AGENT_TRACE_FIELDS, id="codex"),
pytest.param(
"droid.jsonl",
DROID_SESSION,
("droid", "droid-session", "Inspect the project", "2026-06-02T18:55:30.274Z", 1, 1),
id="droid",
),
],
)
def test_json_load_dataset_with_agent_trace_metadata(tmp_path, filename, rows, expected):
trace_file = write_jsonl(tmp_path / filename, rows)

dataset = load_dataset("json", data_files=trace_file, split="train", cache_dir=str(tmp_path / "cache"))
row = dataset[0]
Expand All @@ -694,5 +782,28 @@ def test_json_load_dataset_with_agent_trace_metadata(tmp_path):
"trace",
"file_path",
]
for key, value in zip(AGENT_TRACE_FIELD_NAMES_TO_CHECK, CODEX_EXPECTED_AGENT_TRACE_FIELDS):
for key, value in zip(AGENT_TRACE_FIELD_NAMES_TO_CHECK, expected):
assert row[key] == value, key
if filename == "droid.jsonl":
assert row["metadata"]["trace_type"] == "droid"
assert json.loads(row["trace"].splitlines()[0])["type"] == "session_start"


def test_json_load_dataset_without_droid_marker_stays_ordinary_json(tmp_path):
    """Check that an incomplete droid-like file loads as plain JSON columns.

    The first row is a ``session_start`` event with only ``type``, ``id`` and
    ``version`` (no ``cwd``). The loaded dataset is expected to expose the raw
    JSON keys as columns instead of agent-trace columns.
    """
    # session_start header without a "cwd" key.
    header_row = {"type": "session_start", "id": "droid-session", "version": 2}
    # A single ordinary user message event.
    message_row = {
        "type": "message",
        "id": "message-1",
        "timestamp": "2026-06-02T18:55:30.274Z",
        "message": {"role": "user", "content": [{"type": "text", "text": "Inspect the project"}]},
    }
    trace_file = write_jsonl(tmp_path / "droid_missing_marker.jsonl", [header_row, message_row])

    cache_dir = str(tmp_path / "cache")
    dataset = load_dataset("json", data_files=trace_file, split="train", cache_dir=cache_dir)

    # Columns are the union of the raw row keys, in order of first appearance.
    expected_columns = ["type", "id", "version", "timestamp", "message"]
    assert dataset.column_names == expected_columns
    assert dataset[0]["type"] == "session_start"
Loading