1- # Coordinator + Worker delegation test
1+ # Coordinator + Worker delegation test (Claude Code style)
22#
3- # Tests whether a coordinator agent can decompose a complex task
4- # and delegate sub-questions to a worker agent — similar to how
5- # Claude Code spawns sub-agents.
3+ # The coordinator has NO data — only an analyst tool (worker sub-agent).
4+ # It must: 1) decide what to ask, 2) call the analyst, 3) inspect results,
5+ # 4) decide if more info is needed or assemble the answer.
66#
7- # The coordinator has NO data — it can only delegate to the worker.
8- # The worker has all datasets and answers focused questions.
7+ # Uses completion_mode: :auto — println means "exploring", no println means "done".
98#
109# Usage:
1110# cd demo && mix run scripts/coordinator_test.exs
1211#
1312# Set OPENROUTER_API_KEY in .env or environment.
1413
15- alias PtcDemo . { CLIBase , SampleData , SearchTool }
14+ alias PtcDemo . { CLIBase , SampleData }
1615alias PtcRunner.SubAgent
1716
1817CLIBase . load_dotenv ( )
@@ -21,7 +20,7 @@ CLIBase.ensure_api_key!()
2120model = System . get_env ( "COORDINATOR_MODEL" ) || "openrouter:google/gemini-3.1-flash-lite-preview"
2221timeout = 60_000
2322
24- IO . puts ( "=== Coordinator + Worker Delegation Test ===" )
23+ IO . puts ( "=== Coordinator + Worker Test (auto-return) ===" )
2524IO . puts ( "Model: #{ model } \n " )
2625
2726# --- LLM callback ---
@@ -41,7 +40,7 @@ llm = fn %{system: system, messages: messages} ->
4140 end
4241end
4342
44- # --- Datasets (only for the worker) ---
43+ # --- Datasets (only the worker sees these) ---
4544
4645datasets = % {
4746 "products" => SampleData . products ( ) ,
@@ -50,19 +49,15 @@ datasets = %{
5049 "expenses" => SampleData . expenses ( )
5150}
5251
53- # --- Worker: a function tool that internally runs a SubAgent ---
54- # This is the key pattern: the worker is a plain function tool from
55- # the coordinator's perspective, but internally spawns a full SubAgent
56- # with its own LLM call and data access.
52+ # --- Worker: function tool that spawns a SubAgent ---
5753
5854worker_agent =
5955 SubAgent . new (
6056 prompt: "{{question}}" ,
6157 signature: "(question :string) -> :any" ,
6258 context_descriptions: SampleData . context_descriptions ( ) ,
6359 system_prompt: % {
64- prefix:
65- "You are a data analyst. Answer the question precisely using the datasets provided." ,
60+ prefix: "You are a data analyst. Answer the question precisely using the datasets." ,
6661 language_spec: :single_shot
6762 } ,
6863 max_turns: 1
@@ -78,35 +73,32 @@ analyst_tool = fn %{"question" => question} ->
7873 end
7974end
8075
81- # --- Coordinator agent: decomposes and delegates ---
82- # The coordinator has NO datasets — it can only call the analyst tool.
83- # It must break the problem into sub-questions and combine results.
76+ # --- Coordinator: auto-return mode, no data, only the analyst tool ---
8477
8578coordinator =
8679 SubAgent . new (
8780 prompt: "{{mission}}" ,
8881 signature: "(mission :string) -> :map" ,
82+ completion_mode: :auto ,
8983 tools: % {
9084 "analyst" =>
9185 { analyst_tool ,
9286 signature: "(question :string) -> :any" ,
9387 description:
94- "Answers a data analysis question. Delegates to a sub-agent with full dataset access . " <>
95- "Available datasets : employees (200 records with id, department, salary, remote, level), " <>
96- "expenses (800 records with employee_id, amount, category, status), " <>
97- "orders (1000 records with customer_id, total, created_at, status), " <>
98- "products (500 records with category, price, stock). " <>
88+ "Answers a data analysis question using datasets not available to you . " <>
89+ "Datasets : employees (id, department, salary, remote, level), " <>
90+ "expenses (employee_id, amount, category, status), " <>
91+ "orders (customer_id, total, created_at, status), " <>
92+ "products (category, price, stock). " <>
9993 "Ask focused questions that return simple values (numbers, lists, maps)." }
10094 } ,
10195 system_prompt: % {
10296 prefix: """
103- You are a coordinator that breaks down complex data analysis tasks.
104- You have an analyst tool that can query datasets and return results.
105- Break the mission into focused sub-questions, call the analyst for each,
106- then combine the results into the final answer.
107- You do NOT have direct access to data — you must use the analyst tool.
97+ You are a coordinator. You have NO direct data access.
98+ Use the analyst tool to query datasets. Use println to inspect results.
99+ When you have all the data you need, write your final answer as the last expression (no println).
108100 """ ,
109- language_spec: :multi_turn
101+ language_spec: :auto_return
110102 } ,
111103 max_turns: 6 ,
112104 timeout: 120_000 ,
@@ -120,9 +112,7 @@ tests = [
120112 name: "Remote vs Office expenses" ,
121113 mission:
122114 "Compare average expense amounts between remote and office employees. " <>
123- "Ask the analyst for the average expense amount for remote employees, " <>
124- "then ask for the average expense amount for office employees. " <>
125- "Return a map with :remote_avg, :office_avg, and :remote_higher (boolean)." ,
115+ "Return a map with :remote_avg (number), :office_avg (number), and :remote_higher (boolean)." ,
126116 check: fn result ->
127117 is_map ( result ) and
128118 Map . has_key? ( result , :remote_avg ) and
@@ -142,6 +132,17 @@ tests = [
142132 Map . has_key? ( result , :silver ) and
143133 Map . has_key? ( result , :gold )
144134 end
135+ } ,
136+ % {
137+ name: "Department with highest avg salary" ,
138+ mission:
139+ "Find which department has the highest average salary. " <>
140+ "Return a map with :department (string) and :avg_salary (number)." ,
141+ check: fn result ->
142+ is_map ( result ) and
143+ Map . has_key? ( result , :department ) and
144+ Map . has_key? ( result , :avg_salary )
145+ end
145146 }
146147]
147148
@@ -159,10 +160,12 @@ for test <- tests do
159160 { :ok , step } ->
160161 SubAgent.Debug . print_trace ( step , raw: true , usage: true )
161162 result = step . return
163+ turns = length ( step . turns )
162164
163165 passed = test . check . ( result )
164166 status = if passed , do: "PASS" , else: "FAIL"
165167 IO . puts ( "\n Result: #{ inspect ( result , limit: 10 , pretty: true ) } " )
168+ IO . puts ( "Turns: #{ turns } " )
166169 IO . puts ( "#{ status } \n " )
167170
168171 { :error , step } ->
0 commit comments