
feat(api): (1/n) datasets api clean up #1573


Merged
merged 44 commits on Mar 17, 2025
Changes from 43 commits
Commits (44)
bc551e6
datasets api
yanxi0830 Mar 11, 2025
8592c2b
precommit
yanxi0830 Mar 11, 2025
0e8a53a
openapi
yanxi0830 Mar 11, 2025
02aa9a1
remove json_schema_type decorator
yanxi0830 Mar 11, 2025
0e47c65
update
yanxi0830 Mar 12, 2025
817331e
precommit
yanxi0830 Mar 12, 2025
0abedd0
comment
yanxi0830 Mar 12, 2025
1d80ec7
upgrade doc
yanxi0830 Mar 12, 2025
31e3409
Merge branch 'main' into pr1573
yanxi0830 Mar 12, 2025
f840018
Merge branch 'main' into pr1573
yanxi0830 Mar 12, 2025
8942071
Merge branch 'main' into pr1573
yanxi0830 Mar 13, 2025
18de4cd
comments
yanxi0830 Mar 13, 2025
a3173e8
update
yanxi0830 Mar 13, 2025
790b2d5
source
yanxi0830 Mar 13, 2025
09039ec
source
yanxi0830 Mar 13, 2025
4cc1958
huggingface obey consistency
yanxi0830 Mar 13, 2025
4f6f0f6
update doc
yanxi0830 Mar 13, 2025
772339b
update doc
yanxi0830 Mar 13, 2025
b4d118f
update doc
yanxi0830 Mar 13, 2025
0df3304
update doc
yanxi0830 Mar 13, 2025
8a6fa41
more purposes
yanxi0830 Mar 13, 2025
8b80a77
docs
yanxi0830 Mar 13, 2025
78ec3d9
Merge branch 'main' into pr1573
yanxi0830 Mar 13, 2025
89885fd
datasetio->datasets
yanxi0830 Mar 13, 2025
a609582
docs
yanxi0830 Mar 13, 2025
7606e49
feat(dataset api): (1.1/n) dataset api implementation fix pre-commit …
yanxi0830 Mar 13, 2025
0e2a13d
Merge branch 'main' into pr1573
yanxi0830 Mar 14, 2025
cba4842
Merge branch 'main' into pr1573
yanxi0830 Mar 14, 2025
c7d741d
Merge branch 'main' into pr1573
yanxi0830 Mar 15, 2025
39f4dfb
feat(api): (1.2/n) datasets.iterrorws pagination api updates (#1656)
yanxi0830 Mar 15, 2025
5cb0ad7
openapi gen + precommit fix
yanxi0830 Mar 15, 2025
72ccdc1
feat(datasets api): (1.3/n) patch OpenAPI gen for datasetio->datasets…
yanxi0830 Mar 15, 2025
2c9d624
feat(dataset api): (1.4/n) fix resolver signature mismatch (#1658)
yanxi0830 Mar 15, 2025
a568bf3
feat(dataset api): (1.5/n) fix dataset registeration (#1659)
yanxi0830 Mar 15, 2025
6f5df08
fix hf url endpoint
yanxi0830 Mar 15, 2025
28b8c1c
scoring fix
yanxi0830 Mar 16, 2025
f2d9332
pre
yanxi0830 Mar 16, 2025
a6fa3aa
feat(dataset api): (1.6/n) fix all iterrows callsites (#1660)
yanxi0830 Mar 16, 2025
5cf7779
fix integeration
yanxi0830 Mar 16, 2025
63f1525
precommit
yanxi0830 Mar 16, 2025
d9264a0
dataaset
yanxi0830 Mar 16, 2025
6a8bd19
next_index -> next_start_index
yanxi0830 Mar 17, 2025
cc48d9e
precommit
yanxi0830 Mar 17, 2025
54a4f41
doc update
yanxi0830 Mar 17, 2025
733 changes: 410 additions & 323 deletions docs/_static/llama-stack-spec.html

Large diffs are not rendered by default.

489 changes: 285 additions & 204 deletions docs/_static/llama-stack-spec.yaml

Large diffs are not rendered by default.

2,186 changes: 1,080 additions & 1,106 deletions docs/notebooks/Alpha_Llama_Stack_Post_Training.ipynb

Large diffs are not rendered by default.

352 changes: 268 additions & 84 deletions docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb

Large diffs are not rendered by default.

39 changes: 22 additions & 17 deletions docs/openapi_generator/pyopenapi/generator.py
@@ -435,7 +435,7 @@ def __init__(self, endpoint: type, options: Options) -> None:
)
self.schema_builder = SchemaBuilder(schema_generator)
self.responses = {}

# Create standard error responses
self._create_standard_error_responses()

@@ -446,7 +446,7 @@ def _create_standard_error_responses(self) -> None:
"""
# Get the Error schema
error_schema = self.schema_builder.classdef_to_ref(Error)

# Create standard error responses
self.responses["BadRequest400"] = Response(
description="The request was invalid or malformed",
@@ -457,11 +457,11 @@ def _create_standard_error_responses(self) -> None:
"status": 400,
"title": "Bad Request",
"detail": "The request was invalid or malformed",
}
},
)
}
},
)

self.responses["TooManyRequests429"] = Response(
description="The client has sent too many requests in a given amount of time",
content={
@@ -471,11 +471,11 @@ def _create_standard_error_responses(self) -> None:
"status": 429,
"title": "Too Many Requests",
"detail": "You have exceeded the rate limit. Please try again later.",
}
},
)
}
},
)

self.responses["InternalServerError500"] = Response(
description="The server encountered an unexpected error",
content={
@@ -485,11 +485,11 @@ def _create_standard_error_responses(self) -> None:
"status": 500,
"title": "Internal Server Error",
"detail": "An unexpected error occurred. Our team has been notified.",
}
},
)
}
},
)

# Add a default error response for any unhandled error cases
self.responses["DefaultError"] = Response(
description="An unexpected error occurred",
@@ -500,9 +500,9 @@ def _create_standard_error_responses(self) -> None:
"status": 0,
"title": "Error",
"detail": "An unexpected error occurred",
}
},
)
}
},
)
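For illustration, a minimal client-side sketch of consuming one of these standard error payloads. Only the status/title/detail fields come from the Error schema above; the URL and the use of `requests` are assumptions.

```python
# Sketch: read the standard error payload (status/title/detail) from a failed request.
# The endpoint URL below is illustrative, not a documented route.
import requests

resp = requests.get("http://localhost:8321/v1/datasets/iterrows/my-dataset-id")
if resp.status_code >= 400:
    try:
        err = resp.json()
    except ValueError:
        err = {"status": resp.status_code, "title": "Error", "detail": resp.text}
    print(f"{err['status']} {err['title']}: {err['detail']}")
```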

def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
@@ -547,11 +547,14 @@ def _build_operation(self, op: EndpointOperation) -> Operation:
"SyntheticDataGeneration",
"PostTraining",
"BatchInference",
"Files",
]:
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
print(op.defining_class.__name__)

# TODO (xiyan): temporary fix for datasetio inner impl + datasets api
# if op.defining_class.__name__ in ["DatasetIO"]:
# op.defining_class.__name__ = "Datasets"

doc_string = parse_type(op.func_ref)
doc_params = dict(
(param.name, param.description) for param in doc_string.params.values()
@@ -598,7 +601,9 @@ def _build_operation(self, op: EndpointOperation) -> Operation:

# data passed in request body as raw bytes cannot have request parameters
if raw_bytes_request_body and op.request_params:
raise ValueError("Cannot have both raw bytes request body and request parameters")
raise ValueError(
"Cannot have both raw bytes request body and request parameters"
)

# data passed in request body as raw bytes
if raw_bytes_request_body:
@@ -719,7 +724,7 @@ def _build_operation(self, op: EndpointOperation) -> Operation:
responses.update(response_builder.build_response(response_options))

assert len(responses.keys()) > 0, f"No responses found for {op.name}"

# Add standard error response references
if self.options.include_standard_error_responses:
if "400" not in responses:
Expand All @@ -730,7 +735,7 @@ def _build_operation(self, op: EndpointOperation) -> Operation:
responses["500"] = ResponseRef("InternalServerError500")
if "default" not in responses:
responses["default"] = ResponseRef("DefaultError")

if op.event_type is not None:
builder = ContentBuilder(self.schema_builder)
callbacks = {
2 changes: 1 addition & 1 deletion docs/source/distributions/ondevice_distro/android_sdk.md
@@ -58,7 +58,7 @@ Breaking down the demo app, this section will show the core pieces that are used
### Setup Remote Inferencing
Start a Llama Stack server on localhost. Here is an example of how you can do this using the fireworks.ai distribution:
```
conda create -n stack-fireworks python=3.10
conda create -n stack-fireworks python=3.10
conda activate stack-fireworks
pip install --no-cache llama-stack==0.1.4
llama stack build --template fireworks --image-type conda
24 changes: 9 additions & 15 deletions docs/source/references/evals_reference/index.md
@@ -114,23 +114,17 @@ pprint(response)
simpleqa_dataset_id = "huggingface::simpleqa"

_ = client.datasets.register(
dataset_id=simpleqa_dataset_id,
provider_id="huggingface",
url={"uri": "https://huggingface.co/datasets/llamastack/simpleqa"},
metadata={
"path": "llamastack/simpleqa",
"split": "train",
},
dataset_schema={
"input_query": {"type": "string"},
"expected_answer": {"type": "string"},
"chat_completion_input": {"type": "chat_completion_input"},
purpose="eval/messages-answer",
source={
"type": "uri",
"uri": "huggingface://datasets/llamastack/simpleqa?split=train",
},
dataset_id=simpleqa_dataset_id,
)

eval_rows = client.datasetio.get_rows_paginated(
eval_rows = client.datasets.iterrows(
dataset_id=simpleqa_dataset_id,
rows_in_page=5,
limit=5,
)
```
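For reference, a minimal sketch of paging through the full dataset with the new API. It reuses `client` and `simpleqa_dataset_id` from the snippet above and assumes the response exposes `data` and `next_start_index`, as in the updated `IterrowsResponse` schema further down in this PR.

```python
# Sketch: iterate over all rows, 100 at a time, using index-based pagination.
all_rows = []
start_index = None
while True:
    page = client.datasets.iterrows(
        dataset_id=simpleqa_dataset_id,
        start_index=start_index,
        limit=100,
    )
    all_rows.extend(page.data)
    if page.next_start_index is None:  # no more rows to fetch
        break
    start_index = page.next_start_index
```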

@@ -143,7 +137,7 @@ client.benchmarks.register(

response = client.eval.evaluate_rows(
benchmark_id="meta-reference::simpleqa",
input_rows=eval_rows.rows,
input_rows=eval_rows.data,
scoring_functions=["llm-as-judge::405b-simpleqa"],
benchmark_config={
"eval_candidate": {
@@ -191,7 +185,7 @@ agent_config = {

response = client.eval.evaluate_rows(
benchmark_id="meta-reference::simpleqa",
input_rows=eval_rows.rows,
input_rows=eval_rows.data,
scoring_functions=["llm-as-judge::405b-simpleqa"],
benchmark_config={
"eval_candidate": {
34 changes: 15 additions & 19 deletions llama_stack/apis/datasetio/datasetio.py
@@ -13,19 +13,16 @@


@json_schema_type
class PaginatedRowsResult(BaseModel):
class IterrowsResponse(BaseModel):
"""
A paginated list of rows from a dataset.

:param rows: The rows in the current page.
:param total_count: The total number of rows in the dataset.
:param next_page_token: The token to get the next page of rows.
:param data: The rows in the current page.
:param next_start_index: Index into dataset for the first row in the next page. None if there are no more rows.
"""

# the rows obey the DatasetSchema for the given dataset
rows: List[Dict[str, Any]]
total_count: int
next_page_token: Optional[str] = None
data: List[Dict[str, Any]]
next_start_index: Optional[int] = None


class DatasetStore(Protocol):
@@ -37,22 +34,21 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore

@webmethod(route="/datasetio/rows", method="GET")
async def get_rows_paginated(
# TODO(xiyan): there's a flakiness here where setting route to "/datasets/" here will not result in proper routing
Contributor: nit: this is not flaky (which means it sometimes works vs. sometimes not) -- I think maybe you just mean "wonkiness" or "sadness"

Contributor (Author): yeah, it actually is the case that this sometimes works and sometimes does not if I set the route to /datasets. I suspect it may be due to the way we do topological sort in our resolver.

@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
async def iterrows(
self,
dataset_id: str,
rows_in_page: int,
page_token: Optional[str] = None,
filter_condition: Optional[str] = None,
) -> PaginatedRowsResult:
"""Get a paginated list of rows from a dataset.
start_index: Optional[int] = None,
limit: Optional[int] = None,
) -> IterrowsResponse:
"""Get a paginated list of rows from a dataset. Uses cursor-based pagination.
Contributor: I don't think this is using cursor-based pagination because this is just an index we are returning? Maybe just avoid saying this is cursor-based?

Contributor (Author): I'm basing the API off of this cursor-based pagination spec.

:param dataset_id: The ID of the dataset to get the rows from.
:param rows_in_page: The number of rows to get per page.
:param page_token: The token to get the next page of rows.
:param filter_condition: (Optional) A condition to filter the rows by.
:param start_index: Index into dataset for the first row to get. Get all rows if None.
:param limit: The number of rows to get.
"""
...

@webmethod(route="/datasetio/rows", method="POST")
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
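For context, a minimal in-memory sketch of the pagination contract this protocol describes (an index-based `next_start_index` rather than an opaque token). This is illustrative only, not the actual provider implementation; it assumes the `IterrowsResponse` model defined earlier in this file.

```python
# Sketch: in-memory DatasetIO-style provider honoring the iterrows/append_rows contract.
from typing import Any, Dict, List, Optional


class InMemoryDatasetIO:
    def __init__(self) -> None:
        self._rows: Dict[str, List[Dict[str, Any]]] = {}

    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
        self._rows.setdefault(dataset_id, []).extend(rows)

    async def iterrows(
        self,
        dataset_id: str,
        start_index: Optional[int] = None,
        limit: Optional[int] = None,
    ) -> IterrowsResponse:
        rows = self._rows.get(dataset_id, [])
        start = start_index or 0
        end = len(rows) if limit is None else min(start + limit, len(rows))
        return IterrowsResponse(
            data=rows[start:end],
            # None once the last row has been returned, so callers know to stop
            next_start_index=end if end < len(rows) else None,
        )
```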