fix review

myandpr · myandpr · commit f5a53bf757f2 · 2026-01-17T03:59:41.000+08:00
Signed-off-by: yaommen <myanstu@163.com>
diff --git a/doc/source/data/loading-data.rst b/doc/source/data/loading-data.rst
@@ -445,26 +445,23 @@ Ray Data interoperates with libraries like pandas, NumPy, and Arrow.
             import numpy as np
             import ray
 
-            array = np.ones((3, 2, 2))
+            array = np.arange(3)
             ds = ray.data.from_numpy(array)
 
             print(ds)
 
         .. testoutput::
 
             shape: (3, 1)
-            ╭──────────────────────────────────────────╮
-            │ data                                     │
-            │ ---                                      │
-            │ ArrowTensorTypeV2(shape=(2, 2), dtype=d… │
-            ╞══════════════════════════════════════════╡
-            │ [[1. 1.]
-             [1. 1.]]                       │
-            │ [[1. 1.]
-             [1. 1.]]                       │
-            │ [[1. 1.]
-             [1. 1.]]                       │
-            ╰──────────────────────────────────────────╯
+            ╭───────╮
+            │ data  │
+            │ ---   │
+            │ int64 │
+            ╞═══════╡
+            │ 0     │
+            │ 1     │
+            │ 2     │
+            ╰───────╯
             (Showing 3 of 3 rows)
 
     .. tab-item:: pandas
diff --git a/python/ray/data/_internal/dataset_repr.py b/python/ray/data/_internal/dataset_repr.py
@@ -194,19 +194,12 @@ def _resolve_block(block_ref: ObjectRef) -> Block:
 def _determine_preview_row_targets(num_rows: Optional[int]) -> Tuple[int, int]:
     """Compute how many head and tail rows to preview."""
     max_rows = _DATASET_REPR_MAX_ROWS
-    if max_rows <= 0:
-        return 0, 0
-
     if num_rows is None or num_rows <= max_rows:
         head = num_rows if num_rows is not None else max_rows
         return head, 0
 
     head = min(_DATASET_REPR_HEAD_ROWS, max_rows)
-    if head < 0:
-        head = 0
     tail = max_rows - head
-    if tail < 0:
-        tail = 0
     return head, tail
 
 
diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py
@@ -6284,7 +6284,7 @@ def materialize(self) -> "MaterializedDataset":
             >>> import ray
             >>> ds = ray.data.range(10)
             >>> materialized_ds = ds.materialize()
-            >>> materialized_ds  # doctest: +ELLIPSIS
+            >>> materialized_ds
             shape: (10, 1)
             ╭───────╮
             │ id    │
diff --git a/python/ray/data/iterator.py b/python/ray/data/iterator.py
@@ -84,15 +84,15 @@ class DataIterator(abc.ABC):
     Examples:
         >>> import ray
         >>> ds = ray.data.range(5)
-        >>> ds  # doctest: +ELLIPSIS
+        >>> ds
         shape: (5, 1)
         ╭───────╮
         │ id    │
         │ ---   │
         │ int64 │
         ╰───────╯
         (Dataset isn't materialized)
-        >>> ds.iterator()  # doctest: +ELLIPSIS
+        >>> ds.iterator()
         DataIterator(shape: (5, 1)
         ╭───────╮
         │ id    │
diff --git a/python/ray/data/tests/test_tensor.py b/python/ray/data/tests/test_tensor.py
@@ -323,7 +323,6 @@ def test_tensors_inferred_from_map(
     ray_start_regular_shared, restore_data_context, tensor_format
 ):
     DataContext.get_current().use_arrow_tensor_v2 = tensor_format == "v2"
-    class_name = "ArrowTensorTypeV2" if tensor_format == "v2" else "ArrowTensorType"
     # Test map.
     ds = ray.data.range(10, override_num_blocks=10).map(
         lambda _: {"data": np.ones((4, 4))}
@@ -332,7 +331,11 @@ def test_tensors_inferred_from_map(
     assert ds.count() == 10
     schema = ds.schema()
     assert schema.names == ["data"]
-    assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
+    dtype = schema.types[0]
+    expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
+    assert isinstance(dtype, expected_type)
+    assert dtype.shape == (4, 4)
+    assert dtype.scalar_type == pa.float64()
 
     # Test map_batches.
     ds = ray.data.range(16, override_num_blocks=4).map_batches(
@@ -342,7 +345,11 @@ def test_tensors_inferred_from_map(
     assert ds.count() == 24
     schema = ds.schema()
     assert schema.names == ["data"]
-    assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
+    dtype = schema.types[0]
+    expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
+    assert isinstance(dtype, expected_type)
+    assert dtype.shape == (4, 4)
+    assert dtype.scalar_type == pa.float64()
 
     # Test flat_map.
     ds = ray.data.range(10, override_num_blocks=10).flat_map(
@@ -352,7 +359,11 @@ def test_tensors_inferred_from_map(
     assert ds.count() == 20
     schema = ds.schema()
     assert schema.names == ["data"]
-    assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
+    dtype = schema.types[0]
+    expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
+    assert isinstance(dtype, expected_type)
+    assert dtype.shape == (4, 4)
+    assert dtype.scalar_type == pa.float64()
 
     # Test map_batches ndarray column.
     ds = ray.data.range(16, override_num_blocks=4).map_batches(
@@ -362,7 +373,11 @@ def test_tensors_inferred_from_map(
     assert ds.count() == 24
     schema = ds.schema()
     assert schema.names == ["a"]
-    assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
+    dtype = schema.types[0]
+    expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
+    assert isinstance(dtype, expected_type)
+    assert dtype.shape == (4, 4)
+    assert dtype.scalar_type == pa.float64()
 
     ds = ray.data.range(16, override_num_blocks=4).map_batches(
         lambda _: pd.DataFrame({"a": [np.ones((2, 2)), np.ones((3, 3))]}),
@@ -372,7 +387,11 @@ def test_tensors_inferred_from_map(
     assert ds.count() == 16
     schema = ds.schema()
     assert schema.names == ["a"]
-    assert str(schema.types[0]) == f"{class_name}(shape=(None, None), dtype=double)"
+    dtype = schema.types[0]
+    expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
+    assert isinstance(dtype, expected_type)
+    assert dtype.shape == (None, None)
+    assert dtype.scalar_type == pa.float64()
 
 
 @pytest.mark.parametrize("tensor_format", ["v1", "v2"])