Skip to content

Commit f5a53bf

Browse files
committed
fix review
Signed-off-by: yaommen <myanstu@163.com>
1 parent 0b8fc4d commit f5a53bf

File tree

5 files changed

+38
-29
lines changed

5 files changed

+38
-29
lines changed

doc/source/data/loading-data.rst

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -445,26 +445,23 @@ Ray Data interoperates with libraries like pandas, NumPy, and Arrow.
445445
import numpy as np
446446
import ray
447447

448-
array = np.ones((3, 2, 2))
448+
array = np.arange(3)
449449
ds = ray.data.from_numpy(array)
450450

451451
print(ds)
452452

453453
.. testoutput::
454454

455455
shape: (3, 1)
456-
╭──────────────────────────────────────────╮
457-
│ data │
458-
│ --- │
459-
│ ArrowTensorTypeV2(shape=(2, 2), dtype=d… │
460-
╞══════════════════════════════════════════╡
461-
│ [[1. 1.]
462-
[1. 1.]] │
463-
│ [[1. 1.]
464-
[1. 1.]] │
465-
│ [[1. 1.]
466-
[1. 1.]] │
467-
╰──────────────────────────────────────────╯
456+
╭───────╮
457+
│ data │
458+
│ --- │
459+
│ int64 │
460+
╞═══════╡
461+
│ 0 │
462+
│ 1 │
463+
│ 2 │
464+
╰───────╯
468465
(Showing 3 of 3 rows)
469466

470467
.. tab-item:: pandas

python/ray/data/_internal/dataset_repr.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,19 +194,12 @@ def _resolve_block(block_ref: ObjectRef) -> Block:
194194
def _determine_preview_row_targets(num_rows: Optional[int]) -> Tuple[int, int]:
195195
"""Compute how many head and tail rows to preview."""
196196
max_rows = _DATASET_REPR_MAX_ROWS
197-
if max_rows <= 0:
198-
return 0, 0
199-
200197
if num_rows is None or num_rows <= max_rows:
201198
head = num_rows if num_rows is not None else max_rows
202199
return head, 0
203200

204201
head = min(_DATASET_REPR_HEAD_ROWS, max_rows)
205-
if head < 0:
206-
head = 0
207202
tail = max_rows - head
208-
if tail < 0:
209-
tail = 0
210203
return head, tail
211204

212205

python/ray/data/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6284,7 +6284,7 @@ def materialize(self) -> "MaterializedDataset":
62846284
>>> import ray
62856285
>>> ds = ray.data.range(10)
62866286
>>> materialized_ds = ds.materialize()
6287-
>>> materialized_ds # doctest: +ELLIPSIS
6287+
>>> materialized_ds
62886288
shape: (10, 1)
62896289
╭───────╮
62906290
│ id │

python/ray/data/iterator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,15 @@ class DataIterator(abc.ABC):
8484
Examples:
8585
>>> import ray
8686
>>> ds = ray.data.range(5)
87-
>>> ds # doctest: +ELLIPSIS
87+
>>> ds
8888
shape: (5, 1)
8989
╭───────╮
9090
│ id │
9191
│ --- │
9292
│ int64 │
9393
╰───────╯
9494
(Dataset isn't materialized)
95-
>>> ds.iterator() # doctest: +ELLIPSIS
95+
>>> ds.iterator()
9696
DataIterator(shape: (5, 1)
9797
╭───────╮
9898
│ id │

python/ray/data/tests/test_tensor.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,6 @@ def test_tensors_inferred_from_map(
323323
ray_start_regular_shared, restore_data_context, tensor_format
324324
):
325325
DataContext.get_current().use_arrow_tensor_v2 = tensor_format == "v2"
326-
class_name = "ArrowTensorTypeV2" if tensor_format == "v2" else "ArrowTensorType"
327326
# Test map.
328327
ds = ray.data.range(10, override_num_blocks=10).map(
329328
lambda _: {"data": np.ones((4, 4))}
@@ -332,7 +331,11 @@ def test_tensors_inferred_from_map(
332331
assert ds.count() == 10
333332
schema = ds.schema()
334333
assert schema.names == ["data"]
335-
assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
334+
dtype = schema.types[0]
335+
expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
336+
assert isinstance(dtype, expected_type)
337+
assert dtype.shape == (4, 4)
338+
assert dtype.scalar_type == pa.float64()
336339

337340
# Test map_batches.
338341
ds = ray.data.range(16, override_num_blocks=4).map_batches(
@@ -342,7 +345,11 @@ def test_tensors_inferred_from_map(
342345
assert ds.count() == 24
343346
schema = ds.schema()
344347
assert schema.names == ["data"]
345-
assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
348+
dtype = schema.types[0]
349+
expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
350+
assert isinstance(dtype, expected_type)
351+
assert dtype.shape == (4, 4)
352+
assert dtype.scalar_type == pa.float64()
346353

347354
# Test flat_map.
348355
ds = ray.data.range(10, override_num_blocks=10).flat_map(
@@ -352,7 +359,11 @@ def test_tensors_inferred_from_map(
352359
assert ds.count() == 20
353360
schema = ds.schema()
354361
assert schema.names == ["data"]
355-
assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
362+
dtype = schema.types[0]
363+
expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
364+
assert isinstance(dtype, expected_type)
365+
assert dtype.shape == (4, 4)
366+
assert dtype.scalar_type == pa.float64()
356367

357368
# Test map_batches ndarray column.
358369
ds = ray.data.range(16, override_num_blocks=4).map_batches(
@@ -362,7 +373,11 @@ def test_tensors_inferred_from_map(
362373
assert ds.count() == 24
363374
schema = ds.schema()
364375
assert schema.names == ["a"]
365-
assert str(schema.types[0]) == f"{class_name}(shape=(4, 4), dtype=double)"
376+
dtype = schema.types[0]
377+
expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
378+
assert isinstance(dtype, expected_type)
379+
assert dtype.shape == (4, 4)
380+
assert dtype.scalar_type == pa.float64()
366381

367382
ds = ray.data.range(16, override_num_blocks=4).map_batches(
368383
lambda _: pd.DataFrame({"a": [np.ones((2, 2)), np.ones((3, 3))]}),
@@ -372,7 +387,11 @@ def test_tensors_inferred_from_map(
372387
assert ds.count() == 16
373388
schema = ds.schema()
374389
assert schema.names == ["a"]
375-
assert str(schema.types[0]) == f"{class_name}(shape=(None, None), dtype=double)"
390+
dtype = schema.types[0]
391+
expected_type = ArrowTensorTypeV2 if tensor_format == "v2" else ArrowTensorType
392+
assert isinstance(dtype, expected_type)
393+
assert dtype.shape == (None, None)
394+
assert dtype.scalar_type == pa.float64()
376395

377396

378397
@pytest.mark.parametrize("tensor_format", ["v1", "v2"])

0 commit comments

Comments
 (0)