-
Notifications
You must be signed in to change notification settings - Fork 7.3k
[Data] - Add alias expression #56550
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
cd1b91e
1104920
95c0828
2107892
245338f
e0e33a9
9521203
3153ee3
adb676f
cebef01
3cade97
c4ad0e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3248,6 +3248,91 @@ def test_with_column_filter_in_pipeline(ray_start_regular_shared): | |
| pd.testing.assert_frame_equal(result_df, expected_df, check_dtype=False) | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "expr_factory, expected_columns, alias_name, expected_values",
    [
        (
            lambda: col("id"),
            ["id", "new_id"],
            "new_id",
            [0, 1, 2, 3, 4],  # Copy of id column
        ),
        (
            lambda: col("id") + 1,
            ["id", "id_plus_one"],
            "id_plus_one",
            [1, 2, 3, 4, 5],  # id + 1
        ),
        (
            lambda: col("id") * 2 + 5,
            ["id", "transformed"],
            "transformed",
            [5, 7, 9, 11, 13],  # id * 2 + 5
        ),
        (
            lambda: lit(42),
            ["id", "constant"],
            "constant",
            [42, 42, 42, 42, 42],  # lit(42)
        ),
        (
            lambda: col("id") >= 0,
            ["id", "is_non_negative"],
            "is_non_negative",
            [True, True, True, True, True],  # id >= 0
        ),
        (
            lambda: col("id") + 1,
            ["id"],  # Only one column since we're overwriting id
            "id",
            [1, 2, 3, 4, 5],  # id + 1 replaces original id
        ),
    ],
    ids=[
        "col_alias",
        "arithmetic_alias",
        "complex_alias",
        "literal_alias",
        "comparison_alias",
        "overwrite_existing_column",
    ],
)
def test_with_column_alias_expressions(
    ray_start_regular_shared,
    expr_factory,
    expected_columns,
    alias_name,
    expected_values,
):
    """Test that alias expressions work correctly with with_column.

    Args:
        ray_start_regular_shared: Fixture requested by the test (not used
            directly in the body).
        expr_factory: Zero-argument callable returning the *un-aliased*
            expression. The alias is applied inside the test so that both
            the aliased and the un-aliased form are available.
        expected_columns: Column names, in order, expected in the result.
        alias_name: Name passed to ``.alias()`` and to ``with_column``.
        expected_values: Expected values of the ``alias_name`` column.
    """
    base_expr = expr_factory()
    expr = base_expr.alias(alias_name)

    # Verify the alias name matches what we expect
    assert expr.name == alias_name

    # Apply the aliased expression and materialize it for comparison
    result_df = ray.data.range(5).with_column(alias_name, expr).to_pandas()

    # Build the expected frame. When alias_name == "id" the second dict key
    # replaces the first, which models overwriting the existing column.
    expected_df = pd.DataFrame({"id": [0, 1, 2, 3, 4], alias_name: expected_values})

    # Ensure column order matches expected_columns
    expected_df = expected_df[expected_columns]

    # Assert the entire DataFrame is equal
    pd.testing.assert_frame_equal(result_df, expected_df)

    # Verify the alias expression evaluates the same as the non-aliased
    # version. This must use base_expr: reusing the aliased `expr` here would
    # compare the pipeline with itself and the assertion could never fail.
    non_aliased_df = (
        ray.data.range(5).with_column(alias_name, base_expr).to_pandas()
    )

    pd.testing.assert_frame_equal(result_df, non_aliased_df)
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Test Fails Due to Self-Comparison. The … |
||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| import sys | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.