Skip to content

Commit a0e1e50

Browse files
authored
fix: Correct connection normalization in blob system tests (#2222)
* fix: Correct connection normalization in blob system tests * skip more tests * Skip failed e2e tests
1 parent ef5e83a commit a0e1e50

File tree

6 files changed

+89
-8
lines changed

6 files changed

+89
-8
lines changed

tests/system/conftest.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,23 @@ def _hash_digest_file(hasher, filepath):
7070
hasher.update(chunk)
7171

7272

73+
@pytest.fixture(scope="session")
74+
def normalize_connection_id():
75+
"""Normalizes the connection ID by casefolding only the LOCATION component.
76+
77+
Connection format: PROJECT.LOCATION.CONNECTION_NAME
78+
Only LOCATION is case-insensitive; PROJECT and CONNECTION_NAME must be lowercase.
79+
"""
80+
81+
def normalize(connection_id: str) -> str:
82+
parts = connection_id.split(".")
83+
if len(parts) == 3:
84+
return f"{parts[0]}.{parts[1].casefold()}.{parts[2]}"
85+
return connection_id # Return unchanged if invalid format
86+
87+
return normalize
88+
89+
7390
@pytest.fixture(scope="session")
7491
def tokyo_location() -> str:
7592
return TOKYO_LOCATION

tests/system/large/blob/test_function.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def images_output_uris(images_output_folder: str) -> list[str]:
5252
]
5353

5454

55+
@pytest.mark.skip(reason="b/457416070")
5556
def test_blob_exif(
5657
bq_connection: str,
5758
session: bigframes.Session,
@@ -103,6 +104,7 @@ def test_blob_exif_verbose(
103104
assert content_series.dtype == dtypes.JSON_DTYPE
104105

105106

107+
@pytest.mark.skip(reason="b/457416070")
106108
def test_blob_image_blur_to_series(
107109
images_mm_df: bpd.DataFrame,
108110
bq_connection: str,
@@ -136,6 +138,7 @@ def test_blob_image_blur_to_series(
136138
assert not actual.blob.size().isna().any()
137139

138140

141+
@pytest.mark.skip(reason="b/457416070")
139142
def test_blob_image_blur_to_series_verbose(
140143
images_mm_df: bpd.DataFrame,
141144
bq_connection: str,
@@ -163,6 +166,7 @@ def test_blob_image_blur_to_series_verbose(
163166
assert not actual.blob.size().isna().any()
164167

165168

169+
@pytest.mark.skip(reason="b/457416070")
166170
def test_blob_image_blur_to_folder(
167171
images_mm_df: bpd.DataFrame,
168172
bq_connection: str,
@@ -195,6 +199,7 @@ def test_blob_image_blur_to_folder(
195199
assert not actual.blob.size().isna().any()
196200

197201

202+
@pytest.mark.skip(reason="b/457416070")
198203
def test_blob_image_blur_to_folder_verbose(
199204
images_mm_df: bpd.DataFrame,
200205
bq_connection: str,
@@ -254,6 +259,7 @@ def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connectio
254259
assert content_series.dtype == dtypes.BYTES_DTYPE
255260

256261

262+
@pytest.mark.skip(reason="b/457416070")
257263
def test_blob_image_resize_to_series(
258264
images_mm_df: bpd.DataFrame,
259265
bq_connection: str,
@@ -291,6 +297,7 @@ def test_blob_image_resize_to_series(
291297
assert not actual.blob.size().isna().any()
292298

293299

300+
@pytest.mark.skip(reason="b/457416070")
294301
def test_blob_image_resize_to_series_verbose(
295302
images_mm_df: bpd.DataFrame,
296303
bq_connection: str,
@@ -325,6 +332,7 @@ def test_blob_image_resize_to_series_verbose(
325332
assert not actual.blob.size().isna().any()
326333

327334

335+
@pytest.mark.skip(reason="b/457416070")
328336
def test_blob_image_resize_to_folder(
329337
images_mm_df: bpd.DataFrame,
330338
bq_connection: str,
@@ -358,6 +366,7 @@ def test_blob_image_resize_to_folder(
358366
assert not actual.blob.size().isna().any()
359367

360368

369+
@pytest.mark.skip(reason="b/457416070")
361370
def test_blob_image_resize_to_folder_verbose(
362371
images_mm_df: bpd.DataFrame,
363372
bq_connection: str,
@@ -420,6 +429,7 @@ def test_blob_image_resize_to_bq_verbose(
420429
assert content_series.dtype == dtypes.BYTES_DTYPE
421430

422431

432+
@pytest.mark.skip(reason="b/457416070")
423433
def test_blob_image_normalize_to_series(
424434
images_mm_df: bpd.DataFrame,
425435
bq_connection: str,
@@ -492,6 +502,7 @@ def test_blob_image_normalize_to_series_verbose(
492502
assert hasattr(content_series, "blob")
493503

494504

505+
@pytest.mark.skip(reason="b/457416070")
495506
def test_blob_image_normalize_to_folder(
496507
images_mm_df: bpd.DataFrame,
497508
bq_connection: str,
@@ -598,6 +609,7 @@ def test_blob_image_normalize_to_bq_verbose(
598609
assert content_series.dtype == dtypes.BYTES_DTYPE
599610

600611

612+
@pytest.mark.skip(reason="b/457416070")
601613
def test_blob_pdf_extract(
602614
pdf_mm_df: bpd.DataFrame,
603615
bq_connection: str,
@@ -633,6 +645,7 @@ def test_blob_pdf_extract(
633645
), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "
634646

635647

648+
@pytest.mark.skip(reason="b/457416070")
636649
def test_blob_pdf_extract_verbose(
637650
pdf_mm_df: bpd.DataFrame,
638651
bq_connection: str,
@@ -670,6 +683,7 @@ def test_blob_pdf_extract_verbose(
670683
), f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. "
671684

672685

686+
@pytest.mark.skip(reason="b/457416070")
673687
def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
674688
actual = (
675689
pdf_mm_df["pdf"]
@@ -709,6 +723,7 @@ def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
709723
), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "
710724

711725

726+
@pytest.mark.skip(reason="b/457416070")
712727
def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str):
713728
actual = (
714729
pdf_mm_df["pdf"]

tests/system/small/bigquery/test_ai.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ def test_ai_if(session):
273273
assert result.dtype == dtypes.BOOL_DTYPE
274274

275275

276+
@pytest.mark.skip(reason="b/457416070")
276277
def test_ai_if_multi_model(session):
277278
df = session.from_glob_path(
278279
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
@@ -293,6 +294,7 @@ def test_ai_classify(session):
293294
assert result.dtype == dtypes.STRING_DTYPE
294295

295296

297+
@pytest.mark.skip(reason="b/457416070")
296298
def test_ai_classify_multi_model(session):
297299
df = session.from_glob_path(
298300
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"

tests/system/small/blob/test_io.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,27 +12,36 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from typing import Callable
1516
from unittest import mock
1617

1718
import IPython.display
1819
import pandas as pd
20+
import pytest
1921

2022
import bigframes
2123
import bigframes.pandas as bpd
2224

2325

2426
def test_blob_create_from_uri_str(
25-
bq_connection: str, session: bigframes.Session, images_uris
27+
bq_connection: str,
28+
session: bigframes.Session,
29+
images_uris,
30+
normalize_connection_id: Callable[[str], str],
2631
):
2732
uri_series = bpd.Series(images_uris, session=session)
2833
blob_series = uri_series.str.to_blob(connection=bq_connection)
2934

3035
pd_blob_df = blob_series.struct.explode().to_pandas()
36+
pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
3137
expected_pd_df = pd.DataFrame(
3238
{
3339
"uri": images_uris,
3440
"version": [None, None],
35-
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
41+
"authorizer": [
42+
normalize_connection_id(bq_connection),
43+
normalize_connection_id(bq_connection),
44+
],
3645
"details": [None, None],
3746
}
3847
)
@@ -43,7 +52,11 @@ def test_blob_create_from_uri_str(
4352

4453

4554
def test_blob_create_from_glob_path(
46-
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
55+
bq_connection: str,
56+
session: bigframes.Session,
57+
images_gcs_path,
58+
images_uris,
59+
normalize_connection_id: Callable[[str], str],
4760
):
4861
blob_df = session.from_glob_path(
4962
images_gcs_path, connection=bq_connection, name="blob_col"
@@ -55,12 +68,16 @@ def test_blob_create_from_glob_path(
5568
.sort_values("uri")
5669
.reset_index(drop=True)
5770
)
71+
pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
5872

5973
expected_df = pd.DataFrame(
6074
{
6175
"uri": images_uris,
6276
"version": [None, None],
63-
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
77+
"authorizer": [
78+
normalize_connection_id(bq_connection),
79+
normalize_connection_id(bq_connection),
80+
],
6481
"details": [None, None],
6582
}
6683
)
@@ -71,7 +88,11 @@ def test_blob_create_from_glob_path(
7188

7289

7390
def test_blob_create_read_gbq_object_table(
74-
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
91+
bq_connection: str,
92+
session: bigframes.Session,
93+
images_gcs_path,
94+
images_uris,
95+
normalize_connection_id: Callable[[str], str],
7596
):
7697
obj_table = session._create_object_table(images_gcs_path, bq_connection)
7798

@@ -83,11 +104,15 @@ def test_blob_create_read_gbq_object_table(
83104
.sort_values("uri")
84105
.reset_index(drop=True)
85106
)
107+
pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
86108
expected_df = pd.DataFrame(
87109
{
88110
"uri": images_uris,
89111
"version": [None, None],
90-
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
112+
"authorizer": [
113+
normalize_connection_id(bq_connection),
114+
normalize_connection_id(bq_connection),
115+
],
91116
"details": [None, None],
92117
}
93118
)
@@ -97,6 +122,7 @@ def test_blob_create_read_gbq_object_table(
97122
)
98123

99124

125+
@pytest.mark.skip(reason="b/457416070")
100126
def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame):
101127
mock_display = mock.Mock()
102128
monkeypatch.setattr(IPython.display, "display", mock_display)

tests/system/small/blob/test_properties.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
17+
from typing import Callable
18+
1519
import pandas as pd
20+
import pytest
1621

1722
import bigframes.dtypes as dtypes
1823
import bigframes.pandas as bpd
@@ -27,17 +32,27 @@ def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame):
2732
)
2833

2934

30-
def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str):
35+
def test_blob_authorizer(
36+
images_mm_df: bpd.DataFrame,
37+
bq_connection: str,
38+
normalize_connection_id: Callable[[str], str],
39+
):
3140
actual = images_mm_df["blob_col"].blob.authorizer().to_pandas()
41+
actual = actual.apply(normalize_connection_id)
3242
expected = pd.Series(
33-
[bq_connection.casefold(), bq_connection.casefold()], name="authorizer"
43+
[
44+
normalize_connection_id(bq_connection),
45+
normalize_connection_id(bq_connection),
46+
],
47+
name="authorizer",
3448
)
3549

3650
pd.testing.assert_series_equal(
3751
actual, expected, check_dtype=False, check_index_type=False
3852
)
3953

4054

55+
@pytest.mark.skip(reason="b/457416070")
4156
def test_blob_version(images_mm_df: bpd.DataFrame):
4257
actual = images_mm_df["blob_col"].blob.version().to_pandas()
4358
expected = pd.Series(["1753907851152593", "1753907851111538"], name="version")
@@ -47,6 +62,7 @@ def test_blob_version(images_mm_df: bpd.DataFrame):
4762
)
4863

4964

65+
@pytest.mark.skip(reason="b/457416070")
5066
def test_blob_metadata(images_mm_df: bpd.DataFrame):
5167
actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
5268
expected = pd.Series(
@@ -71,6 +87,7 @@ def test_blob_metadata(images_mm_df: bpd.DataFrame):
7187
pd.testing.assert_series_equal(actual, expected)
7288

7389

90+
@pytest.mark.skip(reason="b/457416070")
7491
def test_blob_content_type(images_mm_df: bpd.DataFrame):
7592
actual = images_mm_df["blob_col"].blob.content_type().to_pandas()
7693
expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type")
@@ -80,6 +97,7 @@ def test_blob_content_type(images_mm_df: bpd.DataFrame):
8097
)
8198

8299

100+
@pytest.mark.skip(reason="b/457416070")
83101
def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
84102
actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas()
85103
expected = pd.Series(
@@ -92,6 +110,7 @@ def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
92110
)
93111

94112

113+
@pytest.mark.skip(reason="b/457416070")
95114
def test_blob_size(images_mm_df: bpd.DataFrame):
96115
actual = images_mm_df["blob_col"].blob.size().to_pandas()
97116
expected = pd.Series([338390, 43333], name="size")
@@ -101,6 +120,7 @@ def test_blob_size(images_mm_df: bpd.DataFrame):
101120
)
102121

103122

123+
@pytest.mark.skip(reason="b/457416070")
104124
def test_blob_updated(images_mm_df: bpd.DataFrame):
105125
actual = images_mm_df["blob_col"].blob.updated().to_pandas()
106126
expected = pd.Series(

tests/system/small/ml/test_multimodal_llm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from bigframes.testing import utils
2222

2323

24+
@pytest.mark.skip(reason="b/457416070")
2425
@pytest.mark.flaky(retries=2)
2526
def test_multimodal_embedding_generator_predict_default_params_success(
2627
images_mm_df, session, bq_connection

0 commit comments

Comments
 (0)