Skip to content

perf: defer query in read_gbq with wildcard tables #1661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a9edb2a
perf: defer query in `read_gbq` with wildcard tables
tswast Apr 27, 2025
df795b1
remove obsolete comments
tswast Apr 27, 2025
f81fe4e
Merge remote-tracking branch 'origin/main' into b405773140-wildcard
tswast Apr 28, 2025
79f4c58
use sql node instead of ibis table node to keep select * from omittin…
tswast Apr 28, 2025
5b0d0a0
test with cache and to_gbq
tswast Apr 29, 2025
118964b
rename columns before caching
tswast Apr 29, 2025
ca33463
remove unnecessary comment
tswast Apr 29, 2025
e546745
Merge remote-tracking branch 'origin/main' into b405773140-wildcard
tswast Apr 29, 2025
4897ca4
add missing import
tswast Apr 29, 2025
e1a7341
do not materialize _TABLE_SUFFIX
tswast Apr 29, 2025
af06200
fix unit tests
tswast Apr 29, 2025
af5c036
Merge branch 'main' into b405773140-wildcard
tswast Apr 29, 2025
f26574b
correct number of columns in cache with offsets
tswast Apr 29, 2025
dd05c2d
Merge branch 'main' into b405773140-wildcard
tswast Apr 29, 2025
ab0e50a
fix formatting
tswast Apr 29, 2025
89535e2
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Apr 29, 2025
8bb09d5
Merge branch 'b405773140-wildcard' of https://github.com/googleapis/p…
gcf-owl-bot[bot] Apr 29, 2025
40e2e77
Merge branch 'main' into b405773140-wildcard
tswast Apr 29, 2025
d37bf5e
revert datetime change, max_results change
tswast Apr 29, 2025
2f25f8d
Merge remote-tracking branch 'origin/b405773140-wildcard' into b40577…
tswast Apr 29, 2025
4bf66b6
add pseudocolumns to node
tswast Apr 29, 2025
8c96498
fix unit tests
tswast Apr 29, 2025
e1780a6
actually fix unit tests
tswast Apr 29, 2025
b027b51
try to rename as part of compile
tswast Apr 29, 2025
00fbd91
add renames to as cached table
tswast Apr 30, 2025
9a778db
use correct node for table schema
tswast Apr 30, 2025
d076cd3
Merge branch 'main' into b405773140-wildcard
tswast Apr 30, 2025
f3d5b7b
Merge branch 'main' into b405773140-wildcard
tswast May 5, 2025
7d8ddcc
Merge remote-tracking branch 'origin/main' into b405773140-pseudocolumns
tswast May 5, 2025
0722229
revert pseudocolumn addition
tswast May 5, 2025
80ce9c6
revert pseudocolumn fix
tswast May 5, 2025
c2ffc02
Merge remote-tracking branch 'origin/b405773140-wildcard' into b40577…
tswast May 5, 2025
2f2dcd6
add test for warning
tswast May 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions bigframes/core/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,10 +708,12 @@ class GbqTable:
@staticmethod
def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable:
# Subsetting fields with columns can reduce cost of row-hash default ordering
table_schema = bigframes.core.tools.bigquery.get_schema_and_pseudocolumns(table)

if columns:
schema = tuple(item for item in table.schema if item.name in columns)
schema = tuple(item for item in table_schema if item.name in columns)
else:
schema = tuple(table.schema)
schema = tuple(table_schema)
return GbqTable(
project_id=table.project,
dataset_id=table.dataset_id,
Expand Down
12 changes: 8 additions & 4 deletions bigframes/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,19 @@ def from_bq_table(
typing.Dict[str, bigframes.dtypes.Dtype]
] = None,
):
# Avoid circular imports.
import bigframes.core.tools.bigquery

if column_type_overrides is None:
column_type_overrides = {}
items = tuple(
items = [
SchemaItem(name, column_type_overrides.get(name, dtype))
for name, dtype in bigframes.dtypes.bf_type_from_type_kind(
table.schema
bigframes.core.tools.bigquery.get_schema_and_pseudocolumns(table)
).items()
)
return ArraySchema(items)
]

return ArraySchema(tuple(items))

@property
def names(self) -> typing.Tuple[str, ...]:
Expand Down
39 changes: 39 additions & 0 deletions bigframes/core/tools/bigquery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Private helpers for loading a BigQuery table as a BigQuery DataFrames DataFrame.
"""

from __future__ import annotations

import google.cloud.bigquery as bigquery


def get_schema_and_pseudocolumns(
    table: bigquery.table.Table,
) -> list[bigquery.SchemaField]:
    """Return *table*'s schema fields plus any applicable pseudocolumns.

    BigQuery wildcard tables (table IDs ending in ``*``) expose a
    ``_TABLE_SUFFIX`` STRING pseudocolumn, so a matching ``SchemaField``
    is appended for them.

    Args:
        table: Table metadata as returned by the BigQuery client.

    Returns:
        A new list of schema fields; the table's own schema is not mutated.
    """
    # TODO(tswast): Add _PARTITIONTIME and/or _PARTITIONDATE for
    # ingestion-time partitioned tables.
    columns = list(table.schema)

    if table.table_id.endswith("*"):
        columns.append(bigquery.SchemaField("_TABLE_SUFFIX", "STRING"))

    return columns
3 changes: 3 additions & 0 deletions bigframes/session/_io/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ def to_query(
else:
select_clause = "SELECT *"

if query_or_table.endswith("*"):
select_clause += ", _TABLE_SUFFIX"

time_travel_clause = ""
if time_travel_timestamp is not None:
time_travel_literal = bigframes.core.sql.simple_literal(time_travel_timestamp)
Expand Down
9 changes: 9 additions & 0 deletions bigframes/session/_io/bigquery/read_gbq_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,16 @@ def validate_table(
# Anonymous dataset, does not support snapshot ever
if table.dataset_id.startswith("_"):
pass

# Only true tables support time travel
elif table.table_id.endswith("*"):
msg = bfe.format_message(
"Wildcard tables do not support FOR SYSTEM_TIME AS OF queries. "
"Attempting query without time travel. Be aware that "
"modifications to the underlying data may result in errors or "
"unexpected behavior."
)
warnings.warn(msg, category=bfe.TimeTravelDisabledWarning)
elif table.table_type != "TABLE":
if table.table_type == "MATERIALIZED_VIEW":
msg = bfe.format_message(
Expand Down
1 change: 1 addition & 0 deletions bigframes/session/bq_caching_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def export_gbq(
# Only update schema if this is not modifying an existing table, and the
# new table contains timedelta columns.
table = self.bqclient.get_table(destination)
# TODO(tswast): What to do with pseudocolumns?
table.schema = array_value.schema.to_bigquery()
self.bqclient.update_table(table, ["schema"])

Expand Down
6 changes: 1 addition & 5 deletions bigframes/session/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,11 +440,7 @@ def read_gbq_table(
# clustered tables, so fallback to a query. We do this here so that
# the index is consistent with tables that have primary keys, even
# when max_results is set.
# TODO(b/338419730): We don't need to fallback to a query for wildcard
# tables if we allow some non-determinism when time travel isn't supported.
if max_results is not None or bf_io_bigquery.is_table_with_wildcard_suffix(
query
):
if max_results is not None:
# TODO(b/338111344): If we are running a query anyway, we might as
# well generate ROW_NUMBER() at the same time.
all_columns: Iterable[str] = (
Expand Down
14 changes: 14 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import io
import operator
import re
import sys
import tempfile
import typing
Expand Down Expand Up @@ -5284,6 +5285,19 @@ def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_cre
assert not loaded_scalars_df_index.empty


def test_read_gbq_to_pandas_wildcard(unordered_session: bigframes.Session):
    """Reading a wildcard table warns about time travel and still executes.

    ``read_gbq`` on a ``*``-suffixed table ID must emit
    ``TimeTravelDisabledWarning`` (wildcard tables don't support
    FOR SYSTEM_TIME AS OF), and the resulting DataFrame must expose the
    ``_TABLE_SUFFIX`` pseudocolumn for filtering.
    """
    with pytest.warns(
        bigframes.exceptions.TimeTravelDisabledWarning,
        match=re.escape("Wildcard tables do not support FOR SYSTEM_TIME"),
    ):
        frame = unordered_session.read_gbq("bigquery-public-data.noaa_gsod.gsod*")
    frame = frame[frame["_TABLE_SUFFIX"] == "1929"][["da", "mo", "year", "max"]]
    frame.to_pandas()
    num_rows, num_columns = frame.shape
    assert num_rows > 0
    assert num_columns == 4


def test_read_gbq_to_pandas_no_exec(unordered_session: bigframes.Session):
metrics = unordered_session._metrics
execs_pre = metrics.execution_count
Expand Down
Loading