
Commit d993831

fix: cache DataFrames to temp tables in bigframes.bigquery.ml methods to avoid time travel (#2318)
See internal issue b/310266666 🦕
1 parent 147aad9 · commit d993831
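
For orientation, the pattern this commit applies inside the library is: call `DataFrame.cache()` so the frame is materialized as a full copy in a temporary table, then build SQL from that cached copy, which no longer needs a time-travel snapshot of the base table. Below is a minimal sketch of that idea, not code from the library itself; the table name is hypothetical, while `cache()` and `_to_sql_query()` are the calls visible in the diffs further down.

```python
# Minimal sketch of the caching pattern this commit applies internally.
# The table name is hypothetical; DataFrame.cache() and _to_sql_query()
# are the calls that appear in the diffs below.
import bigframes.pandas as bpd

df = bpd.read_gbq("my-project.my_dataset.training_data")  # hypothetical table

# Without cache(), SQL compiled from df may read the base table through a
# time-travel snapshot. cache() copies the data into a temp table, so SQL
# built afterwards (e.g. what is handed to BigQuery ML) reads the copy.
df.cache()
sql, _, _ = df._to_sql_query(include_index=False)  # internal helper, as used in the diff
```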

2 files changed: 7 additions & 2 deletions

bigframes/bigquery/_operations/ml.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -39,6 +39,10 @@ def _to_sql(df_or_sql: Union[pd.DataFrame, dataframe.DataFrame, str]) -> str:
     else:
         bf_df = cast(dataframe.DataFrame, df_or_sql)
 
+    # Cache dataframes to make sure base table is not a snapshot.
+    # Cached dataframe creates a full copy, never uses snapshot.
+    # This is a workaround for internal issue b/310266666.
+    bf_df.cache()
     sql, _, _ = bf_df._to_sql_query(include_index=False)
     return sql
 
```
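
Read together with the hunk above, the tail of `_to_sql` after this change looks roughly as follows. This is a reconstruction for context only: the `str` and `pandas.DataFrame` branches are not part of the diff and are assumptions here.

```python
# Reconstruction of _to_sql after this commit, based on the hunk above.
# Only the else-branch and the lines below it come from the diff; the
# earlier branches are assumed for illustration.
from typing import Union, cast

import pandas as pd

import bigframes.pandas as bpd
from bigframes import dataframe


def _to_sql(df_or_sql: Union[pd.DataFrame, dataframe.DataFrame, str]) -> str:
    if isinstance(df_or_sql, str):
        return df_or_sql  # assumed: raw SQL strings pass through unchanged
    if isinstance(df_or_sql, pd.DataFrame):
        bf_df = bpd.read_pandas(df_or_sql)  # assumed: local frames are uploaded first
    else:
        bf_df = cast(dataframe.DataFrame, df_or_sql)

    # Cache dataframes to make sure base table is not a snapshot.
    # Cached dataframe creates a full copy, never uses snapshot.
    # This is a workaround for internal issue b/310266666.
    bf_df.cache()
    sql, _, _ = bf_df._to_sql_query(include_index=False)
    return sql
```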

bigframes/ml/core.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -436,8 +436,9 @@ def create_model(
         Returns: a BqmlModel, wrapping a trained model in BigQuery
         """
         options = dict(options)
-        # Cache dataframes to make sure base table is not a snapshot
-        # cached dataframe creates a full copy, never uses snapshot
+        # Cache dataframes to make sure base table is not a snapshot.
+        # Cached dataframe creates a full copy, never uses snapshot.
+        # This is a workaround for internal issue b/310266666.
         if y_train is None:
             input_data = X_train.reset_index(drop=True).cache()
         else:
```
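
For reference, a user reaches `create_model` (and therefore the cached training input shown above) through the `bigframes.ml` estimators. A sketch of that path, assuming the usual estimator surface; the table and column names are hypothetical, and nothing in it is part of this diff beyond the caching behavior it exercises.

```python
# End-to-end sketch of a call that flows through create_model and thus
# through the cached training input. Table and column names are hypothetical;
# the LinearRegression estimator is the usual bigframes.ml surface and is an
# assumption here, not something this commit touches.
import bigframes.pandas as bpd
from bigframes.ml.linear_model import LinearRegression

df = bpd.read_gbq("my-project.my_dataset.training_data")  # hypothetical table
X_train = df[["feature_a", "feature_b"]]                  # hypothetical features
y_train = df[["label"]]                                   # hypothetical label

model = LinearRegression()
# fit() builds a CREATE MODEL job via create_model, which caches the training
# input to a temp table so the query does not read a time-travel snapshot.
model.fit(X_train, y_train)
```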
