diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 10c8aa783fb..9dc9acc2b74 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -400,7 +400,9 @@ def offline_write_batch( ) with tempfile.TemporaryFile() as parquet_temp_file: - pyarrow.parquet.write_table(table=table, where=parquet_temp_file) + pyarrow.parquet.write_table( + table=table, where=parquet_temp_file, coerce_timestamps="us" + ) parquet_temp_file.seek(0) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index f48dfbb86b4..89fb60f2441 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -351,7 +351,7 @@ def upload_arrow_table_to_redshift( else: # Write the PyArrow Table on disk in Parquet format and upload it to S3 with tempfile.TemporaryFile(suffix=".parquet") as parquet_temp_file: - pq.write_table(table, parquet_temp_file) + pq.write_table(table, parquet_temp_file, coerce_timestamps="us") parquet_temp_file.seek(0) s3_resource.Object(bucket, key).put(Body=parquet_temp_file) @@ -566,7 +566,19 @@ def unload_redshift_query_to_pa( with tempfile.TemporaryDirectory() as temp_dir: download_s3_directory(s3_resource, bucket, key, temp_dir) delete_s3_directory(s3_resource, bucket, key) - return pq.read_table(temp_dir) + + # Debug + print("debug temp file using pandas. ") + df = pd.read_parquet(temp_dir) + # parquet_file = pq.ParquetDataset(temp_dir) + # df = parquet_file.read().to_pandas() + print(df) + + print("transfer to table.") + table = pq.read_table(temp_dir) + print(table) + + return table def unload_redshift_query_to_df( @@ -592,7 +604,17 @@ def unload_redshift_query_to_df( iam_role, query, ) - return table.to_pandas() + + # Debug + print("Debug ------") + print(table.schema) + print(table) + print("Debug ++++++") + + df = table.to_pandas() + print(df) + print("Debug ++++++") + return df def get_lambda_function(lambda_client, function_name: str) -> Optional[Dict]: diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index c74a3f12e15..ee29862c30f 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -57,11 +57,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.28.80 +boto3==1.29.0 # via # feast (setup.py) # moto -botocore==1.31.80 +botocore==1.32.0 # via # boto3 # moto @@ -135,7 +135,7 @@ cryptography==41.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) db-dtypes==1.1.1 # via google-cloud-bigquery @@ -177,7 +177,7 @@ fastavro==1.9.0 # via # feast (setup.py) # pandavro -fastjsonschema==2.18.1 +fastjsonschema==2.19.0 # via nbformat filelock==3.13.1 # via @@ -199,7 +199,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.13.0 +google-api-core[grpc]==2.14.0 # via # feast (setup.py) # firebase-admin @@ -211,7 +211,7 @@ google-api-core[grpc]==2.13.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.107.0 +google-api-python-client==2.108.0 # via firebase-admin google-auth==2.23.4 # via @@ -262,7 +262,7 @@ great-expectations==0.15.50 # via feast (setup.py) greenlet==3.0.1 # via sqlalchemy -grpc-google-iam-v1==0.12.6 +grpc-google-iam-v1==0.12.7 # via google-cloud-bigtable grpcio==1.59.2 # via @@ -298,7 +298,7 @@ hazelcast-python-client==5.3.0 # via feast (setup.py) hiredis==2.2.3 # via feast (setup.py) -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httplib2==0.22.0 # via @@ -376,7 +376,7 @@ jsonschema[format-nongpl]==4.19.2 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.7.1 +jsonschema-specifications==2023.11.1 # via jsonschema jupyter-client==8.6.0 # via @@ -409,7 +409,7 @@ jupyterlab==4.0.8 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert -jupyterlab-server==2.25.0 +jupyterlab-server==2.25.1 # via # jupyterlab # notebook @@ -419,7 +419,7 @@ kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd -makefun==1.15.1 +makefun==1.15.2 # via great-expectations markupsafe==2.1.3 # via @@ -446,7 +446,7 @@ mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.7 +moto==4.2.8 # via feast (setup.py) msal==1.25.0 # via @@ -564,7 +564,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.18.0 # via jupyter-server -prompt-toolkit==3.0.39 +prompt-toolkit==3.0.41 # via ipython proto-plus==1.22.3 # via @@ -610,7 +610,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) @@ -683,7 +683,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.3.1 +pytest-xdist==3.4.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -723,7 +723,7 @@ pyzmq==25.1.1 # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.30.2 +referencing==0.31.0 # via # jsonschema # jsonschema-specifications @@ -751,7 +751,7 @@ requests==2.31.0 # trino requests-oauthlib==1.3.1 # via kubernetes -responses==0.24.0 +responses==0.24.1 # via moto rfc3339-validator==0.1.4 # via @@ -798,7 +798,7 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.4.0 +snowflake-connector-python[pandas]==3.5.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -826,7 +826,7 @@ sphinxcontrib-serializinghtml==1.1.9 # via sphinx sqlalchemy[mypy]==1.4.50 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a36 +sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy stack-data==0.6.3 # via ipython @@ -836,7 +836,7 @@ tabulate==0.9.0 # via feast (setup.py) tenacity==8.2.3 # via feast (setup.py) -terminado==0.17.1 +terminado==0.18.0 # via # jupyter-server # jupyter-server-terminals @@ -916,7 +916,7 @@ types-redis==4.6.0.10 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.0 +types-setuptools==68.2.0.1 # via feast (setup.py) types-tabulate==0.9.0.3 # via feast (setup.py) @@ -966,7 +966,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.9 +wcwidth==0.2.10 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -987,7 +987,7 @@ wheel==0.41.3 # via pip-tools widgetsnbextension==4.0.9 # via ipywidgets -wrapt==1.15.0 +wrapt==1.16.0 # via testcontainers xmltodict==0.13.0 # via moto diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 41fee168bf4..12724dc902d 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -36,7 +36,7 @@ cloudpickle==3.0.0 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) dill==0.3.7 # via feast (setup.py) @@ -72,7 +72,7 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httptools==0.6.1 # via uvicorn @@ -93,7 +93,7 @@ jinja2==3.1.2 # via feast (setup.py) jsonschema==4.19.2 # via feast (setup.py) -jsonschema-specifications==2023.7.1 +jsonschema-specifications==2023.11.1 # via jsonschema locket==1.0.0 # via partd @@ -103,7 +103,7 @@ mmh3==4.0.1 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.6.1 +mypy==1.7.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -137,7 +137,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==11.0.0 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via @@ -156,7 +156,7 @@ pyyaml==6.0.1 # dask # feast (setup.py) # uvicorn -referencing==0.30.2 +referencing==0.31.0 # via # jsonschema # jsonschema-specifications @@ -176,7 +176,7 @@ sniffio==1.3.0 # httpx sqlalchemy[mypy]==1.4.50 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a36 +sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy starlette==0.27.0 # via fastapi @@ -205,7 +205,7 @@ typing-extensions==4.8.0 # pydantic # sqlalchemy2-stubs # uvicorn -urllib3==2.0.7 +urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 # via feast (setup.py) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 855074057a9..ef52424aabc 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -630,7 +630,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) @@ -821,7 +821,7 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.4.0 +snowflake-connector-python[pandas]==3.5.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 174dc7d6f52..e2394f36a5b 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -142,7 +142,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==11.0.0 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 4bf35b4adfe..7d6f06dfaac 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -57,11 +57,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.28.80 +boto3==1.28.85 # via # feast (setup.py) # moto -botocore==1.31.80 +botocore==1.31.85 # via # boto3 # moto @@ -121,7 +121,9 @@ comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via pytest-cov + # via + # coverage + # pytest-cov cryptography==41.0.5 # via # azure-identity @@ -135,7 +137,7 @@ cryptography==41.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) db-dtypes==1.1.1 # via google-cloud-bigquery @@ -199,7 +201,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.13.0 +google-api-core[grpc]==2.14.0 # via # feast (setup.py) # firebase-admin @@ -224,7 +226,9 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) + # via + # feast (setup.py) + # google-cloud-bigquery google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -262,7 +266,7 @@ great-expectations==0.15.50 # via feast (setup.py) greenlet==3.0.1 # via sqlalchemy -grpc-google-iam-v1==0.12.6 +grpc-google-iam-v1==0.12.7 # via google-cloud-bigtable grpcio==1.59.2 # via @@ -298,7 +302,7 @@ hazelcast-python-client==5.3.0 # via feast (setup.py) hiredis==2.2.3 # via feast (setup.py) -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httplib2==0.22.0 # via @@ -416,7 +420,7 @@ jupyterlab==4.0.8 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert -jupyterlab-server==2.25.0 +jupyterlab-server==2.25.1 # via # jupyterlab # notebook @@ -426,7 +430,7 @@ kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd -makefun==1.15.1 +makefun==1.15.2 # via great-expectations markupsafe==2.1.3 # via @@ -453,7 +457,7 @@ mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.7 +moto==4.2.8 # via feast (setup.py) msal==1.25.0 # via @@ -473,7 +477,7 @@ mypy-extensions==1.0.0 # via # black # mypy -mypy-protobuf==3.1 +mypy-protobuf==3.1.0 # via feast (setup.py) mysqlclient==2.2.0 # via feast (setup.py) @@ -571,7 +575,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.18.0 # via jupyter-server -prompt-toolkit==3.0.39 +prompt-toolkit==3.0.40 # via ipython proto-plus==1.22.3 # via @@ -617,7 +621,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) @@ -690,7 +694,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.3.1 +pytest-xdist==3.4.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -807,8 +811,10 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.4.0 - # via feast (setup.py) +snowflake-connector-python[pandas]==3.5.0 + # via + # feast (setup.py) + # snowflake-connector-python sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -834,7 +840,9 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via sphinx sqlalchemy[mypy]==1.4.50 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a36 # via sqlalchemy stack-data==0.6.3 @@ -845,7 +853,7 @@ tabulate==0.9.0 # via feast (setup.py) tenacity==8.2.3 # via feast (setup.py) -terminado==0.17.1 +terminado==0.18.0 # via # jupyter-server # jupyter-server-terminals @@ -925,7 +933,7 @@ types-redis==4.6.0.10 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.0 +types-setuptools==68.2.0.1 # via feast (setup.py) types-tabulate==0.9.0.3 # via feast (setup.py) @@ -967,7 +975,9 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 @@ -978,7 +988,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.9 +wcwidth==0.2.10 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -999,7 +1009,7 @@ wheel==0.41.3 # via pip-tools widgetsnbextension==4.0.9 # via ipywidgets -wrapt==1.15.0 +wrapt==1.16.0 # via testcontainers xmltodict==0.13.0 # via moto diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 12612c34657..2cb99c9cb8b 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -36,7 +36,7 @@ cloudpickle==3.0.0 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) dill==0.3.7 # via feast (setup.py) @@ -72,7 +72,7 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httptools==0.6.1 # via uvicorn @@ -103,11 +103,11 @@ mmh3==4.0.1 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.6.1 +mypy==1.7.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy -mypy-protobuf==3.1 +mypy-protobuf==3.1.0 # via feast (setup.py) numpy==1.24.4 # via @@ -137,7 +137,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==11.0.0 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via @@ -175,7 +175,9 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a36 # via sqlalchemy starlette==0.27.0 @@ -206,10 +208,12 @@ typing-extensions==4.8.0 # sqlalchemy2-stubs # starlette # uvicorn -urllib3==2.0.7 +urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py b/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py new file mode 100644 index 00000000000..44885e63347 --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py @@ -0,0 +1,38 @@ +import random +import tempfile +from datetime import datetime + +import pandas as pd +import pyarrow as pa + + +def test_write_to_bigquery(): + now = datetime.utcnow() + ts = pd.Timestamp(now).round("ms") + + df_to_write = pd.DataFrame.from_dict( + { + "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)], + "created": [ts, ts], + "conv_rate": [random.random(), random.random()], + "event_timestamp": [ts, ts], + "acc_rate": [random.random(), random.random()], + "driver_id": [1001, 1001], + }, + ) + + # From line 1527 of feature_store.py as for v0.34.1 + table = pa.Table.from_pandas(df_to_write) + + with tempfile.TemporaryFile() as parquet_temp_file: + pa.parquet.write_table( + table=table, where=parquet_temp_file, coerce_timestamps="ms" + ) + + parquet_temp_file.seek(0) + + # Need to check the timestamp type of this parquet file. + final_table = pa.parquet.read_table(parquet_temp_file) + + # Google bigquery api only accept "ms" + assert final_table.schema.field("created").type == pa.timestamp(unit="ms") diff --git a/setup.py b/setup.py index 38b7d295a93..33bf76e1819 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ # Higher than 4.23.4 seems to cause a seg fault "protobuf<4.23.4,>3.20", "proto-plus>=1.20.0,<2", - "pyarrow>=4,<12", + "pyarrow>=4", "pydantic>=1,<2", "pygments>=2.12.0,<3", "PyYAML>=5.4.0,<7",