From 9d671f160a61fda3469af84d70e3f451a2b0438c Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Mon, 13 Nov 2023 23:01:20 -0500 Subject: [PATCH 01/14] fix: Update the pyarrow to latest v14.0.1 regarding the CVE-2023-47248. Signed-off-by: Shuchu Han --- .../requirements/py3.10-ci-requirements.txt | 72 ++++++++++++------- .../requirements/py3.10-requirements.txt | 25 ++++--- .../requirements/py3.8-ci-requirements.txt | 4 +- .../requirements/py3.8-requirements.txt | 2 +- .../requirements/py3.9-ci-requirements.txt | 56 +++++++++------ .../requirements/py3.9-requirements.txt | 20 +++--- setup.py | 2 +- 7 files changed, 113 insertions(+), 68 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index c74a3f12e15..616c58f5f79 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt @@ -57,11 +57,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.28.80 +boto3==1.28.85 # via # feast (setup.py) # moto -botocore==1.31.80 +botocore==1.31.85 # via # boto3 # moto @@ -121,7 +121,9 @@ comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via pytest-cov + # via + # coverage + # pytest-cov cryptography==41.0.5 # via # azure-identity @@ -135,7 +137,7 @@ cryptography==41.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) db-dtypes==1.1.1 # via google-cloud-bigquery @@ -199,7 +201,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.13.0 +google-api-core[grpc]==2.14.0 # via # feast (setup.py) # firebase-admin @@ -224,7 +226,9 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) + # via + # feast (setup.py) + # google-cloud-bigquery google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -262,7 +266,7 @@ great-expectations==0.15.50 # via feast (setup.py) greenlet==3.0.1 # via sqlalchemy -grpc-google-iam-v1==0.12.6 +grpc-google-iam-v1==0.12.7 # via google-cloud-bigtable grpcio==1.59.2 # via @@ -298,7 +302,7 @@ hazelcast-python-client==5.3.0 # via feast (setup.py) hiredis==2.2.3 # via feast (setup.py) -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httplib2==0.22.0 # via @@ -321,9 +325,16 @@ imagesize==1.4.1 # via sphinx importlib-metadata==6.8.0 # via + # build # dask # feast (setup.py) # great-expectations + # jupyter-client + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # nbconvert + # sphinx importlib-resources==6.1.1 # via feast (setup.py) iniconfig==2.0.0 @@ -409,7 +420,7 @@ jupyterlab==4.0.8 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert -jupyterlab-server==2.25.0 +jupyterlab-server==2.25.1 # via # jupyterlab # notebook @@ -419,7 +430,7 @@ kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd -makefun==1.15.1 +makefun==1.15.2 # via great-expectations markupsafe==2.1.3 # via @@ -446,7 +457,7 @@ mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.7 +moto==4.2.8 # via feast (setup.py) msal==1.25.0 # via @@ -564,7 +575,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.18.0 # via jupyter-server -prompt-toolkit==3.0.39 +prompt-toolkit==3.0.40 # via ipython proto-plus==1.22.3 # via @@ -610,7 +621,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) @@ -683,7 +694,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.3.1 +pytest-xdist==3.4.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -771,6 +782,8 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml s3transfer==0.7.0 # via boto3 scipy==1.11.3 @@ -798,8 +811,10 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.4.0 - # via feast (setup.py) +snowflake-connector-python[pandas]==3.5.0 + # via + # feast (setup.py) + # snowflake-connector-python sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -825,7 +840,9 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via sphinx sqlalchemy[mypy]==1.4.50 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a36 # via sqlalchemy stack-data==0.6.3 @@ -836,7 +853,7 @@ tabulate==0.9.0 # via feast (setup.py) tenacity==8.2.3 # via feast (setup.py) -terminado==0.17.1 +terminado==0.18.0 # via # jupyter-server # jupyter-server-terminals @@ -916,7 +933,7 @@ types-redis==4.6.0.10 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.0 +types-setuptools==68.2.0.1 # via feast (setup.py) types-tabulate==0.9.0.3 # via feast (setup.py) @@ -927,12 +944,15 @@ typing-extensions==4.8.0 # async-lru # azure-core # azure-storage-blob + # black # fastapi # great-expectations + # ipython # mypy # pydantic # snowflake-connector-python # sqlalchemy2-stubs + # starlette # uvicorn tzlocal==5.2 # via @@ -955,7 +975,9 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 @@ -966,7 +988,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.9 +wcwidth==0.2.10 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -987,12 +1009,14 @@ wheel==0.41.3 # via pip-tools widgetsnbextension==4.0.9 # via ipywidgets -wrapt==1.15.0 +wrapt==1.16.0 # via testcontainers xmltodict==0.13.0 # via moto zipp==3.17.0 - # via importlib-metadata + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 41fee168bf4..47c17906ab8 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt @@ -36,7 +36,7 @@ cloudpickle==3.0.0 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) dill==0.3.7 # via feast (setup.py) @@ -72,7 +72,7 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httptools==0.6.1 # via uvicorn @@ -103,7 +103,7 @@ mmh3==4.0.1 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.6.1 +mypy==1.7.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -137,7 +137,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==11.0.0 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via @@ -175,7 +175,9 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a36 # via sqlalchemy starlette==0.27.0 @@ -204,11 +206,14 @@ typing-extensions==4.8.0 # mypy # pydantic # sqlalchemy2-stubs + # starlette # uvicorn -urllib3==2.0.7 +urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn volatile==2.1.0 @@ -218,7 +223,9 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.17.0 - # via importlib-metadata + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 855074057a9..ef52424aabc 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -630,7 +630,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) @@ -821,7 +821,7 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.4.0 +snowflake-connector-python[pandas]==3.5.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 174dc7d6f52..e2394f36a5b 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -142,7 +142,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==11.0.0 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 4bf35b4adfe..7d6f06dfaac 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -57,11 +57,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.28.80 +boto3==1.28.85 # via # feast (setup.py) # moto -botocore==1.31.80 +botocore==1.31.85 # via # boto3 # moto @@ -121,7 +121,9 @@ comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via pytest-cov + # via + # coverage + # pytest-cov cryptography==41.0.5 # via # azure-identity @@ -135,7 +137,7 @@ cryptography==41.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) db-dtypes==1.1.1 # via google-cloud-bigquery @@ -199,7 +201,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.13.0 +google-api-core[grpc]==2.14.0 # via # feast (setup.py) # firebase-admin @@ -224,7 +226,9 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) + # via + # feast (setup.py) + # google-cloud-bigquery google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -262,7 +266,7 @@ great-expectations==0.15.50 # via feast (setup.py) greenlet==3.0.1 # via sqlalchemy -grpc-google-iam-v1==0.12.6 +grpc-google-iam-v1==0.12.7 # via google-cloud-bigtable grpcio==1.59.2 # via @@ -298,7 +302,7 @@ hazelcast-python-client==5.3.0 # via feast (setup.py) hiredis==2.2.3 # via feast (setup.py) -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httplib2==0.22.0 # via @@ -416,7 +420,7 @@ jupyterlab==4.0.8 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert -jupyterlab-server==2.25.0 +jupyterlab-server==2.25.1 # via # jupyterlab # notebook @@ -426,7 +430,7 @@ kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd -makefun==1.15.1 +makefun==1.15.2 # via great-expectations markupsafe==2.1.3 # via @@ -453,7 +457,7 @@ mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.7 +moto==4.2.8 # via feast (setup.py) msal==1.25.0 # via @@ -473,7 +477,7 @@ mypy-extensions==1.0.0 # via # black # mypy -mypy-protobuf==3.1 +mypy-protobuf==3.1.0 # via feast (setup.py) mysqlclient==2.2.0 # via feast (setup.py) @@ -571,7 +575,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.18.0 # via jupyter-server -prompt-toolkit==3.0.39 +prompt-toolkit==3.0.40 # via ipython proto-plus==1.22.3 # via @@ -617,7 +621,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) @@ -690,7 +694,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.3.1 +pytest-xdist==3.4.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -807,8 +811,10 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.4.0 - # via feast (setup.py) +snowflake-connector-python[pandas]==3.5.0 + # via + # feast (setup.py) + # snowflake-connector-python sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -834,7 +840,9 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via sphinx sqlalchemy[mypy]==1.4.50 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a36 # via sqlalchemy stack-data==0.6.3 @@ -845,7 +853,7 @@ tabulate==0.9.0 # via feast (setup.py) tenacity==8.2.3 # via feast (setup.py) -terminado==0.17.1 +terminado==0.18.0 # via # jupyter-server # jupyter-server-terminals @@ -925,7 +933,7 @@ types-redis==4.6.0.10 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.0 +types-setuptools==68.2.0.1 # via feast (setup.py) types-tabulate==0.9.0.3 # via feast (setup.py) @@ -967,7 +975,9 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 @@ -978,7 +988,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.9 +wcwidth==0.2.10 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -999,7 +1009,7 @@ wheel==0.41.3 # via pip-tools widgetsnbextension==4.0.9 # via ipywidgets -wrapt==1.15.0 +wrapt==1.16.0 # via testcontainers xmltodict==0.13.0 # via moto diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 12612c34657..2cb99c9cb8b 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -36,7 +36,7 @@ cloudpickle==3.0.0 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.10.1 +dask==2023.11.0 # via feast (setup.py) dill==0.3.7 # via feast (setup.py) @@ -72,7 +72,7 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.1 +httpcore==1.0.2 # via httpx httptools==0.6.1 # via uvicorn @@ -103,11 +103,11 @@ mmh3==4.0.1 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.6.1 +mypy==1.7.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy -mypy-protobuf==3.1 +mypy-protobuf==3.1.0 # via feast (setup.py) numpy==1.24.4 # via @@ -137,7 +137,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==11.0.0 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via @@ -175,7 +175,9 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a36 # via sqlalchemy starlette==0.27.0 @@ -206,10 +208,12 @@ typing-extensions==4.8.0 # sqlalchemy2-stubs # starlette # uvicorn -urllib3==2.0.7 +urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/setup.py b/setup.py index 38b7d295a93..33bf76e1819 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ # Higher than 4.23.4 seems to cause a seg fault "protobuf<4.23.4,>3.20", "proto-plus>=1.20.0,<2", - "pyarrow>=4,<12", + "pyarrow>=4", "pydantic>=1,<2", "pygments>=2.12.0,<3", "PyYAML>=5.4.0,<7", From 9b4ec66f1277f17c4f1c13fbfbd1a1274056e2d8 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Tue, 14 Nov 2023 20:14:17 -0500 Subject: [PATCH 02/14] fix: Update the requirements file of python3.10. Signed-off-by: Shuchu Han --- .../requirements/py3.10-ci-requirements.txt | 56 ++++++------------- .../requirements/py3.10-requirements.txt | 21 +++---- 2 files changed, 23 insertions(+), 54 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 616c58f5f79..ee29862c30f 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt @@ -57,11 +57,11 @@ black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.28.85 +boto3==1.29.0 # via # feast (setup.py) # moto -botocore==1.31.85 +botocore==1.32.0 # via # boto3 # moto @@ -121,9 +121,7 @@ comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via - # coverage - # pytest-cov + # via pytest-cov cryptography==41.0.5 # via # azure-identity @@ -179,7 +177,7 @@ fastavro==1.9.0 # via # feast (setup.py) # pandavro -fastjsonschema==2.18.1 +fastjsonschema==2.19.0 # via nbformat filelock==3.13.1 # via @@ -213,7 +211,7 @@ google-api-core[grpc]==2.14.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.107.0 +google-api-python-client==2.108.0 # via firebase-admin google-auth==2.23.4 # via @@ -226,9 +224,7 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery + # via feast (setup.py) google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -325,16 +321,9 @@ imagesize==1.4.1 # via sphinx importlib-metadata==6.8.0 # via - # build # dask # feast (setup.py) # great-expectations - # jupyter-client - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # nbconvert - # sphinx importlib-resources==6.1.1 # via feast (setup.py) iniconfig==2.0.0 @@ -387,7 +376,7 @@ jsonschema[format-nongpl]==4.19.2 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.7.1 +jsonschema-specifications==2023.11.1 # via jsonschema jupyter-client==8.6.0 # via @@ -575,7 +564,7 @@ pre-commit==3.3.1 # via feast (setup.py) prometheus-client==0.18.0 # via jupyter-server -prompt-toolkit==3.0.40 +prompt-toolkit==3.0.41 # via ipython proto-plus==1.22.3 # via @@ -734,7 +723,7 @@ pyzmq==25.1.1 # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.30.2 +referencing==0.31.0 # via # jsonschema # jsonschema-specifications @@ -762,7 +751,7 @@ requests==2.31.0 # trino requests-oauthlib==1.3.1 # via kubernetes -responses==0.24.0 +responses==0.24.1 # via moto rfc3339-validator==0.1.4 # via @@ -782,8 +771,6 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruamel-yaml-clib==0.2.8 - # via ruamel-yaml s3transfer==0.7.0 # via boto3 scipy==1.11.3 @@ -812,9 +799,7 @@ sniffio==1.3.0 snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==3.5.0 - # via - # feast (setup.py) - # snowflake-connector-python + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -840,10 +825,8 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via sphinx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy -sqlalchemy2-stubs==0.0.2a36 + # via feast (setup.py) +sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy stack-data==0.6.3 # via ipython @@ -944,15 +927,12 @@ typing-extensions==4.8.0 # async-lru # azure-core # azure-storage-blob - # black # fastapi # great-expectations - # ipython # mypy # pydantic # snowflake-connector-python # sqlalchemy2-stubs - # starlette # uvicorn tzlocal==5.2 # via @@ -975,9 +955,7 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 @@ -1014,9 +992,7 @@ wrapt==1.16.0 xmltodict==0.13.0 # via moto zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 47c17906ab8..12724dc902d 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt @@ -93,7 +93,7 @@ jinja2==3.1.2 # via feast (setup.py) jsonschema==4.19.2 # via feast (setup.py) -jsonschema-specifications==2023.7.1 +jsonschema-specifications==2023.11.1 # via jsonschema locket==1.0.0 # via partd @@ -156,7 +156,7 @@ pyyaml==6.0.1 # dask # feast (setup.py) # uvicorn -referencing==0.30.2 +referencing==0.31.0 # via # jsonschema # jsonschema-specifications @@ -175,10 +175,8 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy -sqlalchemy2-stubs==0.0.2a36 + # via feast (setup.py) +sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy starlette==0.27.0 # via fastapi @@ -206,14 +204,11 @@ typing-extensions==4.8.0 # mypy # pydantic # sqlalchemy2-stubs - # starlette # uvicorn urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn volatile==2.1.0 @@ -223,9 +218,7 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools From 0e0a5ad48e57171552bd5506f4c28c15d5c9909c Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 00:20:27 -0500 Subject: [PATCH 03/14] fix: force the timestamp's datatype resolution to ms for bigquery while write to temporary parquet file. Signed-off-by: Shuchu Han --- .../feast/infra/offline_stores/bigquery.py | 4 +- .../offline_stores/test_bigquery_pyarrow.py | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 10c8aa783fb..4c70a69a585 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -400,7 +400,9 @@ def offline_write_batch( ) with tempfile.TemporaryFile() as parquet_temp_file: - pyarrow.parquet.write_table(table=table, where=parquet_temp_file) + pyarrow.parquet.write_table( + table=table, where=parquet_temp_file, coerce_timestamps="ms" + ) parquet_temp_file.seek(0) diff --git a/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py b/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py new file mode 100644 index 00000000000..b5c9eb09f52 --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py @@ -0,0 +1,39 @@ +import random +import tempfile +from datetime import datetime + +import pandas as pd +import pyarrow as pa + + +def test_write_to_bigquery(): + now = datetime.utcnow() + ts = pd.Timestamp(now).round("ms") + + # This dataframe has columns in the wrong order. + df_to_write = pd.DataFrame.from_dict( + { + "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)], + "created": [ts, ts], + "conv_rate": [random.random(), random.random()], + "event_timestamp": [ts, ts], + "acc_rate": [random.random(), random.random()], + "driver_id": [1001, 1001], + }, + ) + + # From line 1527 of feature_store.py + table = pa.Table.from_pandas(df_to_write) + + with tempfile.TemporaryFile() as parquet_temp_file: + pa.parquet.write_table( + table=table, where=parquet_temp_file, coerce_timestamps="ms" + ) + + parquet_temp_file.seek(0) + + # Need to check the timestamp type of this parquet file. + final_table = pa.parquet.read_table(parquet_temp_file) + + # Google bigquery api only accept "ms" + assert final_table.schema.field("created").type == pa.timestamp(unit="ms") From fe61d0d6906093d12b7360ea2be8d13cba848610 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 01:41:55 -0500 Subject: [PATCH 04/14] fix: Update the timestamp resolution to us which is exact same as using pyarrow v10.0.1 Signed-off-by: Shuchu Han --- sdk/python/feast/infra/offline_stores/bigquery.py | 2 +- .../tests/unit/infra/offline_stores/test_bigquery_pyarrow.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 4c70a69a585..9dc9acc2b74 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -401,7 +401,7 @@ def offline_write_batch( with tempfile.TemporaryFile() as parquet_temp_file: pyarrow.parquet.write_table( - table=table, where=parquet_temp_file, coerce_timestamps="ms" + table=table, where=parquet_temp_file, coerce_timestamps="us" ) parquet_temp_file.seek(0) diff --git a/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py b/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py index b5c9eb09f52..44885e63347 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_bigquery_pyarrow.py @@ -10,7 +10,6 @@ def test_write_to_bigquery(): now = datetime.utcnow() ts = pd.Timestamp(now).round("ms") - # This dataframe has columns in the wrong order. df_to_write = pd.DataFrame.from_dict( { "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)], @@ -22,7 +21,7 @@ def test_write_to_bigquery(): }, ) - # From line 1527 of feature_store.py + # From line 1527 of feature_store.py as for v0.34.1 table = pa.Table.from_pandas(df_to_write) with tempfile.TemporaryFile() as parquet_temp_file: From e357d79787c94f933814af717b71792bb1598507 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 21:09:52 -0500 Subject: [PATCH 05/14] fix: Add debug info to check the entity_df before send to redshift. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index f48dfbb86b4..b2081bd0396 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -168,6 +168,9 @@ def execute_redshift_statement( Returns: Statement ID """ + # debug + print("Redshift querystring: {}".format(query)) + statement = execute_redshift_statement_async( redshift_data_client, cluster_id, workgroup, database, user, query ) From 20e55c4ac90daabdeee81efc74fc8cac310dea1d Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 22:02:23 -0500 Subject: [PATCH 06/14] fix: add debug to check the loaded pyarrow table file. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index b2081bd0396..737e339dea5 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -168,9 +168,6 @@ def execute_redshift_statement( Returns: Statement ID """ - # debug - print("Redshift querystring: {}".format(query)) - statement = execute_redshift_statement_async( redshift_data_client, cluster_id, workgroup, database, user, query ) @@ -595,6 +592,12 @@ def unload_redshift_query_to_df( iam_role, query, ) + + # Debug + print("Debug ------") + print(table.schema) + print(table) + print("Debug ++++++") return table.to_pandas() From fbb5b200531edc1b19849fa7ad02b115df01ba65 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 22:28:20 -0500 Subject: [PATCH 07/14] fix: debug the transformation from pyarrow table to pandas df. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 737e339dea5..5e362ceb39f 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -598,7 +598,11 @@ def unload_redshift_query_to_df( print(table.schema) print(table) print("Debug ++++++") - return table.to_pandas() + + df = table.to_pandas() + print(df) + print("Debug ++++++") + return df def get_lambda_function(lambda_client, function_name: str) -> Optional[Dict]: From 059914b50f83aba82a678d47fd7da41e1a15a3ac Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 22:59:13 -0500 Subject: [PATCH 08/14] fix: roll pyarrow back. Signed-off-by: Shuchu Han --- sdk/python/requirements/py3.8-ci-requirements.txt | 2 +- sdk/python/requirements/py3.8-requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index ef52424aabc..ff4f7cb5ca7 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -630,7 +630,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==14.0.1 +pyarrow==10.0.1 # via # db-dtypes # feast (setup.py) diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index e2394f36a5b..cbbe6d54bf4 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -142,7 +142,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==14.0.1 +pyarrow==10.0.1 # via feast (setup.py) pydantic==1.10.13 # via From f032d7a158762f8f9cd5d417078795476e4cb7c1 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 16 Nov 2023 23:59:11 -0500 Subject: [PATCH 09/14] fix: force test fail to check table and pandas df. Signed-off-by: Shuchu Han --- sdk/python/tests/integration/e2e/test_validation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index f49ed80a265..258183004d1 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -73,6 +73,10 @@ def test_historical_retrieval_with_validation(environment, universal_data_source reference = saved_dataset.as_reference(name="ref", profiler=configurable_profiler) job.to_df(validation_reference=reference) + # debug + # fail on purpose to see the stdout! + assert False + @pytest.mark.integration def test_historical_retrieval_fails_on_validation(environment, universal_data_sources): From a2e217fb0aa7c7ea16004519324f5d3daac145a6 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Fri, 17 Nov 2023 01:33:11 -0500 Subject: [PATCH 10/14] fix: debug the transformation from parquet file to pyarrow table. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 5e362ceb39f..0762ef91844 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -566,7 +566,17 @@ def unload_redshift_query_to_pa( with tempfile.TemporaryDirectory() as temp_dir: download_s3_directory(s3_resource, bucket, key, temp_dir) delete_s3_directory(s3_resource, bucket, key) - return pq.read_table(temp_dir) + + # Debug + print("debug temp file. ") + parquet_file = pq.ParquetFile(temp_dir) + parquet_file.read() + + print("transfer to table.") + table = pq.read_table(temp_dir) + print(table) + + return table def unload_redshift_query_to_df( From 0e43126cc79462497ba57154fa6ea23dd02fd2db Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Fri, 17 Nov 2023 01:57:30 -0500 Subject: [PATCH 11/14] fix: debug the parquet file. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 0762ef91844..a0b91fcb59c 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -569,8 +569,9 @@ def unload_redshift_query_to_pa( # Debug print("debug temp file. ") - parquet_file = pq.ParquetFile(temp_dir) - parquet_file.read() + parquet_file = pq.ParquetDataset(temp_dir) + df = parquet_file.read().to_pandas() + print(df) print("transfer to table.") table = pq.read_table(temp_dir) From 19d30ef6240e5869d33226f3403f8fa50384bfd8 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Fri, 17 Nov 2023 02:22:11 -0500 Subject: [PATCH 12/14] fix: test 14.0.1 parquet file. Signed-off-by: Shuchu Han --- sdk/python/requirements/py3.8-ci-requirements.txt | 2 +- sdk/python/requirements/py3.8-requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index ff4f7cb5ca7..ef52424aabc 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -630,7 +630,7 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.1 # via # db-dtypes # feast (setup.py) diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index cbbe6d54bf4..e2394f36a5b 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -142,7 +142,7 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==10.0.1 +pyarrow==14.0.1 # via feast (setup.py) pydantic==1.10.13 # via From ffe625510ed4d53eebc7d9fd8b9dbc87dd45531c Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Fri, 17 Nov 2023 10:38:36 -0500 Subject: [PATCH 13/14] fix: test the parquet reader of pandas instead of pyarrow. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 7 ++++--- sdk/python/tests/integration/e2e/test_validation.py | 4 ---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index a0b91fcb59c..14fe5b0fba0 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -568,9 +568,10 @@ def unload_redshift_query_to_pa( delete_s3_directory(s3_resource, bucket, key) # Debug - print("debug temp file. ") - parquet_file = pq.ParquetDataset(temp_dir) - df = parquet_file.read().to_pandas() + print("debug temp file using pandas. ") + df = pd.read_parquet(temp_dir) + # parquet_file = pq.ParquetDataset(temp_dir) + # df = parquet_file.read().to_pandas() print(df) print("transfer to table.") diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index 258183004d1..f49ed80a265 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -73,10 +73,6 @@ def test_historical_retrieval_with_validation(environment, universal_data_source reference = saved_dataset.as_reference(name="ref", profiler=configurable_profiler) job.to_df(validation_reference=reference) - # debug - # fail on purpose to see the stdout! - assert False - @pytest.mark.integration def test_historical_retrieval_fails_on_validation(environment, universal_data_sources): From dc2630e697e67db6750b7a7f67503f193c45144c Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Fri, 17 Nov 2023 11:18:02 -0500 Subject: [PATCH 14/14] fix: use datetime resolution us while writing test data into redshift. Signed-off-by: Shuchu Han --- sdk/python/feast/infra/utils/aws_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 14fe5b0fba0..89fb60f2441 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -351,7 +351,7 @@ def upload_arrow_table_to_redshift( else: # Write the PyArrow Table on disk in Parquet format and upload it to S3 with tempfile.TemporaryFile(suffix=".parquet") as parquet_temp_file: - pq.write_table(table, parquet_temp_file) + pq.write_table(table, parquet_temp_file, coerce_timestamps="us") parquet_temp_file.seek(0) s3_resource.Object(bucket, key).put(Body=parquet_temp_file)