From 74653f5eeba16ac86d74a8fe5603e5af05cbdd36 Mon Sep 17 00:00:00 2001 From: tokoko Date: Fri, 3 May 2024 04:59:48 +0000 Subject: [PATCH 1/5] add materialization support to ibis/duckdb Signed-off-by: tokoko --- sdk/python/feast/infra/offline_stores/ibis.py | 71 +++++++++++++--- .../test_universal_materialization.py | 84 +++++++++++++++++++ 2 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 sdk/python/tests/integration/materialization/test_universal_materialization.py diff --git a/sdk/python/feast/infra/offline_stores/ibis.py b/sdk/python/feast/infra/offline_stores/ibis.py index de025ca0069..30fa8ef1d43 100644 --- a/sdk/python/feast/infra/offline_stores/ibis.py +++ b/sdk/python/feast/infra/offline_stores/ibis.py @@ -54,7 +54,40 @@ def pull_latest_from_table_or_query( start_date: datetime, end_date: datetime, ) -> RetrievalJob: - raise NotImplementedError() + fields = join_key_columns + feature_name_columns + [timestamp_field] + if created_timestamp_column: + fields.append(created_timestamp_column) + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + table = IbisOfflineStore._read_data_source(data_source) + + table = table.select(*fields) + + # TODO get rid of this fix + if "__log_date" in table.columns: + table = table.drop("__log_date") + + table = table.filter( + ibis.and_( + table[timestamp_field] >= ibis.literal(start_date), + table[timestamp_field] <= ibis.literal(end_date), + ) + ) + + table = deduplicate( + table=table, + group_by_cols=join_key_columns, + event_timestamp_col=timestamp_field, + created_timestamp_col=created_timestamp_column, + ) + + return IbisRetrievalJob( + table=table, + on_demand_feature_views=[], + full_feature_names=False, + metadata=None, + ) def _get_entity_df_event_timestamp_range( entity_df: pd.DataFrame, entity_df_event_timestamp_col: str @@ -386,6 +419,25 @@ def metadata(self) -> Optional[RetrievalMetadata]: return self._metadata +def deduplicate( + table: Table, + group_by_cols: List[str], + event_timestamp_col: str, + created_timestamp_col: Optional[str], +): + order_by_fields = [ibis.desc(table[event_timestamp_col])] + if created_timestamp_col: + order_by_fields.append(ibis.desc(table[created_timestamp_col])) + + table = ( + table.group_by(by=group_by_cols) + .order_by(order_by_fields) + .mutate(rn=ibis.row_number()) + ) + + return table.filter(table["rn"] == ibis.literal(0)).drop("rn") + + def point_in_time_join( entity_table: Table, feature_tables: List[Tuple[Table, str, str, Dict[str, str], List[str], timedelta]], @@ -440,20 +492,13 @@ def point_in_time_join( feature_table = feature_table.drop(s.endswith("_y")) - order_by_fields = [ibis.desc(feature_table[timestamp_field])] - if created_timestamp_field: - order_by_fields.append(ibis.desc(feature_table[created_timestamp_field])) - - feature_table = ( - feature_table.group_by(by="entity_row_id") - .order_by(order_by_fields) - .mutate(rn=ibis.row_number()) + feature_table = deduplicate( + table=feature_table, + group_by_cols=["entity_row_id"], + event_timestamp_col=timestamp_field, + created_timestamp_col=created_timestamp_field, ) - feature_table = feature_table.filter( - feature_table["rn"] == ibis.literal(0) - ).drop("rn") - select_cols = ["entity_row_id"] select_cols.extend(feature_refs) feature_table = feature_table.select(select_cols) diff --git a/sdk/python/tests/integration/materialization/test_universal_materialization.py b/sdk/python/tests/integration/materialization/test_universal_materialization.py new file mode 100644 index 00000000000..2f229f1137c --- /dev/null +++ b/sdk/python/tests/integration/materialization/test_universal_materialization.py @@ -0,0 +1,84 @@ +from datetime import timedelta + +import pytest + +from feast.entity import Entity +from feast.feature_view import FeatureView +from feast.field import Field +from feast.types import Float32 +from tests.data.data_creator import create_basic_driver_dataset +from tests.utils.e2e_test_validation import validate_offline_online_store_consistency + + +@pytest.mark.integration +@pytest.mark.universal_offline_stores +def test_universal_materialization_consistency(environment): + fs = environment.feature_store + + df = create_basic_driver_dataset() + + ds = environment.data_source_creator.create_data_source( + df, + fs.project, + field_mapping={"ts_1": "ts"}, + ) + + driver = Entity( + name="driver_id", + join_keys=["driver_id"], + ) + + driver_stats_fv = FeatureView( + name="driver_hourly_stats", + entities=[driver], + ttl=timedelta(weeks=52), + schema=[Field(name="value", dtype=Float32)], + source=ds, + ) + + fs.apply([driver, driver_stats_fv]) + + # materialization is run in two steps and + # we use timestamp from generated dataframe as a split point + split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) + + print(f"Split datetime: {split_dt}") + + validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) + + +# @pytest.mark.integration +# def test_spark_materialization_consistency(): +# spark_config = IntegrationTestRepoConfig( +# provider="local", +# online_store_creator=RedisOnlineStoreCreator, +# offline_store_creator=SparkDataSourceCreator, +# batch_engine={"type": "spark.engine", "partitions": 10}, +# ) +# spark_environment = construct_test_environment( +# spark_config, None, entity_key_serialization_version=1 +# ) + + +# driver_stats_fv = FeatureView( +# name="driver_hourly_stats", +# entities=[driver], +# ttl=timedelta(weeks=52), +# schema=[Field(name="value", dtype=Float32)], +# source=ds, +# ) + +# # try: +# fs.apply([driver, driver_stats_fv]) + +# # print(df) + +# # # materialization is run in two steps and +# # # we use timestamp from generated dataframe as a split point +# # split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) + +# # print(f"Split datetime: {split_dt}") + +# # validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) +# # finally: +# # fs.teardown() From 9483192399ba926218868e6895290c7f607b6bc7 Mon Sep 17 00:00:00 2001 From: tokoko Date: Fri, 3 May 2024 05:04:15 +0000 Subject: [PATCH 2/5] remove unnecessary comments Signed-off-by: tokoko --- .../test_universal_materialization.py | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/sdk/python/tests/integration/materialization/test_universal_materialization.py b/sdk/python/tests/integration/materialization/test_universal_materialization.py index 2f229f1137c..37030b1bb30 100644 --- a/sdk/python/tests/integration/materialization/test_universal_materialization.py +++ b/sdk/python/tests/integration/materialization/test_universal_materialization.py @@ -42,43 +42,4 @@ def test_universal_materialization_consistency(environment): # we use timestamp from generated dataframe as a split point split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) - print(f"Split datetime: {split_dt}") - validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) - - -# @pytest.mark.integration -# def test_spark_materialization_consistency(): -# spark_config = IntegrationTestRepoConfig( -# provider="local", -# online_store_creator=RedisOnlineStoreCreator, -# offline_store_creator=SparkDataSourceCreator, -# batch_engine={"type": "spark.engine", "partitions": 10}, -# ) -# spark_environment = construct_test_environment( -# spark_config, None, entity_key_serialization_version=1 -# ) - - -# driver_stats_fv = FeatureView( -# name="driver_hourly_stats", -# entities=[driver], -# ttl=timedelta(weeks=52), -# schema=[Field(name="value", dtype=Float32)], -# source=ds, -# ) - -# # try: -# fs.apply([driver, driver_stats_fv]) - -# # print(df) - -# # # materialization is run in two steps and -# # # we use timestamp from generated dataframe as a split point -# # split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) - -# # print(f"Split datetime: {split_dt}") - -# # validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) -# # finally: -# # fs.teardown() From 124903cf65780e855fc988e583187007b9dcb157 Mon Sep 17 00:00:00 2001 From: tokoko Date: Fri, 3 May 2024 06:47:23 +0000 Subject: [PATCH 3/5] pin ibis versions Signed-off-by: tokoko --- .../requirements/py3.10-ci-requirements.txt | 115 +++++++++++------- .../requirements/py3.10-requirements.txt | 81 +++++++++--- .../requirements/py3.9-ci-requirements.txt | 113 ++++++++++------- .../requirements/py3.9-requirements.txt | 81 +++++++++--- setup.py | 6 +- 5 files changed, 272 insertions(+), 124 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 452b52c73ad..6126184fa32 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -56,11 +56,11 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.88 +boto3==1.34.97 # via # feast (setup.py) # moto -botocore==1.34.88 +botocore==1.34.97 # via # boto3 # moto @@ -101,6 +101,7 @@ click==8.1.7 # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.0.0 # via dask @@ -112,7 +113,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 # via pytest-cov cryptography==42.0.5 # via @@ -131,7 +132,7 @@ dask[array,dataframe]==2024.4.2 # via # dask-expr # feast (setup.py) -dask-expr==1.0.12 +dask-expr==1.0.14 # via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -141,12 +142,14 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.16.4 +deltalake==0.17.3 # via feast (setup.py) dill==0.3.8 # via feast (setup.py) distlib==0.3.8 # via virtualenv +dnspython==2.6.1 + # via email-validator docker==7.0.0 # via # feast (setup.py) @@ -157,8 +160,10 @@ duckdb==0.10.2 # via # duckdb-engine # ibis-framework -duckdb-engine==0.11.5 +duckdb-engine==0.12.0 # via ibis-framework +email-validator==2.1.1 + # via fastapi entrypoints==0.4 # via altair exceptiongroup==1.2.1 @@ -170,11 +175,15 @@ execnet==2.1.1 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via + # fastapi-cli + # feast (setup.py) +fastapi-cli==0.0.2 + # via fastapi fastjsonschema==2.19.1 # via nbformat -filelock==3.13.4 +filelock==3.14.0 # via # snowflake-connector-python # virtualenv @@ -190,7 +199,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.18.0 +google-api-core[grpc]==2.19.0 # via # feast (setup.py) # firebase-admin @@ -202,7 +211,7 @@ google-api-core[grpc]==2.18.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.126.0 +google-api-python-client==2.127.0 # via firebase-admin google-auth==2.29.0 # via @@ -250,13 +259,13 @@ googleapis-common-protos[grpc]==1.63.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.12 +great-expectations==0.18.13 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.62.2 +grpcio==1.63.0 # via # feast (setup.py) # google-api-core @@ -300,6 +309,7 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via + # fastapi # feast (setup.py) # jupyterlab ibis-framework[duckdb]==8.0.0 @@ -308,11 +318,12 @@ ibis-framework[duckdb]==8.0.0 # ibis-substrait ibis-substrait==3.2.0 # via feast (setup.py) -identify==2.5.35 +identify==2.5.36 # via pre-commit idna==3.7 # via # anyio + # email-validator # httpx # jsonschema # requests @@ -325,7 +336,7 @@ iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.23.0 +ipython==8.24.0 # via # great-expectations # ipykernel @@ -341,6 +352,7 @@ jedi==0.19.1 jinja2==3.1.3 # via # altair + # fastapi # feast (setup.py) # great-expectations # jupyter-server @@ -361,7 +373,7 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.21.1 +jsonschema[format-nongpl]==4.22.0 # via # altair # feast (setup.py) @@ -398,11 +410,11 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.6 +jupyterlab==4.1.8 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.26.0 +jupyterlab-server==2.27.1 # via # jupyterlab # notebook @@ -421,7 +433,7 @@ markupsafe==2.1.5 # jinja2 # nbconvert # werkzeug -marshmallow==3.21.1 +marshmallow==3.21.2 # via great-expectations matplotlib-inline==0.1.7 # via @@ -451,7 +463,7 @@ msgpack==1.0.8 # via cachecontrol multipledispatch==1.0.0 # via ibis-framework -mypy==1.9.0 +mypy==1.10.0 # via # feast (setup.py) # sqlalchemy @@ -461,7 +473,7 @@ mypy-protobuf==3.3.0 # via feast (setup.py) nbclient==0.10.0 # via nbconvert -nbconvert==7.16.3 +nbconvert==7.16.4 # via jupyter-server nbformat==5.10.4 # via @@ -492,6 +504,8 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib +orjson==3.10.2 + # via fastapi overrides==7.7.0 # via jupyter-server packaging==24.0 @@ -545,7 +559,7 @@ platformdirs==3.11.0 # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest ply==3.11 # via thriftpy2 @@ -625,12 +639,12 @@ pybindgen==0.22.1 # via feast (setup.py) pycparser==2.22 # via cffi -pydantic==2.7.0 +pydantic==2.7.1 # via # fastapi # feast (setup.py) # great-expectations -pydantic-core==2.18.1 +pydantic-core==2.18.2 # via pydantic pygments==2.17.2 # via @@ -655,7 +669,7 @@ pyparsing==3.1.2 # via # great-expectations # httplib2 -pyproject-hooks==1.0.0 +pyproject-hooks==1.1.0 # via # build # pip-tools @@ -686,7 +700,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.6.0 +pytest-xdist==3.6.1 # via feast (setup.py) python-dateutil==2.9.0.post0 # via @@ -705,6 +719,8 @@ python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via # great-expectations @@ -722,19 +738,19 @@ pyyaml==6.0.1 # pre-commit # responses # uvicorn -pyzmq==26.0.2 +pyzmq==26.0.3 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.16 +regex==2024.4.28 # via feast (setup.py) requests==2.31.0 # via @@ -768,8 +784,10 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.7.1 - # via ibis-framework -rockset==2.1.1 + # via + # ibis-framework + # typer +rockset==2.1.2 # via feast (setup.py) rpds-py==0.18.0 # via @@ -779,7 +797,7 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.1 +ruff==0.4.2 # via feast (setup.py) s3transfer==0.10.1 # via boto3 @@ -787,6 +805,8 @@ scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +shellingham==1.5.4 + # via typer six==1.16.0 # via # asttokens @@ -806,7 +826,7 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.9.0 +snowflake-connector-python[pandas]==3.10.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -841,7 +861,7 @@ stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.16.0 +substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 # via feast (setup.py) @@ -851,11 +871,11 @@ terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -testcontainers==4.3.3 +testcontainers==4.4.0 # via feast (setup.py) thriftpy2==0.4.20 # via happybase -tinycss2==1.2.1 +tinycss2==1.3.0 # via nbconvert toml==0.10.2 # via feast (setup.py) @@ -866,7 +886,6 @@ tomli==2.0.1 # jupyterlab # mypy # pip-tools - # pyproject-hooks # pytest # pytest-env tomlkit==0.12.4 @@ -885,7 +904,7 @@ tornado==6.4 # jupyterlab # notebook # terminado -tqdm==4.66.2 +tqdm==4.66.4 # via # feast (setup.py) # great-expectations @@ -908,15 +927,17 @@ trino==0.328.0 # via feast (setup.py) typeguard==4.2.1 # via feast (setup.py) +typer==0.12.3 + # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 # via # feast (setup.py) # mypy-protobuf -types-pymysql==1.1.0.1 +types-pymysql==1.1.0.20240425 # via feast (setup.py) -types-pyopenssl==24.0.0.20240417 +types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 # via @@ -926,11 +947,11 @@ types-pytz==2024.1.0.20240417 # via feast (setup.py) types-pyyaml==6.0.12.20240311 # via feast (setup.py) -types-redis==4.6.0.20240417 +types-redis==4.6.0.20240425 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.5.0.20240415 +types-setuptools==69.5.0.20240423 # via # feast (setup.py) # types-cffi @@ -955,6 +976,7 @@ typing-extensions==4.11.0 # sqlalchemy # testcontainers # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas @@ -962,6 +984,8 @@ tzlocal==5.2 # via # great-expectations # trino +ujson==5.9.0 + # via fastapi uri-template==1.3.0 # via jsonschema uritemplate==4.1.1 @@ -979,7 +1003,10 @@ urllib3==1.26.18 # rockset # testcontainers uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli + # feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 @@ -996,7 +1023,7 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.7.0 +websocket-client==1.8.0 # via # jupyter-server # kubernetes diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 9f90db249a0..24946075a67 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -8,6 +8,7 @@ annotated-types==0.6.0 # via pydantic anyio==4.3.0 # via + # httpx # starlette # watchfiles attrs==23.2.0 @@ -15,13 +16,17 @@ attrs==23.2.0 # jsonschema # referencing certifi==2024.2.2 - # via requests + # via + # httpcore + # httpx + # requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via # dask # feast (setup.py) + # typer # uvicorn cloudpickle==3.0.0 # via dask @@ -31,14 +36,22 @@ dask[array,dataframe]==2024.4.2 # via # dask-expr # feast (setup.py) -dask-expr==1.0.12 +dask-expr==1.0.14 # via dask dill==0.3.8 # via feast (setup.py) +dnspython==2.6.1 + # via email-validator +email-validator==2.1.1 + # via fastapi exceptiongroup==1.2.1 # via anyio -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via + # fastapi-cli + # feast (setup.py) +fastapi-cli==0.0.2 + # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 @@ -46,28 +59,42 @@ greenlet==3.0.3 gunicorn==22.0.0 ; platform_system != "Windows" # via feast (setup.py) h11==0.14.0 - # via uvicorn + # via + # httpcore + # uvicorn +httpcore==1.0.5 + # via httpx httptools==0.6.1 # via uvicorn +httpx==0.27.0 + # via fastapi idna==3.7 # via # anyio + # email-validator + # httpx # requests importlib-metadata==7.1.0 # via dask jinja2==3.1.3 - # via feast (setup.py) -jsonschema==4.21.1 + # via + # fastapi + # feast (setup.py) +jsonschema==4.22.0 # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 +mdurl==0.1.2 + # via markdown-it-py mmh3==4.1.0 # via feast (setup.py) -mypy==1.9.0 +mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -79,6 +106,8 @@ numpy==1.26.4 # feast (setup.py) # pandas # pyarrow +orjson==3.10.2 + # via fastapi packaging==24.0 # via # dask @@ -94,22 +123,26 @@ protobuf==4.25.3 # via # feast (setup.py) # mypy-protobuf -pyarrow==15.0.2 +pyarrow==16.0.0 # via # dask-expr # feast (setup.py) -pydantic==2.7.0 +pydantic==2.7.1 # via # fastapi # feast (setup.py) -pydantic-core==2.18.1 +pydantic-core==2.18.2 # via pydantic pygments==2.17.2 - # via feast (setup.py) + # via + # feast (setup.py) + # rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via pandas pyyaml==6.0.1 @@ -117,20 +150,26 @@ pyyaml==6.0.1 # dask # feast (setup.py) # uvicorn -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 # via feast (setup.py) +rich==13.7.1 + # via typer rpds-py==0.18.0 # via # jsonschema # referencing +shellingham==1.5.4 + # via typer six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anyio + # via + # anyio + # httpx sqlalchemy[mypy]==2.0.29 # via # feast (setup.py) @@ -149,11 +188,13 @@ toolz==0.12.1 # via # dask # partd -tqdm==4.66.2 +tqdm==4.66.4 # via feast (setup.py) typeguard==4.2.1 # via feast (setup.py) -types-protobuf==5.26.0.20240420 +typer==0.12.3 + # via fastapi-cli +types-protobuf==5.26.0.20240422 # via mypy-protobuf typing-extensions==4.11.0 # via @@ -164,13 +205,19 @@ typing-extensions==4.11.0 # pydantic-core # sqlalchemy # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas +ujson==5.9.0 + # via fastapi urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli + # feast (setup.py) uvloop==0.19.0 # via uvicorn watchfiles==0.21.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 9486743f776..2abaac7ffb4 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -56,11 +56,11 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.88 +boto3==1.34.97 # via # feast (setup.py) # moto -botocore==1.34.88 +botocore==1.34.97 # via # boto3 # moto @@ -101,6 +101,7 @@ click==8.1.7 # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.0.0 # via dask @@ -112,7 +113,7 @@ comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.4.4 +coverage[toml]==7.5.0 # via pytest-cov cryptography==42.0.5 # via @@ -131,7 +132,7 @@ dask[array,dataframe]==2024.4.2 # via # dask-expr # feast (setup.py) -dask-expr==1.0.12 +dask-expr==1.0.14 # via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -141,12 +142,14 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.16.4 +deltalake==0.17.3 # via feast (setup.py) dill==0.3.8 # via feast (setup.py) distlib==0.3.8 # via virtualenv +dnspython==2.6.1 + # via email-validator docker==7.0.0 # via # feast (setup.py) @@ -157,8 +160,10 @@ duckdb==0.10.2 # via # duckdb-engine # ibis-framework -duckdb-engine==0.11.5 +duckdb-engine==0.12.0 # via ibis-framework +email-validator==2.1.1 + # via fastapi entrypoints==0.4 # via altair exceptiongroup==1.2.1 @@ -170,11 +175,15 @@ execnet==2.1.1 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via + # fastapi-cli + # feast (setup.py) +fastapi-cli==0.0.2 + # via fastapi fastjsonschema==2.19.1 # via nbformat -filelock==3.13.4 +filelock==3.14.0 # via # snowflake-connector-python # virtualenv @@ -190,7 +199,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.18.0 +google-api-core[grpc]==2.19.0 # via # feast (setup.py) # firebase-admin @@ -202,7 +211,7 @@ google-api-core[grpc]==2.18.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.126.0 +google-api-python-client==2.127.0 # via firebase-admin google-auth==2.29.0 # via @@ -250,13 +259,13 @@ googleapis-common-protos[grpc]==1.63.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.12 +great-expectations==0.18.13 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.62.2 +grpcio==1.63.0 # via # feast (setup.py) # google-api-core @@ -300,6 +309,7 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via + # fastapi # feast (setup.py) # jupyterlab ibis-framework[duckdb]==8.0.0 @@ -308,11 +318,12 @@ ibis-framework[duckdb]==8.0.0 # ibis-substrait ibis-substrait==3.2.0 # via feast (setup.py) -identify==2.5.35 +identify==2.5.36 # via pre-commit idna==3.7 # via # anyio + # email-validator # httpx # jsonschema # requests @@ -350,6 +361,7 @@ jedi==0.19.1 jinja2==3.1.3 # via # altair + # fastapi # feast (setup.py) # great-expectations # jupyter-server @@ -370,7 +382,7 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.21.1 +jsonschema[format-nongpl]==4.22.0 # via # altair # feast (setup.py) @@ -407,11 +419,11 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.6 +jupyterlab==4.1.8 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.26.0 +jupyterlab-server==2.27.1 # via # jupyterlab # notebook @@ -430,7 +442,7 @@ markupsafe==2.1.5 # jinja2 # nbconvert # werkzeug -marshmallow==3.21.1 +marshmallow==3.21.2 # via great-expectations matplotlib-inline==0.1.7 # via @@ -460,7 +472,7 @@ msgpack==1.0.8 # via cachecontrol multipledispatch==1.0.0 # via ibis-framework -mypy==1.9.0 +mypy==1.10.0 # via # feast (setup.py) # sqlalchemy @@ -470,7 +482,7 @@ mypy-protobuf==3.3.0 # via feast (setup.py) nbclient==0.10.0 # via nbconvert -nbconvert==7.16.3 +nbconvert==7.16.4 # via jupyter-server nbformat==5.10.4 # via @@ -501,6 +513,8 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib +orjson==3.10.2 + # via fastapi overrides==7.7.0 # via jupyter-server packaging==24.0 @@ -554,7 +568,7 @@ platformdirs==3.11.0 # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest ply==3.11 # via thriftpy2 @@ -634,12 +648,12 @@ pybindgen==0.22.1 # via feast (setup.py) pycparser==2.22 # via cffi -pydantic==2.7.0 +pydantic==2.7.1 # via # fastapi # feast (setup.py) # great-expectations -pydantic-core==2.18.1 +pydantic-core==2.18.2 # via pydantic pygments==2.17.2 # via @@ -664,7 +678,7 @@ pyparsing==3.1.2 # via # great-expectations # httplib2 -pyproject-hooks==1.0.0 +pyproject-hooks==1.1.0 # via # build # pip-tools @@ -695,7 +709,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.6.0 +pytest-xdist==3.6.1 # via feast (setup.py) python-dateutil==2.9.0.post0 # via @@ -714,6 +728,8 @@ python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via # great-expectations @@ -731,19 +747,19 @@ pyyaml==6.0.1 # pre-commit # responses # uvicorn -pyzmq==26.0.2 +pyzmq==26.0.3 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.16 +regex==2024.4.28 # via feast (setup.py) requests==2.31.0 # via @@ -777,8 +793,10 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.7.1 - # via ibis-framework -rockset==2.1.1 + # via + # ibis-framework + # typer +rockset==2.1.2 # via feast (setup.py) rpds-py==0.18.0 # via @@ -790,7 +808,7 @@ ruamel-yaml==0.17.17 # via great-expectations ruamel-yaml-clib==0.2.8 # via ruamel-yaml -ruff==0.4.1 +ruff==0.4.2 # via feast (setup.py) s3transfer==0.10.1 # via boto3 @@ -798,6 +816,8 @@ scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +shellingham==1.5.4 + # via typer six==1.16.0 # via # asttokens @@ -817,7 +837,7 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.9.0 +snowflake-connector-python[pandas]==3.10.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -852,7 +872,7 @@ stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.16.0 +substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 # via feast (setup.py) @@ -862,11 +882,11 @@ terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -testcontainers==4.3.3 +testcontainers==4.4.0 # via feast (setup.py) thriftpy2==0.4.20 # via happybase -tinycss2==1.2.1 +tinycss2==1.3.0 # via nbconvert toml==0.10.2 # via feast (setup.py) @@ -877,7 +897,6 @@ tomli==2.0.1 # jupyterlab # mypy # pip-tools - # pyproject-hooks # pytest # pytest-env tomlkit==0.12.4 @@ -896,7 +915,7 @@ tornado==6.4 # jupyterlab # notebook # terminado -tqdm==4.66.2 +tqdm==4.66.4 # via # feast (setup.py) # great-expectations @@ -919,15 +938,17 @@ trino==0.328.0 # via feast (setup.py) typeguard==4.2.1 # via feast (setup.py) +typer==0.12.3 + # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 # via # feast (setup.py) # mypy-protobuf -types-pymysql==1.1.0.1 +types-pymysql==1.1.0.20240425 # via feast (setup.py) -types-pyopenssl==24.0.0.20240417 +types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 # via @@ -937,11 +958,11 @@ types-pytz==2024.1.0.20240417 # via feast (setup.py) types-pyyaml==6.0.12.20240311 # via feast (setup.py) -types-redis==4.6.0.20240417 +types-redis==4.6.0.20240425 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==69.5.0.20240415 +types-setuptools==69.5.0.20240423 # via # feast (setup.py) # types-cffi @@ -967,6 +988,7 @@ typing-extensions==4.11.0 # starlette # testcontainers # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas @@ -974,6 +996,8 @@ tzlocal==5.2 # via # great-expectations # trino +ujson==5.9.0 + # via fastapi uri-template==1.3.0 # via jsonschema uritemplate==4.1.1 @@ -992,7 +1016,10 @@ urllib3==1.26.18 # snowflake-connector-python # testcontainers uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli + # feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 @@ -1009,7 +1036,7 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.7.0 +websocket-client==1.8.0 # via # jupyter-server # kubernetes diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 368b2421266..822176c6d0d 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -8,6 +8,7 @@ annotated-types==0.6.0 # via pydantic anyio==4.3.0 # via + # httpx # starlette # watchfiles attrs==23.2.0 @@ -15,13 +16,17 @@ attrs==23.2.0 # jsonschema # referencing certifi==2024.2.2 - # via requests + # via + # httpcore + # httpx + # requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via # dask # feast (setup.py) + # typer # uvicorn cloudpickle==3.0.0 # via dask @@ -31,14 +36,22 @@ dask[array,dataframe]==2024.4.2 # via # dask-expr # feast (setup.py) -dask-expr==1.0.12 +dask-expr==1.0.14 # via dask dill==0.3.8 # via feast (setup.py) +dnspython==2.6.1 + # via email-validator +email-validator==2.1.1 + # via fastapi exceptiongroup==1.2.1 # via anyio -fastapi==0.110.2 - # via feast (setup.py) +fastapi==0.111.0 + # via + # fastapi-cli + # feast (setup.py) +fastapi-cli==0.0.2 + # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 @@ -46,30 +59,44 @@ greenlet==3.0.3 gunicorn==22.0.0 ; platform_system != "Windows" # via feast (setup.py) h11==0.14.0 - # via uvicorn + # via + # httpcore + # uvicorn +httpcore==1.0.5 + # via httpx httptools==0.6.1 # via uvicorn +httpx==0.27.0 + # via fastapi idna==3.7 # via # anyio + # email-validator + # httpx # requests importlib-metadata==7.1.0 # via # dask # typeguard jinja2==3.1.3 - # via feast (setup.py) -jsonschema==4.21.1 + # via + # fastapi + # feast (setup.py) +jsonschema==4.22.0 # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 +mdurl==0.1.2 + # via markdown-it-py mmh3==4.1.0 # via feast (setup.py) -mypy==1.9.0 +mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -81,6 +108,8 @@ numpy==1.26.4 # feast (setup.py) # pandas # pyarrow +orjson==3.10.2 + # via fastapi packaging==24.0 # via # dask @@ -96,22 +125,26 @@ protobuf==4.25.3 # via # feast (setup.py) # mypy-protobuf -pyarrow==15.0.2 +pyarrow==16.0.0 # via # dask-expr # feast (setup.py) -pydantic==2.7.0 +pydantic==2.7.1 # via # fastapi # feast (setup.py) -pydantic-core==2.18.1 +pydantic-core==2.18.2 # via pydantic pygments==2.17.2 - # via feast (setup.py) + # via + # feast (setup.py) + # rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 # via pandas pyyaml==6.0.1 @@ -119,20 +152,26 @@ pyyaml==6.0.1 # dask # feast (setup.py) # uvicorn -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 # via feast (setup.py) +rich==13.7.1 + # via typer rpds-py==0.18.0 # via # jsonschema # referencing +shellingham==1.5.4 + # via typer six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anyio + # via + # anyio + # httpx sqlalchemy[mypy]==2.0.29 # via # feast (setup.py) @@ -151,11 +190,13 @@ toolz==0.12.1 # via # dask # partd -tqdm==4.66.2 +tqdm==4.66.4 # via feast (setup.py) typeguard==4.2.1 # via feast (setup.py) -types-protobuf==5.26.0.20240420 +typer==0.12.3 + # via fastapi-cli +types-protobuf==5.26.0.20240422 # via mypy-protobuf typing-extensions==4.11.0 # via @@ -167,13 +208,19 @@ typing-extensions==4.11.0 # sqlalchemy # starlette # typeguard + # typer # uvicorn tzdata==2024.1 # via pandas +ujson==5.9.0 + # via fastapi urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 - # via feast (setup.py) + # via + # fastapi + # fastapi-cli + # feast (setup.py) uvloop==0.19.0 # via uvicorn watchfiles==0.21.0 diff --git a/setup.py b/setup.py index ef5986f1579..6cc728ee98d 100644 --- a/setup.py +++ b/setup.py @@ -135,8 +135,8 @@ ] IBIS_REQUIRED = [ - "ibis-framework", - "ibis-substrait", + "ibis-framework>=8.0.0,<9", + "ibis-substrait<=3.2.0", ] GRPCIO_REQUIRED = [ @@ -146,7 +146,7 @@ "grpcio-health-checking>=1.56.2,<2", ] -DUCKDB_REQUIRED = ["ibis-framework[duckdb]"] +DUCKDB_REQUIRED = ["ibis-framework[duckdb]>=8.0.0,<9"] DELTA_REQUIRED = ["deltalake"] From b209d9ba8ad9ae65dc1928a83d8240c79a432196 Mon Sep 17 00:00:00 2001 From: tokoko Date: Fri, 3 May 2024 19:48:37 +0000 Subject: [PATCH 4/5] refactor ibis into bunch of functions Signed-off-by: tokoko --- .../feast/infra/offline_stores/duckdb.py | 152 ++++- sdk/python/feast/infra/offline_stores/ibis.py | 625 ++++++++---------- 2 files changed, 437 insertions(+), 340 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/duckdb.py b/sdk/python/feast/infra/offline_stores/duckdb.py index d43286f3719..8a9390f97b1 100644 --- a/sdk/python/feast/infra/offline_stores/duckdb.py +++ b/sdk/python/feast/infra/offline_stores/duckdb.py @@ -1,8 +1,57 @@ +from datetime import datetime +from pathlib import Path +from typing import Any, Callable, List, Optional, Union + import ibis +import pandas as pd +import pyarrow +from ibis.expr.types import Table from pydantic import StrictStr -from feast.infra.offline_stores.ibis import IbisOfflineStore -from feast.repo_config import FeastConfigBaseModel +from feast.data_format import DeltaFormat, ParquetFormat +from feast.data_source import DataSource +from feast.feature_logging import LoggingConfig, LoggingSource +from feast.feature_view import FeatureView +from feast.infra.offline_stores.file_source import FileSource +from feast.infra.offline_stores.ibis import ( + get_historical_features_ibis, + offline_write_batch_ibis, + pull_all_from_table_or_query_ibis, + pull_latest_from_table_or_query_ibis, + write_logged_features_ibis, +) +from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.registry.base_registry import BaseRegistry +from feast.repo_config import FeastConfigBaseModel, RepoConfig + + +def _read_data_source(data_source: DataSource) -> Table: + assert isinstance(data_source, FileSource) + + if isinstance(data_source.file_format, ParquetFormat): + return ibis.read_parquet(data_source.path) + elif isinstance(data_source.file_format, DeltaFormat): + return ibis.read_delta(data_source.path) + + +def _write_data_source(table: pyarrow.Table, data_source: DataSource): + assert isinstance(data_source, FileSource) + + file_options = data_source.file_options + + if isinstance(data_source.file_format, ParquetFormat): + prev_table = ibis.read_parquet(file_options.uri).to_pyarrow() + if table.schema != prev_table.schema: + table = table.cast(prev_table.schema) + new_table = pyarrow.concat_tables([table, prev_table]) + ibis.memtable(new_table).to_parquet(file_options.uri) + elif isinstance(data_source.file_format, DeltaFormat): + from deltalake import DeltaTable + + prev_schema = DeltaTable(file_options.uri).schema().to_pyarrow() + if table.schema != prev_schema: + table = table.cast(prev_schema) + ibis.memtable(table).to_delta(file_options.uri, mode="append") class DuckDBOfflineStoreConfig(FeastConfigBaseModel): @@ -10,8 +59,99 @@ class DuckDBOfflineStoreConfig(FeastConfigBaseModel): # """ Offline store type selector""" -class DuckDBOfflineStore(IbisOfflineStore): +class DuckDBOfflineStore(OfflineStore): + @staticmethod + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + return pull_latest_from_table_or_query_ibis( + config=config, + data_source=data_source, + join_key_columns=join_key_columns, + feature_name_columns=feature_name_columns, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + start_date=start_date, + end_date=end_date, + data_source_reader=_read_data_source, + ) + + @staticmethod + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + return get_historical_features_ibis( + config=config, + feature_views=feature_views, + feature_refs=feature_refs, + entity_df=entity_df, + registry=registry, + project=project, + full_feature_names=full_feature_names, + data_source_reader=_read_data_source, + ) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + return pull_all_from_table_or_query_ibis( + config=config, + data_source=data_source, + join_key_columns=join_key_columns, + feature_name_columns=feature_name_columns, + timestamp_field=timestamp_field, + start_date=start_date, + end_date=end_date, + data_source_reader=_read_data_source, + ) + + @staticmethod + def offline_write_batch( + config: RepoConfig, + feature_view: FeatureView, + table: pyarrow.Table, + progress: Optional[Callable[[int], Any]], + ): + offline_write_batch_ibis( + config=config, + feature_view=feature_view, + table=table, + progress=progress, + data_source_writer=_write_data_source, + ) + @staticmethod - def setup_ibis_backend(): - # there's no need to call setup as duckdb is default ibis backend - ibis.set_backend("duckdb") + def write_logged_features( + config: RepoConfig, + data: Union[pyarrow.Table, Path], + source: LoggingSource, + logging_config: LoggingConfig, + registry: BaseRegistry, + ): + write_logged_features_ibis( + config=config, + data=data, + source=source, + logging_config=logging_config, + registry=registry, + ) diff --git a/sdk/python/feast/infra/offline_stores/ibis.py b/sdk/python/feast/infra/offline_stores/ibis.py index 30fa8ef1d43..da3eefc9af9 100644 --- a/sdk/python/feast/infra/offline_stores/ibis.py +++ b/sdk/python/feast/infra/offline_stores/ibis.py @@ -25,7 +25,6 @@ SavedDatasetFileStorage, ) from feast.infra.offline_stores.offline_store import ( - OfflineStore, RetrievalJob, RetrievalMetadata, ) @@ -42,381 +41,275 @@ def _get_entity_schema(entity_df: pd.DataFrame) -> Dict[str, np.dtype]: return dict(zip(entity_df.columns, entity_df.dtypes)) -class IbisOfflineStore(OfflineStore): - @staticmethod - def pull_latest_from_table_or_query( - config: RepoConfig, - data_source: DataSource, - join_key_columns: List[str], - feature_name_columns: List[str], - timestamp_field: str, - created_timestamp_column: Optional[str], - start_date: datetime, - end_date: datetime, - ) -> RetrievalJob: - fields = join_key_columns + feature_name_columns + [timestamp_field] - if created_timestamp_column: - fields.append(created_timestamp_column) - start_date = start_date.astimezone(tz=utc) - end_date = end_date.astimezone(tz=utc) - - table = IbisOfflineStore._read_data_source(data_source) - - table = table.select(*fields) - - # TODO get rid of this fix - if "__log_date" in table.columns: - table = table.drop("__log_date") - - table = table.filter( - ibis.and_( - table[timestamp_field] >= ibis.literal(start_date), - table[timestamp_field] <= ibis.literal(end_date), - ) - ) - - table = deduplicate( - table=table, - group_by_cols=join_key_columns, - event_timestamp_col=timestamp_field, - created_timestamp_col=created_timestamp_column, - ) - - return IbisRetrievalJob( - table=table, - on_demand_feature_views=[], - full_feature_names=False, - metadata=None, +def pull_latest_from_table_or_query_ibis( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + data_source_reader: Callable[[DataSource], Table], +) -> RetrievalJob: + fields = join_key_columns + feature_name_columns + [timestamp_field] + if created_timestamp_column: + fields.append(created_timestamp_column) + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + table = data_source_reader(data_source) + + table = table.select(*fields) + + # TODO get rid of this fix + if "__log_date" in table.columns: + table = table.drop("__log_date") + + table = table.filter( + ibis.and_( + table[timestamp_field] >= ibis.literal(start_date), + table[timestamp_field] <= ibis.literal(end_date), ) + ) - def _get_entity_df_event_timestamp_range( - entity_df: pd.DataFrame, entity_df_event_timestamp_col: str - ) -> Tuple[datetime, datetime]: - entity_df_event_timestamp = entity_df.loc[ - :, entity_df_event_timestamp_col - ].infer_objects() - if pd.api.types.is_string_dtype(entity_df_event_timestamp): - entity_df_event_timestamp = pd.to_datetime( - entity_df_event_timestamp, utc=True - ) - entity_df_event_timestamp_range = ( - entity_df_event_timestamp.min().to_pydatetime(), - entity_df_event_timestamp.max().to_pydatetime(), - ) + table = deduplicate( + table=table, + group_by_cols=join_key_columns, + event_timestamp_col=timestamp_field, + created_timestamp_col=created_timestamp_column, + ) - return entity_df_event_timestamp_range + return IbisRetrievalJob( + table=table, + on_demand_feature_views=[], + full_feature_names=False, + metadata=None, + ) - @staticmethod - def _to_utc(entity_df: pd.DataFrame, event_timestamp_col): - entity_df_event_timestamp = entity_df.loc[ - :, event_timestamp_col - ].infer_objects() - if pd.api.types.is_string_dtype(entity_df_event_timestamp): - entity_df_event_timestamp = pd.to_datetime( - entity_df_event_timestamp, utc=True - ) - entity_df[event_timestamp_col] = entity_df_event_timestamp - return entity_df - - @staticmethod - def _generate_row_id( - entity_table: Table, feature_views: List[FeatureView], event_timestamp_col - ) -> Table: - all_entities = [event_timestamp_col] - for fv in feature_views: - if fv.projection.join_key_map: - all_entities.extend(fv.projection.join_key_map.values()) - else: - all_entities.extend([e.name for e in fv.entity_columns]) - - r = ibis.literal("") - - for e in set(all_entities): - r = r.concat(entity_table[e].cast("string")) # type: ignore - - entity_table = entity_table.mutate(entity_row_id=r) - - return entity_table - - @staticmethod - def _read_data_source(data_source: DataSource) -> Table: - assert isinstance(data_source, FileSource) - - if isinstance(data_source.file_format, ParquetFormat): - return ibis.read_parquet(data_source.path) - elif isinstance(data_source.file_format, DeltaFormat): - return ibis.read_delta(data_source.path) - - @staticmethod - def get_historical_features( - config: RepoConfig, - feature_views: List[FeatureView], - feature_refs: List[str], - entity_df: Union[pd.DataFrame, str], - registry: BaseRegistry, - project: str, - full_feature_names: bool = False, - ) -> RetrievalJob: - entity_schema = _get_entity_schema( - entity_df=entity_df, - ) - event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema=entity_schema, - ) +def _get_entity_df_event_timestamp_range( + entity_df: pd.DataFrame, entity_df_event_timestamp_col: str +) -> Tuple[datetime, datetime]: + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) + entity_df_event_timestamp_range = ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) - # TODO get range with ibis - timestamp_range = IbisOfflineStore._get_entity_df_event_timestamp_range( - entity_df, event_timestamp_col - ) + return entity_df_event_timestamp_range - entity_df = IbisOfflineStore._to_utc(entity_df, event_timestamp_col) - entity_table = ibis.memtable(entity_df) - entity_table = IbisOfflineStore._generate_row_id( - entity_table, feature_views, event_timestamp_col - ) +def _to_utc(entity_df: pd.DataFrame, event_timestamp_col): + entity_df_event_timestamp = entity_df.loc[:, event_timestamp_col].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) - def read_fv( - feature_view: FeatureView, feature_refs: List[str], full_feature_names: bool - ) -> Tuple: - fv_table: Table = IbisOfflineStore._read_data_source( - feature_view.batch_source - ) + entity_df[event_timestamp_col] = entity_df_event_timestamp + return entity_df - for old_name, new_name in feature_view.batch_source.field_mapping.items(): - if old_name in fv_table.columns: - fv_table = fv_table.rename({new_name: old_name}) - timestamp_field = feature_view.batch_source.timestamp_field +def _generate_row_id( + entity_table: Table, feature_views: List[FeatureView], event_timestamp_col +) -> Table: + all_entities = [event_timestamp_col] + for fv in feature_views: + if fv.projection.join_key_map: + all_entities.extend(fv.projection.join_key_map.values()) + else: + all_entities.extend([e.name for e in fv.entity_columns]) - # TODO mutate only if tz-naive - fv_table = fv_table.mutate( - **{ - timestamp_field: fv_table[timestamp_field].cast( - dt.Timestamp(timezone="UTC") - ) - } - ) + r = ibis.literal("") - full_name_prefix = feature_view.projection.name_alias or feature_view.name + for e in set(all_entities): + r = r.concat(entity_table[e].cast("string")) # type: ignore - feature_refs = [ - fr.split(":")[1] - for fr in feature_refs - if fr.startswith(f"{full_name_prefix}:") - ] + entity_table = entity_table.mutate(entity_row_id=r) - if full_feature_names: - fv_table = fv_table.rename( - { - f"{full_name_prefix}__{feature}": feature - for feature in feature_refs - } - ) + return entity_table + + +def get_historical_features_ibis( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + data_source_reader: Callable[[DataSource], Table], + full_feature_names: bool = False, +) -> RetrievalJob: + entity_schema = _get_entity_schema( + entity_df=entity_df, + ) + event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema=entity_schema, + ) - feature_refs = [ - f"{full_name_prefix}__{feature}" for feature in feature_refs - ] - - return ( - fv_table, - feature_view.batch_source.timestamp_field, - feature_view.batch_source.created_timestamp_column, - feature_view.projection.join_key_map - or {e.name: e.name for e in feature_view.entity_columns}, - feature_refs, - feature_view.ttl, - ) + # TODO get range with ibis + timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, event_timestamp_col + ) - res = point_in_time_join( - entity_table=entity_table, - feature_tables=[ - read_fv(feature_view, feature_refs, full_feature_names) - for feature_view in feature_views - ], - event_timestamp_col=event_timestamp_col, - ) + entity_df = _to_utc(entity_df, event_timestamp_col) - odfvs = OnDemandFeatureView.get_requested_odfvs(feature_refs, project, registry) - - substrait_odfvs = [fv for fv in odfvs if fv.mode == "substrait"] - for odfv in substrait_odfvs: - res = odfv.transform_ibis(res, full_feature_names) - - return IbisRetrievalJob( - res, - [fv for fv in odfvs if fv.mode != "substrait"], - full_feature_names, - metadata=RetrievalMetadata( - features=feature_refs, - keys=list(set(entity_df.columns) - {event_timestamp_col}), - min_event_timestamp=timestamp_range[0], - max_event_timestamp=timestamp_range[1], - ), - ) + entity_table = ibis.memtable(entity_df) + entity_table = _generate_row_id(entity_table, feature_views, event_timestamp_col) - @staticmethod - def pull_all_from_table_or_query( - config: RepoConfig, - data_source: DataSource, - join_key_columns: List[str], - feature_name_columns: List[str], - timestamp_field: str, - start_date: datetime, - end_date: datetime, - ) -> RetrievalJob: - assert isinstance(data_source, FileSource) - - fields = join_key_columns + feature_name_columns + [timestamp_field] - start_date = start_date.astimezone(tz=utc) - end_date = end_date.astimezone(tz=utc) - - table = IbisOfflineStore._read_data_source(data_source) - - table = table.select(*fields) - - # TODO get rid of this fix - if "__log_date" in table.columns: - table = table.drop("__log_date") - - table = table.filter( - ibis.and_( - table[timestamp_field] >= ibis.literal(start_date), - table[timestamp_field] <= ibis.literal(end_date), - ) - ) + def read_fv( + feature_view: FeatureView, feature_refs: List[str], full_feature_names: bool + ) -> Tuple: + fv_table: Table = data_source_reader(feature_view.batch_source) - return IbisRetrievalJob( - table=table, - on_demand_feature_views=[], - full_feature_names=False, - metadata=None, - ) + for old_name, new_name in feature_view.batch_source.field_mapping.items(): + if old_name in fv_table.columns: + fv_table = fv_table.rename({new_name: old_name}) - @staticmethod - def write_logged_features( - config: RepoConfig, - data: Union[pyarrow.Table, Path], - source: LoggingSource, - logging_config: LoggingConfig, - registry: BaseRegistry, - ): - destination = logging_config.destination - assert isinstance(destination, FileLoggingDestination) + timestamp_field = feature_view.batch_source.timestamp_field - table = ( - ibis.read_parquet(data) if isinstance(data, Path) else ibis.memtable(data) + # TODO mutate only if tz-naive + fv_table = fv_table.mutate( + **{ + timestamp_field: fv_table[timestamp_field].cast( + dt.Timestamp(timezone="UTC") + ) + } ) - if destination.partition_by: - kwargs = {"partition_by": destination.partition_by} - else: - kwargs = {} + full_name_prefix = feature_view.projection.name_alias or feature_view.name - # TODO always write to directory - table.to_parquet( - f"{destination.path}/{uuid.uuid4().hex}-{{i}}.parquet", **kwargs - ) - - @staticmethod - def offline_write_batch( - config: RepoConfig, - feature_view: FeatureView, - table: pyarrow.Table, - progress: Optional[Callable[[int], Any]], - ): - assert isinstance(feature_view.batch_source, FileSource) + feature_refs = [ + fr.split(":")[1] + for fr in feature_refs + if fr.startswith(f"{full_name_prefix}:") + ] - pa_schema, column_names = get_pyarrow_schema_from_batch_source( - config, feature_view.batch_source - ) - if column_names != table.column_names: - raise ValueError( - f"The input pyarrow table has schema {table.schema} with the incorrect columns {table.column_names}. " - f"The schema is expected to be {pa_schema} with the columns (in this exact order) to be {column_names}." + if full_feature_names: + fv_table = fv_table.rename( + {f"{full_name_prefix}__{feature}": feature for feature in feature_refs} ) - file_options = feature_view.batch_source.file_options + feature_refs = [ + f"{full_name_prefix}__{feature}" for feature in feature_refs + ] - if isinstance(feature_view.batch_source.file_format, ParquetFormat): - prev_table = ibis.read_parquet(file_options.uri).to_pyarrow() - if table.schema != prev_table.schema: - table = table.cast(prev_table.schema) - new_table = pyarrow.concat_tables([table, prev_table]) + return ( + fv_table, + feature_view.batch_source.timestamp_field, + feature_view.batch_source.created_timestamp_column, + feature_view.projection.join_key_map + or {e.name: e.name for e in feature_view.entity_columns}, + feature_refs, + feature_view.ttl, + ) - ibis.memtable(new_table).to_parquet(file_options.uri) - elif isinstance(feature_view.batch_source.file_format, DeltaFormat): - from deltalake import DeltaTable + res = point_in_time_join( + entity_table=entity_table, + feature_tables=[ + read_fv(feature_view, feature_refs, full_feature_names) + for feature_view in feature_views + ], + event_timestamp_col=event_timestamp_col, + ) - prev_schema = DeltaTable(file_options.uri).schema().to_pyarrow() - if table.schema != prev_schema: - table = table.cast(prev_schema) - ibis.memtable(table).to_delta(file_options.uri, mode="append") + odfvs = OnDemandFeatureView.get_requested_odfvs(feature_refs, project, registry) + + substrait_odfvs = [fv for fv in odfvs if fv.mode == "substrait"] + for odfv in substrait_odfvs: + res = odfv.transform_ibis(res, full_feature_names) + + return IbisRetrievalJob( + res, + [fv for fv in odfvs if fv.mode != "substrait"], + full_feature_names, + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(set(entity_df.columns) - {event_timestamp_col}), + min_event_timestamp=timestamp_range[0], + max_event_timestamp=timestamp_range[1], + ), + ) -class IbisRetrievalJob(RetrievalJob): - def __init__( - self, table, on_demand_feature_views, full_feature_names, metadata - ) -> None: - super().__init__() - self.table = table - self._on_demand_feature_views: List[OnDemandFeatureView] = ( - on_demand_feature_views +def pull_all_from_table_or_query_ibis( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + start_date: datetime, + end_date: datetime, + data_source_reader: Callable[[DataSource], Table], +) -> RetrievalJob: + fields = join_key_columns + feature_name_columns + [timestamp_field] + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + table = data_source_reader(data_source) + + table = table.select(*fields) + + # TODO get rid of this fix + if "__log_date" in table.columns: + table = table.drop("__log_date") + + table = table.filter( + ibis.and_( + table[timestamp_field] >= ibis.literal(start_date), + table[timestamp_field] <= ibis.literal(end_date), ) - self._full_feature_names = full_feature_names - self._metadata = metadata + ) - def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: - return self.table.execute() + return IbisRetrievalJob( + table=table, + on_demand_feature_views=[], + full_feature_names=False, + metadata=None, + ) - def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: - return self.table.to_pyarrow() - @property - def full_feature_names(self) -> bool: - return self._full_feature_names +def write_logged_features_ibis( + config: RepoConfig, + data: Union[pyarrow.Table, Path], + source: LoggingSource, + logging_config: LoggingConfig, + registry: BaseRegistry, +): + destination = logging_config.destination + assert isinstance(destination, FileLoggingDestination) - @property - def on_demand_feature_views(self) -> List[OnDemandFeatureView]: - return self._on_demand_feature_views + table = ibis.read_parquet(data) if isinstance(data, Path) else ibis.memtable(data) - def persist( - self, - storage: SavedDatasetStorage, - allow_overwrite: bool = False, - timeout: Optional[int] = None, - ): - assert isinstance(storage, SavedDatasetFileStorage) - if not allow_overwrite and os.path.exists(storage.file_options.uri): - raise SavedDatasetLocationAlreadyExists(location=storage.file_options.uri) + if destination.partition_by: + kwargs = {"partition_by": destination.partition_by} + else: + kwargs = {} - if isinstance(storage.file_options.file_format, ParquetFormat): - filesystem, path = FileSource.create_filesystem_and_path( - storage.file_options.uri, - storage.file_options.s3_endpoint_override, - ) + # TODO always write to directory + table.to_parquet(f"{destination.path}/{uuid.uuid4().hex}-{{i}}.parquet", **kwargs) - if path.endswith(".parquet"): - pyarrow.parquet.write_table( - self.to_arrow(), where=path, filesystem=filesystem - ) - else: - # otherwise assume destination is directory - pyarrow.parquet.write_to_dataset( - self.to_arrow(), root_path=path, filesystem=filesystem - ) - elif isinstance(storage.file_options.file_format, DeltaFormat): - mode = ( - "overwrite" - if allow_overwrite and os.path.exists(storage.file_options.uri) - else "error" - ) - self.table.to_delta(storage.file_options.uri, mode=mode) - @property - def metadata(self) -> Optional[RetrievalMetadata]: - return self._metadata +def offline_write_batch_ibis( + config: RepoConfig, + feature_view: FeatureView, + table: pyarrow.Table, + progress: Optional[Callable[[int], Any]], + data_source_writer: Callable[[pyarrow.Table, DataSource], None], +): + pa_schema, column_names = get_pyarrow_schema_from_batch_source( + config, feature_view.batch_source + ) + if column_names != table.column_names: + raise ValueError( + f"The input pyarrow table has schema {table.schema} with the incorrect columns {table.column_names}. " + f"The schema is expected to be {pa_schema} with the columns (in this exact order) to be {column_names}." + ) + + data_source_writer(table, feature_view.batch_source) def deduplicate( @@ -515,3 +408,67 @@ def point_in_time_join( acc_table = acc_table.drop("entity_row_id") return acc_table + + +class IbisRetrievalJob(RetrievalJob): + def __init__( + self, table, on_demand_feature_views, full_feature_names, metadata + ) -> None: + super().__init__() + self.table = table + self._on_demand_feature_views: List[OnDemandFeatureView] = ( + on_demand_feature_views + ) + self._full_feature_names = full_feature_names + self._metadata = metadata + + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: + return self.table.execute() + + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: + return self.table.to_pyarrow() + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> List[OnDemandFeatureView]: + return self._on_demand_feature_views + + def persist( + self, + storage: SavedDatasetStorage, + allow_overwrite: bool = False, + timeout: Optional[int] = None, + ): + assert isinstance(storage, SavedDatasetFileStorage) + if not allow_overwrite and os.path.exists(storage.file_options.uri): + raise SavedDatasetLocationAlreadyExists(location=storage.file_options.uri) + + if isinstance(storage.file_options.file_format, ParquetFormat): + filesystem, path = FileSource.create_filesystem_and_path( + storage.file_options.uri, + storage.file_options.s3_endpoint_override, + ) + + if path.endswith(".parquet"): + pyarrow.parquet.write_table( + self.to_arrow(), where=path, filesystem=filesystem + ) + else: + # otherwise assume destination is directory + pyarrow.parquet.write_to_dataset( + self.to_arrow(), root_path=path, filesystem=filesystem + ) + elif isinstance(storage.file_options.file_format, DeltaFormat): + mode = ( + "overwrite" + if allow_overwrite and os.path.exists(storage.file_options.uri) + else "error" + ) + self.table.to_delta(storage.file_options.uri, mode=mode) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata From 0edb8fb2a1b7d5b62ced889f06426b75543750ec Mon Sep 17 00:00:00 2001 From: tokoko Date: Wed, 8 May 2024 16:39:24 +0000 Subject: [PATCH 5/5] fix requirements conflicts Signed-off-by: tokoko --- .../requirements/py3.10-ci-requirements.txt | 210 ++++-------------- .../requirements/py3.10-requirements.txt | 78 ++----- .../requirements/py3.9-ci-requirements.txt | 210 ++++-------------- .../requirements/py3.9-requirements.txt | 78 ++----- 4 files changed, 136 insertions(+), 440 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 6126184fa32..fdcebba7deb 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.10-ci-requirements.txt alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -25,7 +21,6 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 - # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 @@ -43,10 +38,8 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 - # via feast (setup.py) azure-storage-blob==12.19.1 - # via feast (setup.py) -babel==2.14.0 +babel==2.15.0 # via # jupyterlab-server # sphinx @@ -56,25 +49,20 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.97 - # via - # feast (setup.py) - # moto -botocore==1.34.97 +boto3==1.34.99 + # via moto +botocore==1.34.99 # via # boto3 # moto # s3transfer build==1.2.1 - # via - # feast (setup.py) - # pip-tools + # via pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 - # via feast (setup.py) certifi==2024.2.2 # via # httpcore @@ -97,7 +85,6 @@ charset-normalizer==3.3.2 click==8.1.7 # via # dask - # feast (setup.py) # geomet # great-expectations # pip-tools @@ -106,20 +93,17 @@ click==8.1.7 cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via - # feast (setup.py) - # great-expectations + # via great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.0 +coverage[toml]==7.5.1 # via pytest-cov -cryptography==42.0.5 +cryptography==42.0.7 # via # azure-identity # azure-storage-blob - # feast (setup.py) # great-expectations # moto # msal @@ -128,11 +112,9 @@ cryptography==42.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.14 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -143,17 +125,13 @@ decorator==5.1.1 defusedxml==0.7.1 # via nbconvert deltalake==0.17.3 - # via feast (setup.py) dill==0.3.8 - # via feast (setup.py) distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator docker==7.0.0 - # via - # feast (setup.py) - # testcontainers + # via testcontainers docutils==0.19 # via sphinx duckdb==0.10.2 @@ -176,9 +154,7 @@ execnet==2.1.1 executing==2.0.1 # via stack-data fastapi==0.111.0 - # via - # fastapi-cli - # feast (setup.py) + # via fastapi-cli fastapi-cli==0.0.2 # via fastapi fastjsonschema==2.19.1 @@ -188,20 +164,16 @@ filelock==3.14.0 # snowflake-connector-python # virtualenv firebase-admin==5.4.0 - # via feast (setup.py) fqdn==1.5.1 # via jsonschema fsspec==2023.12.2 - # via - # dask - # feast (setup.py) + # via dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.19.0 # via - # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -211,13 +183,14 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.127.0 +google-api-python-client==2.128.0 # via firebase-admin google-auth==2.29.0 # via # google-api-core # google-api-python-client # google-auth-httplib2 + # google-cloud-bigquery-storage # google-cloud-core # google-cloud-firestore # google-cloud-storage @@ -225,11 +198,8 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) -google-cloud-bigquery-storage==2.24.0 - # via feast (setup.py) +google-cloud-bigquery-storage==2.25.0 google-cloud-bigtable==2.23.1 - # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -238,13 +208,10 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 - # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via - # feast (setup.py) - # firebase-admin + # via firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -255,19 +222,16 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via - # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status great-expectations==0.18.13 - # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable grpcio==1.63.0 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -278,27 +242,19 @@ grpcio==1.63.0 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 - # via feast (setup.py) grpcio-reflection==1.62.2 - # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 - # via feast (setup.py) grpcio-tools==1.62.2 - # via feast (setup.py) -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 - # via feast (setup.py) hazelcast-python-client==5.3.0 - # via feast (setup.py) hiredis==2.3.2 - # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -310,14 +266,10 @@ httptools==0.6.1 httpx==0.27.0 # via # fastapi - # feast (setup.py) # jupyterlab ibis-framework[duckdb]==8.0.0 - # via - # feast (setup.py) - # ibis-substrait + # via ibis-substrait ibis-substrait==3.2.0 - # via feast (setup.py) identify==2.5.36 # via pre-commit idna==3.7 @@ -349,11 +301,10 @@ isoduration==20.11.0 # via jsonschema jedi==0.19.1 # via ipython -jinja2==3.1.3 +jinja2==3.1.4 # via # altair # fastapi - # feast (setup.py) # great-expectations # jupyter-server # jupyterlab @@ -376,7 +327,6 @@ jsonpointer==2.4 jsonschema[format-nongpl]==4.22.0 # via # altair - # feast (setup.py) # great-expectations # jupyter-events # jupyterlab-server @@ -421,7 +371,6 @@ jupyterlab-server==2.27.1 jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 - # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -442,17 +391,13 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 - # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 - # via feast (setup.py) mock==2.0.0 - # via feast (setup.py) moto==4.2.14 - # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -464,13 +409,10 @@ msgpack==1.0.8 multipledispatch==1.0.0 # via ibis-framework mypy==1.10.0 - # via - # feast (setup.py) - # sqlalchemy + # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 - # via feast (setup.py) nbclient==0.10.0 # via nbconvert nbconvert==7.16.4 @@ -496,7 +438,6 @@ numpy==1.26.4 # altair # dask # db-dtypes - # feast (setup.py) # great-expectations # ibis-framework # pandas @@ -504,7 +445,7 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib -orjson==3.10.2 +orjson==3.10.3 # via fastapi overrides==7.7.0 # via jupyter-server @@ -535,7 +476,6 @@ pandas==2.2.2 # dask # dask-expr # db-dtypes - # feast (setup.py) # google-cloud-bigquery # great-expectations # ibis-framework @@ -546,14 +486,15 @@ parso==0.8.4 # via jedi parsy==2.1 # via ibis-framework -partd==1.4.1 +partd==1.4.2 # via dask pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython +pip==24.0 + # via pip-tools pip-tools==7.4.1 - # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core @@ -566,7 +507,6 @@ ply==3.11 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 - # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server prompt-toolkit==3.0.43 @@ -581,7 +521,6 @@ proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -599,11 +538,8 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via - # feast (setup.py) - # ipykernel + # via ipykernel psycopg2-binary==2.9.9 - # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -611,7 +547,6 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 - # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 @@ -621,7 +556,6 @@ pyarrow==15.0.2 # dask-expr # db-dtypes # deltalake - # feast (setup.py) # google-cloud-bigquery # ibis-framework # snowflake-connector-python @@ -636,19 +570,16 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 - # via feast (setup.py) pycparser==2.22 # via cffi pydantic==2.7.1 # via # fastapi - # feast (setup.py) # great-expectations pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 +pygments==2.18.0 # via - # feast (setup.py) # ipython # nbconvert # rich @@ -658,11 +589,8 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 - # via feast (setup.py) pymysql==1.1.0 - # via feast (setup.py) pyodbc==5.1.0 - # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 @@ -674,10 +602,8 @@ pyproject-hooks==1.1.0 # build # pip-tools pyspark==3.5.1 - # via feast (setup.py) pytest==7.4.4 # via - # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -687,21 +613,13 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 - # via feast (setup.py) pytest-cov==5.0.0 - # via feast (setup.py) pytest-env==1.1.3 - # via feast (setup.py) pytest-lazy-fixture==0.6.3 - # via feast (setup.py) pytest-mock==1.10.4 - # via feast (setup.py) pytest-ordering==0.6 - # via feast (setup.py) pytest-timeout==1.4.2 - # via feast (setup.py) pytest-xdist==3.6.1 - # via feast (setup.py) python-dateutil==2.9.0.post0 # via # arrow @@ -731,7 +649,6 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # ibis-substrait # jupyter-events # kubernetes @@ -744,20 +661,17 @@ pyzmq==26.0.3 # jupyter-client # jupyter-server redis==4.6.0 - # via feast (setup.py) referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events regex==2024.4.28 - # via feast (setup.py) requests==2.31.0 # via # azure-core # cachecontrol # docker - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-storage @@ -788,8 +702,7 @@ rich==13.7.1 # ibis-framework # typer rockset==2.1.2 - # via feast (setup.py) -rpds-py==0.18.0 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -797,14 +710,19 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.2 - # via feast (setup.py) +ruff==0.4.3 s3transfer==0.10.1 # via boto3 scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +setuptools==69.5.1 + # via + # grpcio-tools + # kubernetes + # nodeenv + # pip-tools shellingham==1.5.4 # via typer six==1.16.0 @@ -827,13 +745,11 @@ sniffio==1.3.1 snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==3.10.0 - # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -846,12 +762,10 @@ sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==2.0.29 +sqlalchemy[mypy]==2.0.30 # via # duckdb-engine - # feast (setup.py) # ibis-framework - # sqlalchemy # sqlalchemy-views sqlalchemy-views==0.3.2 # via ibis-framework @@ -864,21 +778,17 @@ starlette==0.37.2 substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals testcontainers==4.4.0 - # via feast (setup.py) -thriftpy2==0.4.20 +thriftpy2==0.5.0 # via happybase tinycss2==1.3.0 # via nbconvert toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via # build @@ -905,9 +815,7 @@ tornado==6.4 # notebook # terminado tqdm==4.66.4 - # via - # feast (setup.py) - # great-expectations + # via great-expectations traitlets==5.14.3 # via # comm @@ -924,39 +832,25 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via - # feast (setup.py) - # mypy-protobuf + # via mypy-protobuf types-pymysql==1.1.0.20240425 - # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via - # arrow - # feast (setup.py) + # via arrow types-pytz==2024.1.0.20240417 - # via feast (setup.py) types-pyyaml==6.0.12.20240311 - # via feast (setup.py) types-redis==4.6.0.20240425 - # via feast (setup.py) types-requests==2.30.0.0 - # via feast (setup.py) types-setuptools==69.5.0.20240423 - # via - # feast (setup.py) - # types-cffi + # via types-cffi types-tabulate==0.9.0.20240106 - # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 @@ -994,7 +888,6 @@ urllib3==1.26.18 # via # botocore # docker - # feast (setup.py) # great-expectations # kubernetes # minio @@ -1006,13 +899,10 @@ uvicorn[standard]==0.29.0 # via # fastapi # fastapi-cli - # feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via - # feast (setup.py) - # pre-commit + # via pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -1029,7 +919,7 @@ websocket-client==1.8.0 # kubernetes websockets==12.0 # via uvicorn -werkzeug==3.0.2 +werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools @@ -1040,8 +930,4 @@ wrapt==1.16.0 xmltodict==0.13.0 # via moto zipp==3.18.1 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools + # via importlib-metadata \ No newline at end of file diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 24946075a67..56a8259ab43 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.10-requirements.txt annotated-types==0.6.0 # via pydantic anyio==4.3.0 @@ -25,21 +21,16 @@ charset-normalizer==3.3.2 click==8.1.7 # via # dask - # feast (setup.py) # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via feast (setup.py) -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.14 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask dill==0.3.8 - # via feast (setup.py) dnspython==2.6.1 # via email-validator email-validator==2.1.1 @@ -47,17 +38,14 @@ email-validator==2.1.1 exceptiongroup==1.2.1 # via anyio fastapi==0.111.0 - # via - # fastapi-cli - # feast (setup.py) + # via fastapi-cli fastapi-cli==0.0.2 # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 # via sqlalchemy -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore @@ -76,12 +64,9 @@ idna==3.7 # requests importlib-metadata==7.1.0 # via dask -jinja2==3.1.3 - # via - # fastapi - # feast (setup.py) +jinja2==3.1.4 + # via fastapi jsonschema==4.22.0 - # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 @@ -93,20 +78,17 @@ markupsafe==2.1.5 mdurl==0.1.2 # via markdown-it-py mmh3==4.1.0 - # via feast (setup.py) mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 - # via feast (setup.py) numpy==1.26.4 # via # dask - # feast (setup.py) # pandas # pyarrow -orjson==3.10.2 +orjson==3.10.3 # via fastapi packaging==24.0 # via @@ -116,27 +98,18 @@ pandas==2.2.2 # via # dask # dask-expr - # feast (setup.py) -partd==1.4.1 +partd==1.4.2 # via dask protobuf==4.25.3 - # via - # feast (setup.py) - # mypy-protobuf + # via mypy-protobuf pyarrow==16.0.0 - # via - # dask-expr - # feast (setup.py) + # via dask-expr pydantic==2.7.1 - # via - # fastapi - # feast (setup.py) + # via fastapi pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 - # via - # feast (setup.py) - # rich +pygments==2.18.0 + # via rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 @@ -148,17 +121,15 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # uvicorn referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 - # via feast (setup.py) rich==13.7.1 # via typer -rpds-py==0.18.0 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -170,18 +141,12 @@ sniffio==1.3.1 # via # anyio # httpx -sqlalchemy[mypy]==2.0.29 - # via - # feast (setup.py) - # sqlalchemy +sqlalchemy[mypy]==2.0.30 starlette==0.37.2 # via fastapi tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via mypy toolz==0.12.1 @@ -189,9 +154,7 @@ toolz==0.12.1 # dask # partd tqdm==4.66.4 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-protobuf==5.26.0.20240422 @@ -217,7 +180,6 @@ uvicorn[standard]==0.29.0 # via # fastapi # fastapi-cli - # feast (setup.py) uvloop==0.19.0 # via uvicorn watchfiles==0.21.0 @@ -225,4 +187,4 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.18.1 - # via importlib-metadata + # via importlib-metadata \ No newline at end of file diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 2abaac7ffb4..4df5a6abe9b 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --extra=ci --output-file=sdk/python/requirements/py3.9-ci-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.9-ci-requirements.txt alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -25,7 +21,6 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 - # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 @@ -43,10 +38,8 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 - # via feast (setup.py) azure-storage-blob==12.19.1 - # via feast (setup.py) -babel==2.14.0 +babel==2.15.0 # via # jupyterlab-server # sphinx @@ -56,25 +49,20 @@ bidict==0.23.1 # via ibis-framework bleach==6.1.0 # via nbconvert -boto3==1.34.97 - # via - # feast (setup.py) - # moto -botocore==1.34.97 +boto3==1.34.99 + # via moto +botocore==1.34.99 # via # boto3 # moto # s3transfer build==1.2.1 - # via - # feast (setup.py) - # pip-tools + # via pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 - # via feast (setup.py) certifi==2024.2.2 # via # httpcore @@ -97,7 +85,6 @@ charset-normalizer==3.3.2 click==8.1.7 # via # dask - # feast (setup.py) # geomet # great-expectations # pip-tools @@ -106,20 +93,17 @@ click==8.1.7 cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via - # feast (setup.py) - # great-expectations + # via great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.0 +coverage[toml]==7.5.1 # via pytest-cov -cryptography==42.0.5 +cryptography==42.0.7 # via # azure-identity # azure-storage-blob - # feast (setup.py) # great-expectations # moto # msal @@ -128,11 +112,9 @@ cryptography==42.0.5 # snowflake-connector-python # types-pyopenssl # types-redis -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.14 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask db-dtypes==1.2.0 # via google-cloud-bigquery @@ -143,17 +125,13 @@ decorator==5.1.1 defusedxml==0.7.1 # via nbconvert deltalake==0.17.3 - # via feast (setup.py) dill==0.3.8 - # via feast (setup.py) distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator docker==7.0.0 - # via - # feast (setup.py) - # testcontainers + # via testcontainers docutils==0.19 # via sphinx duckdb==0.10.2 @@ -176,9 +154,7 @@ execnet==2.1.1 executing==2.0.1 # via stack-data fastapi==0.111.0 - # via - # fastapi-cli - # feast (setup.py) + # via fastapi-cli fastapi-cli==0.0.2 # via fastapi fastjsonschema==2.19.1 @@ -188,20 +164,16 @@ filelock==3.14.0 # snowflake-connector-python # virtualenv firebase-admin==5.4.0 - # via feast (setup.py) fqdn==1.5.1 # via jsonschema fsspec==2023.12.2 - # via - # dask - # feast (setup.py) + # via dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.19.0 # via - # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -211,13 +183,14 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.127.0 +google-api-python-client==2.128.0 # via firebase-admin google-auth==2.29.0 # via # google-api-core # google-api-python-client # google-auth-httplib2 + # google-cloud-bigquery-storage # google-cloud-core # google-cloud-firestore # google-cloud-storage @@ -225,11 +198,8 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via feast (setup.py) -google-cloud-bigquery-storage==2.24.0 - # via feast (setup.py) +google-cloud-bigquery-storage==2.25.0 google-cloud-bigtable==2.23.1 - # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -238,13 +208,10 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 - # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via - # feast (setup.py) - # firebase-admin + # via firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -255,19 +222,16 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via - # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status great-expectations==0.18.13 - # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable grpcio==1.63.0 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -278,27 +242,19 @@ grpcio==1.63.0 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 - # via feast (setup.py) grpcio-reflection==1.62.2 - # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 - # via feast (setup.py) grpcio-tools==1.62.2 - # via feast (setup.py) -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 - # via feast (setup.py) hazelcast-python-client==5.3.0 - # via feast (setup.py) hiredis==2.3.2 - # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -310,14 +266,10 @@ httptools==0.6.1 httpx==0.27.0 # via # fastapi - # feast (setup.py) # jupyterlab ibis-framework[duckdb]==8.0.0 - # via - # feast (setup.py) - # ibis-substrait + # via ibis-substrait ibis-substrait==3.2.0 - # via feast (setup.py) identify==2.5.36 # via pre-commit idna==3.7 @@ -358,11 +310,10 @@ isoduration==20.11.0 # via jsonschema jedi==0.19.1 # via ipython -jinja2==3.1.3 +jinja2==3.1.4 # via # altair # fastapi - # feast (setup.py) # great-expectations # jupyter-server # jupyterlab @@ -385,7 +336,6 @@ jsonpointer==2.4 jsonschema[format-nongpl]==4.22.0 # via # altair - # feast (setup.py) # great-expectations # jupyter-events # jupyterlab-server @@ -430,7 +380,6 @@ jupyterlab-server==2.27.1 jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 - # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -451,17 +400,13 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 - # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 - # via feast (setup.py) mock==2.0.0 - # via feast (setup.py) moto==4.2.14 - # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -473,13 +418,10 @@ msgpack==1.0.8 multipledispatch==1.0.0 # via ibis-framework mypy==1.10.0 - # via - # feast (setup.py) - # sqlalchemy + # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 - # via feast (setup.py) nbclient==0.10.0 # via nbconvert nbconvert==7.16.4 @@ -505,7 +447,6 @@ numpy==1.26.4 # altair # dask # db-dtypes - # feast (setup.py) # great-expectations # ibis-framework # pandas @@ -513,7 +454,7 @@ numpy==1.26.4 # scipy oauthlib==3.2.2 # via requests-oauthlib -orjson==3.10.2 +orjson==3.10.3 # via fastapi overrides==7.7.0 # via jupyter-server @@ -544,7 +485,6 @@ pandas==2.2.2 # dask # dask-expr # db-dtypes - # feast (setup.py) # google-cloud-bigquery # great-expectations # ibis-framework @@ -555,14 +495,15 @@ parso==0.8.4 # via jedi parsy==2.1 # via ibis-framework -partd==1.4.1 +partd==1.4.2 # via dask pbr==6.0.0 # via mock pexpect==4.9.0 # via ipython +pip==24.0 + # via pip-tools pip-tools==7.4.1 - # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core @@ -575,7 +516,6 @@ ply==3.11 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 - # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server prompt-toolkit==3.0.43 @@ -590,7 +530,6 @@ proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -608,11 +547,8 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via - # feast (setup.py) - # ipykernel + # via ipykernel psycopg2-binary==2.9.9 - # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -620,7 +556,6 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 - # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 @@ -630,7 +565,6 @@ pyarrow==15.0.2 # dask-expr # db-dtypes # deltalake - # feast (setup.py) # google-cloud-bigquery # ibis-framework # snowflake-connector-python @@ -645,19 +579,16 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 - # via feast (setup.py) pycparser==2.22 # via cffi pydantic==2.7.1 # via # fastapi - # feast (setup.py) # great-expectations pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 +pygments==2.18.0 # via - # feast (setup.py) # ipython # nbconvert # rich @@ -667,11 +598,8 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 - # via feast (setup.py) pymysql==1.1.0 - # via feast (setup.py) pyodbc==5.1.0 - # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 @@ -683,10 +611,8 @@ pyproject-hooks==1.1.0 # build # pip-tools pyspark==3.5.1 - # via feast (setup.py) pytest==7.4.4 # via - # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -696,21 +622,13 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 - # via feast (setup.py) pytest-cov==5.0.0 - # via feast (setup.py) pytest-env==1.1.3 - # via feast (setup.py) pytest-lazy-fixture==0.6.3 - # via feast (setup.py) pytest-mock==1.10.4 - # via feast (setup.py) pytest-ordering==0.6 - # via feast (setup.py) pytest-timeout==1.4.2 - # via feast (setup.py) pytest-xdist==3.6.1 - # via feast (setup.py) python-dateutil==2.9.0.post0 # via # arrow @@ -740,7 +658,6 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # ibis-substrait # jupyter-events # kubernetes @@ -753,20 +670,17 @@ pyzmq==26.0.3 # jupyter-client # jupyter-server redis==4.6.0 - # via feast (setup.py) referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events regex==2024.4.28 - # via feast (setup.py) requests==2.31.0 # via # azure-core # cachecontrol # docker - # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-storage @@ -797,8 +711,7 @@ rich==13.7.1 # ibis-framework # typer rockset==2.1.2 - # via feast (setup.py) -rpds-py==0.18.0 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -808,14 +721,19 @@ ruamel-yaml==0.17.17 # via great-expectations ruamel-yaml-clib==0.2.8 # via ruamel-yaml -ruff==0.4.2 - # via feast (setup.py) +ruff==0.4.3 s3transfer==0.10.1 # via boto3 scipy==1.13.0 # via great-expectations send2trash==1.8.3 # via jupyter-server +setuptools==69.5.1 + # via + # grpcio-tools + # kubernetes + # nodeenv + # pip-tools shellingham==1.5.4 # via typer six==1.16.0 @@ -838,13 +756,11 @@ sniffio==1.3.1 snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==3.10.0 - # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -857,12 +773,10 @@ sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==2.0.29 +sqlalchemy[mypy]==2.0.30 # via # duckdb-engine - # feast (setup.py) # ibis-framework - # sqlalchemy # sqlalchemy-views sqlalchemy-views==0.3.2 # via ibis-framework @@ -875,21 +789,17 @@ starlette==0.37.2 substrait==0.17.0 # via ibis-substrait tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals testcontainers==4.4.0 - # via feast (setup.py) -thriftpy2==0.4.20 +thriftpy2==0.5.0 # via happybase tinycss2==1.3.0 # via nbconvert toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via # build @@ -916,9 +826,7 @@ tornado==6.4 # notebook # terminado tqdm==4.66.4 - # via - # feast (setup.py) - # great-expectations + # via great-expectations traitlets==5.14.3 # via # comm @@ -935,39 +843,25 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via - # feast (setup.py) - # mypy-protobuf + # via mypy-protobuf types-pymysql==1.1.0.20240425 - # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via - # arrow - # feast (setup.py) + # via arrow types-pytz==2024.1.0.20240417 - # via feast (setup.py) types-pyyaml==6.0.12.20240311 - # via feast (setup.py) types-redis==4.6.0.20240425 - # via feast (setup.py) types-requests==2.30.0.0 - # via feast (setup.py) types-setuptools==69.5.0.20240423 - # via - # feast (setup.py) - # types-cffi + # via types-cffi types-tabulate==0.9.0.20240106 - # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 @@ -1006,7 +900,6 @@ urllib3==1.26.18 # via # botocore # docker - # feast (setup.py) # great-expectations # kubernetes # minio @@ -1019,13 +912,10 @@ uvicorn[standard]==0.29.0 # via # fastapi # fastapi-cli - # feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via - # feast (setup.py) - # pre-commit + # via pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -1042,7 +932,7 @@ websocket-client==1.8.0 # kubernetes websockets==12.0 # via uvicorn -werkzeug==3.0.2 +werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools @@ -1053,8 +943,4 @@ wrapt==1.16.0 xmltodict==0.13.0 # via moto zipp==3.18.1 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools + # via importlib-metadata \ No newline at end of file diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 822176c6d0d..1092aac9d09 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=sdk/python/requirements/py3.9-requirements.txt -# +# This file was autogenerated by uv via the following command: +# uv pip compile --system --no-strip-extras setup.py --output-file sdk/python/requirements/py3.9-requirements.txt annotated-types==0.6.0 # via pydantic anyio==4.3.0 @@ -25,21 +21,16 @@ charset-normalizer==3.3.2 click==8.1.7 # via # dask - # feast (setup.py) # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via feast (setup.py) -dask[array,dataframe]==2024.4.2 - # via - # dask-expr - # feast (setup.py) -dask-expr==1.0.14 +dask[dataframe]==2024.5.0 + # via dask-expr +dask-expr==1.1.0 # via dask dill==0.3.8 - # via feast (setup.py) dnspython==2.6.1 # via email-validator email-validator==2.1.1 @@ -47,17 +38,14 @@ email-validator==2.1.1 exceptiongroup==1.2.1 # via anyio fastapi==0.111.0 - # via - # fastapi-cli - # feast (setup.py) + # via fastapi-cli fastapi-cli==0.0.2 # via fastapi fsspec==2024.3.1 # via dask greenlet==3.0.3 # via sqlalchemy -gunicorn==22.0.0 ; platform_system != "Windows" - # via feast (setup.py) +gunicorn==22.0.0 h11==0.14.0 # via # httpcore @@ -78,12 +66,9 @@ importlib-metadata==7.1.0 # via # dask # typeguard -jinja2==3.1.3 - # via - # fastapi - # feast (setup.py) +jinja2==3.1.4 + # via fastapi jsonschema==4.22.0 - # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 @@ -95,20 +80,17 @@ markupsafe==2.1.5 mdurl==0.1.2 # via markdown-it-py mmh3==4.1.0 - # via feast (setup.py) mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 - # via feast (setup.py) numpy==1.26.4 # via # dask - # feast (setup.py) # pandas # pyarrow -orjson==3.10.2 +orjson==3.10.3 # via fastapi packaging==24.0 # via @@ -118,27 +100,18 @@ pandas==2.2.2 # via # dask # dask-expr - # feast (setup.py) -partd==1.4.1 +partd==1.4.2 # via dask protobuf==4.25.3 - # via - # feast (setup.py) - # mypy-protobuf + # via mypy-protobuf pyarrow==16.0.0 - # via - # dask-expr - # feast (setup.py) + # via dask-expr pydantic==2.7.1 - # via - # fastapi - # feast (setup.py) + # via fastapi pydantic-core==2.18.2 # via pydantic -pygments==2.17.2 - # via - # feast (setup.py) - # rich +pygments==2.18.0 + # via rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 @@ -150,17 +123,15 @@ pytz==2024.1 pyyaml==6.0.1 # via # dask - # feast (setup.py) # uvicorn referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.31.0 - # via feast (setup.py) rich==13.7.1 # via typer -rpds-py==0.18.0 +rpds-py==0.18.1 # via # jsonschema # referencing @@ -172,18 +143,12 @@ sniffio==1.3.1 # via # anyio # httpx -sqlalchemy[mypy]==2.0.29 - # via - # feast (setup.py) - # sqlalchemy +sqlalchemy[mypy]==2.0.30 starlette==0.37.2 # via fastapi tabulate==0.9.0 - # via feast (setup.py) -tenacity==8.2.3 - # via feast (setup.py) +tenacity==8.3.0 toml==0.10.2 - # via feast (setup.py) tomli==2.0.1 # via mypy toolz==0.12.1 @@ -191,9 +156,7 @@ toolz==0.12.1 # dask # partd tqdm==4.66.4 - # via feast (setup.py) typeguard==4.2.1 - # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-protobuf==5.26.0.20240422 @@ -220,7 +183,6 @@ uvicorn[standard]==0.29.0 # via # fastapi # fastapi-cli - # feast (setup.py) uvloop==0.19.0 # via uvicorn watchfiles==0.21.0 @@ -228,4 +190,4 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.18.1 - # via importlib-metadata + # via importlib-metadata \ No newline at end of file