From 5b34cbb133b72588c55dde55d4d46a022238d5ae Mon Sep 17 00:00:00 2001 From: Venkateswarlu Boggavarapu Date: Wed, 22 Apr 2026 15:36:43 +0000 Subject: [PATCH 1/6] fix: Add project filter to apply_data_source and delete_data_source Fixes two methods in the shared registry that were matching data sources by name only, without filtering by project. In multi-project registries where different projects share the same data source name (e.g. the default "vals_to_add"), this caused: - apply_data_source: cross-project overwriting of data sources - delete_data_source: deleting data sources from the wrong project Changes: - apply_data_source: adds an `and existing_data_source_proto.project == project` check (mirrors the pattern used in apply_entity, apply_feature_service, apply_feature_view) - delete_data_source: uses the same project-scoped filter for consistency - Fix Unicode mojibake in registry.py comments (em dashes and right arrows were stored as double-encoded Latin-1 bytes) - apply ruff format - collapse single-line timestamp assignment Fixes feast-dev#6206 Signed-off-by: Venkateswarlu Boggavarapu --- sdk/python/feast/infra/registry/registry.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index 76da6ad831d..1872b43b2eb 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -394,16 +394,17 @@ def apply_data_source( registry = self._prepare_registry_for_changes(project) for idx, existing_data_source_proto in enumerate(registry.data_sources): - if existing_data_source_proto.name == data_source.name: + if ( + existing_data_source_proto.name == data_source.name + and existing_data_source_proto.project == project + ): existing_data_source = DataSource.from_proto(existing_data_source_proto) # Check if the data source has actually changed if existing_data_source == data_source: return else: # 
Preserve created_timestamp from existing data source - data_source.created_timestamp = ( - existing_data_source.created_timestamp - ) + data_source.created_timestamp = existing_data_source.created_timestamp del registry.data_sources[idx] break @@ -423,7 +424,10 @@ def delete_data_source(self, name: str, project: str, commit: bool = True): for idx, data_source_proto in enumerate( self.cached_registry_proto.data_sources ): - if data_source_proto.name == name: + if ( + data_source_proto.name == name + and data_source_proto.project == project + ): del self.cached_registry_proto.data_sources[idx] if commit: self.commit() @@ -688,13 +692,13 @@ def apply_feature_view( if not is_latest: # Explicit version: check if it exists (pin/revert) or not (forward declaration). - # Note: The file registry is last-write-wins for true concurrent races — + # Note: The file registry is last-write-wins for true concurrent races — # this is a pre-existing limitation for all file registry operations. # For multi-client environments, use the SQL registry. record = self._get_version_record(feature_view.name, project, pin_version) if record is not None: - # Version exists → pin/revert to that snapshot + # Version exists → pin/revert to that snapshot # Check that the user hasn't also modified the definition. # Compare user's FV (with version="latest") against active FV. 
self._prepare_registry_for_changes(project) @@ -735,7 +739,7 @@ def apply_feature_view( # Apply the restored FV using the standard path below feature_view = restored_fv else: - # Version doesn't exist → forward declaration: create it + # Version doesn't exist → forward declaration: create it feature_view.current_version_number = pin_version feature_view_proto = feature_view.to_proto() feature_view_proto.spec.project = project From 43faac7fe6d51ffa79225144d3dbbe67f8df0d5f Mon Sep 17 00:00:00 2001 From: Venkateswarlu Boggavarapu Date: Wed, 22 Apr 2026 15:39:54 +0000 Subject: [PATCH 2/6] test: add cross-project isolation tests for apply/delete_data_source Add two integration tests that verify the regression fix for #6206: - test_apply_data_source_cross_project_isolation: applies a FileSource with the same name to two separate projects and asserts that updating one project's source does not overwrite the other project's source. - test_delete_data_source_project_scoped: applies a FileSource with the same name to two projects, deletes it from one, and asserts the other project's source is unaffected. Signed-off-by: Venkateswarlu Boggavarapu --- .../registration/test_universal_registry.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/sdk/python/tests/integration/registration/test_universal_registry.py b/sdk/python/tests/integration/registration/test_universal_registry.py index fb09395d789..32a0e196568 100644 --- a/sdk/python/tests/integration/registration/test_universal_registry.py +++ b/sdk/python/tests/integration/registration/test_universal_registry.py @@ -898,6 +898,108 @@ def test_apply_data_source_with_timestamps(test_registry): test_registry.teardown() +@pytest.mark.integration +@pytest.mark.parametrize( + "test_registry", + all_fixtures, +) +def test_apply_data_source_cross_project_isolation(test_registry): + """Test that apply_data_source uses project-scoped filtering. 
+ + Regression test for https://github.com/feast-dev/feast/issues/6206: + applying a data source to one project must not overwrite the data source + with the same name in a different project. + """ + project_a = "project_a" + project_b = "project_b" + + source_a = FileSource( + name="shared_source_name", + file_format=ParquetFormat(), + path="file://feast/project_a.parquet", + timestamp_field="ts_col", + ) + source_b = FileSource( + name="shared_source_name", + file_format=ParquetFormat(), + path="file://feast/project_b.parquet", + timestamp_field="ts_col", + ) + + test_registry.apply_data_source(source_a, project_a, commit=True) + test_registry.apply_data_source(source_b, project_b, commit=True) + + # Each project should have exactly its own source + sources_a = test_registry.list_data_sources(project_a) + sources_b = test_registry.list_data_sources(project_b) + assert len(sources_a) == 1 + assert len(sources_b) == 1 + + # Paths must be project-specific — not overwritten cross-project + assert sources_a[0].path == "file://feast/project_a.parquet" + assert sources_b[0].path == "file://feast/project_b.parquet" + + # Re-apply source_b with updated path: must not bleed into project_a + source_b_updated = FileSource( + name="shared_source_name", + file_format=ParquetFormat(), + path="file://feast/project_b_v2.parquet", + timestamp_field="ts_col", + ) + test_registry.apply_data_source(source_b_updated, project_b, commit=True) + + sources_a_after = test_registry.list_data_sources(project_a) + assert len(sources_a_after) == 1 + assert sources_a_after[0].path == "file://feast/project_a.parquet", ( + "apply_data_source for project_b must not overwrite project_a's source" + ) + + test_registry.teardown() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_registry", + all_fixtures, +) +def test_delete_data_source_project_scoped(test_registry): + """Test that delete_data_source only removes the source from the given project. 
+ + Regression test for https://github.com/feast-dev/feast/issues/6206: + deleting a data source from one project must not delete the data source + with the same name from another project. + """ + project_a = "project_a" + project_b = "project_b" + + source_a = FileSource( + name="shared_source_name", + file_format=ParquetFormat(), + path="file://feast/project_a.parquet", + timestamp_field="ts_col", + ) + source_b = FileSource( + name="shared_source_name", + file_format=ParquetFormat(), + path="file://feast/project_b.parquet", + timestamp_field="ts_col", + ) + + test_registry.apply_data_source(source_a, project_a, commit=True) + test_registry.apply_data_source(source_b, project_b, commit=True) + + # Delete the source from project_a only + test_registry.delete_data_source("shared_source_name", project_a, commit=True) + + # project_a should have no sources; project_b should be unaffected + sources_a = test_registry.list_data_sources(project_a) + sources_b = test_registry.list_data_sources(project_b) + assert len(sources_a) == 0, "Source should be deleted from project_a" + assert len(sources_b) == 1, "Source in project_b must not be deleted" + assert sources_b[0].path == "file://feast/project_b.parquet" + + test_registry.teardown() + @pytest.mark.integration @pytest.mark.parametrize( From eb7175877975bbd2bc32b804d2c0c3a08613d90e Mon Sep 17 00:00:00 2001 From: mailtoboggavarapu-coder Date: Wed, 22 Apr 2026 12:57:10 -0400 Subject: [PATCH 3/6] style: apply ruff formatting fixes to registry.py Signed-off-by: Venkateswarlu Boggavarapu --- sdk/python/feast/infra/registry/registry.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index 1872b43b2eb..b6ae948d65c 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -404,7 +404,9 @@ def apply_data_source( return else: # Preserve created_timestamp from 
existing data source - data_source.created_timestamp = existing_data_source.created_timestamp + data_source.created_timestamp = ( + existing_data_source.created_timestamp + ) del registry.data_sources[idx] break @@ -424,10 +426,7 @@ def delete_data_source(self, name: str, project: str, commit: bool = True): for idx, data_source_proto in enumerate( self.cached_registry_proto.data_sources ): - if ( - data_source_proto.name == name - and data_source_proto.project == project - ): + if data_source_proto.name == name and data_source_proto.project == project: del self.cached_registry_proto.data_sources[idx] if commit: self.commit() From 58e4a3853ba93884b7ff1eecc782e7af3f477a74 Mon Sep 17 00:00:00 2001 From: mailtoboggavarapu-coder Date: Wed, 22 Apr 2026 13:01:08 -0400 Subject: [PATCH 4/6] style: fix ruff E302 missing blank line before test function Add missing blank line between top-level test functions to satisfy ruff E302. Signed-off-by: Venkateswarlu Boggavarapu --- .../tests/integration/registration/test_universal_registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/tests/integration/registration/test_universal_registry.py b/sdk/python/tests/integration/registration/test_universal_registry.py index 32a0e196568..54c15926fe2 100644 --- a/sdk/python/tests/integration/registration/test_universal_registry.py +++ b/sdk/python/tests/integration/registration/test_universal_registry.py @@ -898,6 +898,7 @@ def test_apply_data_source_with_timestamps(test_registry): test_registry.teardown() + @pytest.mark.integration @pytest.mark.parametrize( "test_registry", From c4380891e9167545fd7dba51cbf1b629c974904c Mon Sep 17 00:00:00 2001 From: mailtoboggavarapu-coder Date: Wed, 22 Apr 2026 13:19:42 -0400 Subject: [PATCH 5/6] DCO Remediation Commit for 58e4a3853ba93884b7ff1eecc782e7af3f477a74 I, mailtoboggavarapu-coder , hereby attest that 58e4a3853ba93884b7ff1eecc782e7af3f477a74 adheres to the Developer Certificate of Origin. 
Signed-off-by: mailtoboggavarapu-coder --- .../tests/integration/registration/test_universal_registry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/python/tests/integration/registration/test_universal_registry.py b/sdk/python/tests/integration/registration/test_universal_registry.py index 54c15926fe2..88d827d786a 100644 --- a/sdk/python/tests/integration/registration/test_universal_registry.py +++ b/sdk/python/tests/integration/registration/test_universal_registry.py @@ -910,6 +910,8 @@ def test_apply_data_source_cross_project_isolation(test_registry): Regression test for https://github.com/feast-dev/feast/issues/6206: applying a data source to one project must not overwrite the data source with the same name in a different project. + + See: feast-dev/feast#6298 """ project_a = "project_a" project_b = "project_b" From 528aa8fa4a4f64ec79a4a8aac788840faf8043d4 Mon Sep 17 00:00:00 2001 From: mailtoboggavarapu-coder Date: Wed, 22 Apr 2026 13:23:32 -0400 Subject: [PATCH 6/6] DCO Remediation Commit for eb7175877975bbd2bc32b804d2c0c3a08613d90e I, mailtoboggavarapu-coder , hereby attest that eb7175877975bbd2bc32b804d2c0c3a08613d90e adheres to the Developer Certificate of Origin. Signed-off-by: mailtoboggavarapu-coder --- sdk/python/feast/infra/registry/registry.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index b6ae948d65c..5eddc7379f6 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -386,6 +386,12 @@ def list_data_sources( def apply_data_source( self, data_source: DataSource, project: str, commit: bool = True ): + """Apply a data source to the registry with project-scoped deduplication. + + Filters existing data sources by both name and project (fixes feast-dev/feast#6206), + preserving the original created_timestamp if the source already exists in the + target project. 
+ """ now = _utc_now() if not data_source.created_timestamp: data_source.created_timestamp = now