diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index 7910903..cf3fef0 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -4,7 +4,6 @@
 name: Main Extension Distribution Pipeline
 on:
   push:
-    branches: [main]
   pull_request:
   workflow_dispatch:
 
@@ -13,28 +12,19 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  duckdb-next-build:
-    name: Build extension binaries
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
-    if: false   # extension-template is currently not compatible with main
-    with:
-      duckdb_version: main
-      ci_tools_version: main
-      extension_name: rpt
-
   duckdb-stable-build:
     name: Build extension binaries
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.2
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.5.3
     with:
-      duckdb_version: v1.3.2
-      ci_tools_version: v1.3.2
-      extension_name: rpt
+      duckdb_version: v1.5.3
+      ci_tools_version: v1.5.3
+      extension_name: robust
 
   code-quality-check:
     name: Code Quality Check
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@main
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.5.3
     with:
-      duckdb_version: v1.3.2
-      ci_tools_version: main
-      extension_name: rpt
-      format_checks: 'format;tidy'
+      duckdb_version: v1.5.3
+      ci_tools_version: v1.5.3
+      extension_name: robust
+      format_checks: 'format;tidy'
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cc55629..500778b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,21 @@ test/python/__pycache__/
 venv/
 bin/
 lib64
-pyvenv.cfg
\ No newline at end of file
+pyvenv.cfg
+.tmp
+.claude/
+
+results/
+wip_docs/
+temp_scripts/
+benchmark
+duckdb_benchmark_data/
+jobdata/
+tpchdata/
+
+CLAUDE.md
+TODO.md
+
+
+# Custom bf implementation
+src/optimizer/bloom_filter/
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3dca1c6..312fad0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.5)
 
 # Set extension name here
-set(TARGET_NAME rpt)
+set(TARGET_NAME robust)
 
 # DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then
 # used in cmake with find_package. Feel free to remove or replace with other dependencies.
@@ -12,28 +12,18 @@ set(EXTENSION_NAME ${TARGET_NAME}_extension)
 set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
 
 project(${TARGET_NAME})
-include_directories(src/include)
-#add_subdirectory(src/operators)
-#include_directories(src)
+include_directories(src)
 
 set(EXTENSION_SOURCES
-    src/rpt_extension.cpp
-#    src/operators/logical_hello.cpp
-#    src/operators/physical_hello.cpp
-    src/dag.cpp
+    src/robust_extension.cpp
     src/bloom_filter.cpp
-    src/operators/logical_create_bf.cpp
-    src/operators/physical_create_bf.cpp
-    src/operators/logical_use_bf.cpp
-    src/operators/physical_use_bf.cpp
-#    src/predicate_transfer_optimization.cpp
-#    src/table_operator_manager.cpp
-#    src/transfer_graph_manager.cpp
-    src/optimizer/rpt_optimizer.cpp
-#    src/optimizer/graph_manager.cpp
+    src/operators/logical_create_filter.cpp
+    src/operators/physical_create_filter.cpp
+    src/operators/logical_probe_filter.cpp
+    src/operators/physical_probe_filter.cpp
+    src/optimizer/robust_optimizer.cpp
     src/optimizer/table_manager.cpp
-#    src/bottom_up_rpt_optimizer.cpp
-#    src/robust_predicate_transfer_optimizer.cpp
+    src/utils/dag_printer.cpp
 )
 
 build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
@@ -48,3 +38,17 @@ install(
   EXPORT "${DUCKDB_EXPORT_SET}"
   LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
   ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
+
+# Bloom Filter Benchmark (standalone executable)
+option(BUILD_BLOOM_FILTER_BENCHMARK "Build the bloom filter benchmark" OFF)
+if(BUILD_BLOOM_FILTER_BENCHMARK)
+    add_executable(bloom_filter_benchmark
+        src/benchmark/bloom_filter_benchmark.cpp
+        src/bloom_filter.cpp
+    )
+    target_include_directories(bloom_filter_benchmark PRIVATE
+        src
+        ${CMAKE_SOURCE_DIR}/src
+    )
+    target_link_libraries(bloom_filter_benchmark duckdb)
+endif()
diff --git a/LICENSE b/LICENSE
index a14aed3..fc44b2d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,5 @@
 Copyright 2018-2025 Stichting DuckDB Foundation
+Copyright 2025-2026 JP Reddy
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 
diff --git a/Makefile b/Makefile
index f820fe8..d93a9d1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,18 @@
 PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 
 # Configuration of extension
-EXT_NAME=rpt
+EXT_NAME=robust
 EXT_CONFIG=${PROJ_DIR}extension_config.cmake
 
 # Include the Makefile from extension-ci-tools
-include extension-ci-tools/makefiles/duckdb_extension.Makefile
\ No newline at end of file
+include extension-ci-tools/makefiles/duckdb_extension.Makefile
+
+# Vendor Robust-owned benchmark suites + duckdb patches into the submodule.
+# Runs automatically before release/debug builds. Safe to re-run; idempotent.
+# See scripts/vendor_duckdb_bench.sh for details.
+.PHONY: vendor-duckdb-bench
+vendor-duckdb-bench:
+	@bash $(PROJ_DIR)scripts/vendor_duckdb_bench.sh
+
+release: vendor-duckdb-bench
+debug: vendor-duckdb-bench
\ No newline at end of file
diff --git a/README.md b/README.md
index 3a90732..342a673 100644
--- a/README.md
+++ b/README.md
@@ -1,86 +1,302 @@
-# Rpt
+# Robust
 
-This repository is based on https://github.com/duckdb/extension-template, check it out if you want to build and ship your own DuckDB extension.
+[![CI](https://github.com/robust-sql/robust/actions/workflows/MainDistributionPipeline.yml/badge.svg)](https://github.com/robust-sql/robust/actions/workflows/MainDistributionPipeline.yml)
+[![DuckDB](https://img.shields.io/badge/DuckDB-v1.5.3-blue)](https://github.com/duckdb/duckdb/releases/tag/v1.5.3)
+[![extension-ci-tools](https://img.shields.io/badge/extension--ci--tools-v1.5.3-blue)](https://github.com/duckdb/extension-ci-tools/tree/v1.5.3)
+[![status](https://img.shields.io/badge/status-WIP-orange)](#current-status)
+<!-- JOB performance badges (geomean speedup, memory ratio) removed pending re-measurement on DuckDB v1.5.3. -->
+<!-- [![JOB speedup](https://img.shields.io/badge/JOB_geomean-1.76×-brightgreen)](#benchmark-results) -->
+<!-- [![JOB memory](https://img.shields.io/badge/JOB_memory-1.67×_lower-brightgreen)](#benchmark-results) -->
 
----
+A DuckDB extension that implements **Predicate Transfer**: a novel method that optimizes join performance by pre-filtering tables to reduce the join input sizes.
 
-This extension, Rpt, allow you to ... <extension_goal>.
+## Overview
 
+In a multi-join query, the vast majority of rows scanned never survive to the final result. Predicate transfer addresses this by building filters from join keys on the build side of each join, then applying those filters (and pushing them to scans) on the probe side. Filters cascade across the join graph: a filter built from the deepest join can prune scans at the leaves.
+
+The optimizer walks the join graph, detects equality-join equivalence classes, and inserts `CREATE_FILTER` and `PROBE_FILTER` operators into the physical plan. A second **backward pass** broadcasts each filter across its equivalence class, so filters built on one side of a join also apply to every other table that joins on the same key. Queries return identical results — they just touch dramatically fewer rows.
+
+## Benchmark Results
+
+Measured on the [Join Order Benchmark](http://www.vldb.org/pvldb/vol9/p204-leis.pdf) (113 queries over IMDb), DuckDB at commit [`88277463aa`](https://github.com/duckdb/duckdb/commit/88277463aa86b998f241a0cd0f87ea647e749576) (post-v1.5-merge on `main`), single machine, 8 threads.
+
+### Per-query speedup (Robust vs baseline DuckDB)
+
+![JOB speedup](docs/figures/speedup_join_order.png)
+
+Geometric mean: **1.76× faster**. Best case: **23×** on join-heavy queries (e.g. 07c).
+
+### Memory allocated per query (lower is better)
+
+![Memory ratio](docs/figures/memory_ratio.png)
+
+Geometric mean of `baseline_memory / robust_memory`: **1.67×**. Robust uses less memory on 100/113 queries; large reductions concentrated in queries with cardinality-explosive intermediate joins.
+
+### Hash-join output cardinality (matched rows emitted by HJs)
+
+![HJ cardinality sum](docs/figures/hj_card_sum_pairs_line.png)
+
+Sum of `operator_cardinality` across all `HASH_JOIN` operators in the plan — i.e. the total number of matched rows each HJ emits to its parent. Robust reduces this metric by **8× on the hardest queries**: by filtering probe-side rows out at the scan *before* they reach the hash table, fewer rows participate in the join, so fewer rows match and propagate downstream — shrinking every intermediate result.
+
+[All figures](docs/figures/) (PDF + PNG) are regenerated by `scripts/plot_results.py` from `docs/metrics.csv`.
 
 ## Building
-### Managing dependencies
-DuckDB extensions uses VCPKG for dependency management. Enabling VCPKG is very simple: follow the [installation instructions](https://vcpkg.io/en/getting-started) or just run the following:
-```shell
+
+### Prerequisites
+
+```bash
+git clone --recurse-submodules https://github.com/robust-sql/robust.git
+
+# vcpkg can live anywhere; pick a location once and reuse it for any C++ project  
 git clone https://github.com/Microsoft/vcpkg.git
 ./vcpkg/bootstrap-vcpkg.sh
-export VCPKG_TOOLCHAIN_PATH=`pwd`/vcpkg/scripts/buildsystems/vcpkg.cmake
+export VCPKG_TOOLCHAIN_PATH=$(pwd)/vcpkg/scripts/buildsystems/vcpkg.cmake
 ```
-Note: VCPKG is only required for extensions that want to rely on it for dependency management. If you want to develop an extension without dependencies, or want to do your own dependency management, just skip this step. Note that the example extension uses VCPKG to build with a dependency for instructive purposes, so when skipping this step the build may not work without removing the dependency.
 
-### Build steps
-Now to build the extension, run:
-```sh
-make
+### Build
+
+```bash
+# release
+GEN=ninja make release
+
+# debug (AddressSanitizer enabled)
+GEN=ninja make debug
+
+# release + benchmark runner (needed for bench_job.sh)
+BUILD_BENCHMARK=1 GEN=ninja make release
 ```
-The main binaries that will be built are:
-```sh
-./build/release/duckdb
-./build/release/test/unittest
-./build/release/extension/rpt/rpt.duckdb_extension
+
+### Build artifacts
+
+- `./build/release/duckdb` — DuckDB shell
+- `./build/release/extension/robust/robust.duckdb_extension` — loadable extension
+- `./build/release/benchmark/benchmark_runner` — benchmark runner (only with `BUILD_BENCHMARK=1`)
+
+## Running
+
+The extension is not published to the DuckDB extension repository, so it must be loaded as an unsigned extension:
+
+```bash
+./build/release/duckdb -unsigned
 ```
-- `duckdb` is the binary for the duckdb shell with the extension code automatically loaded.
-- `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary.
-- `rpt.duckdb_extension` is the loadable binary as it would be distributed.
 
-## Running the extension
-To run the extension code, simply start the shell with `./build/release/duckdb`.
+```sql
+LOAD 'build/release/extension/robust/robust.duckdb_extension';
+
+-- Robust is designed for multi-way joins; it intentionally stays out of the way
+-- when the query has <= 1 join. The example below is a 3-way star schema:
+-- a wide fact table joined to two dimensions, with selective filters on the dims.
+CREATE TEMP TABLE orders AS
+    SELECT i              AS order_id,
+           i % 1000       AS customer_id,
+           i % 100        AS product_id,
+           (i * 13) % 10000 AS amount
+    FROM range(1000000) tbl(i);
+
+CREATE TEMP TABLE customers AS
+    SELECT i AS customer_id, 'cust_' || i AS name
+    FROM range(1000) tbl(i);
 
-Now we can use the features from the extension directly in DuckDB. The template contains a single scalar function `rpt()` that takes a string arguments and returns a string:
+CREATE TEMP TABLE products AS
+    SELECT i AS product_id, i % 10 AS category
+    FROM range(100) tbl(i);
+
+-- 3-way join with filters on each dimension.
+-- Robust builds filters (currently bloom filters, min/max ranges, and IN-lists
+-- where applicable — the filter set is extensible) from the filtered rows of
+-- customers and products, then propagates them across the join graph in two
+-- passes: a forward pass pushes filters down to probe-side scans (here, the
+-- orders scan), and a backward pass broadcasts them across the rest of the
+-- graph. Only rows that survive every applicable filter reach the hash joins.
+SELECT count(*), sum(o.amount)
+FROM orders o
+JOIN customers c ON o.customer_id = c.customer_id
+JOIN products  p ON o.product_id  = p.product_id
+WHERE p.category = 3
+  AND c.name LIKE 'cust_1%';
+
+-- inspect the rewritten plan: expect CREATE_FILTER nodes above the filtered
+-- scans of customers/products, and PROBE_FILTER feeding the orders scan.
+EXPLAIN
+SELECT count(*), sum(o.amount)
+FROM orders o
+JOIN customers c ON o.customer_id = c.customer_id
+JOIN products  p ON o.product_id  = p.product_id
+WHERE p.category = 3
+  AND c.name LIKE 'cust_1%';
 ```
-D select rpt('Jane') as result;
-┌───────────────┐
-│    result     │
-│    varchar    │
-├───────────────┤
-│ Rpt Jane 🐥 │
-└───────────────┘
+
+For a larger / more realistic workload, see [Benchmarks (JOB)](#benchmarks-job) below — the JOB suite has 113 queries (3–17-way joins over IMDb) that the extension is tuned against.
+
+### Settings
+
+| Setting | Type | Default | Controls |
+|---|---|---|---|
+| `robust_heuristic` | VARCHAR | `'join_order'` | DAG construction heuristic: `'join_order'` (DFS-build-first, follows DuckDB's own join order) or `'largest_root'` (Prim's MST rooted at largest table; the original paper formulation) |
+| `robust_pass_mode` | VARCHAR | `'both'` | Whether to run both passes or only forward: `'both'` or `'forward_only'` |
+| `robust_flip_roots` | BOOLEAN | `true` | In `join_order` mode, iteratively flip non-anchor roots to leaves so the largest table ends up as the sole anchor root |
+| `robust_filter_type` | VARCHAR | `'all'` | Restrict filter types pushed to scans: `'all'`, `'bf_only'`, `'minmax_only'` |
+| `robust_dynamic_or_filter_threshold` | UBIGINT | `50` | Max distinct build keys to use an IN-list (rather than a bloom filter) for scan pushdown |
+| `robust_profiling` | BOOLEAN | `false` | Emit per-operator timing and row-count stats after each query |
+| `robust_display_dag` | BOOLEAN | `false` | Print the logical transfer DAG to stdout before plan modification |
+| `robust_display_physical_dag` | BOOLEAN | `false` | Print the physical-plan DAG (before filter insertion) to stdout |
+
+All settings are registered in [`src/robust_extension.cpp`](src/robust_extension.cpp). See [docs/architecture.md](docs/architecture.md) for what these controls actually do at the algorithm level.
+
+## Benchmarks (JOB)
+
+### Setup
+
+One-time data setup — downloads 21 IMDB tables (~2.6G) from DuckDB's public
+release artifacts and materializes them as a native DuckDB database at
+`jobdata/imdb.duckdb`:
+
+```bash
+./scripts/setup_imdb_data.sh
 ```
 
-## Running the tests
-Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL tests in `./test/sql`. These SQL tests can be run using:
-```sh
-make test
+```bash
+ls jobdata/imdb.duckdb
+ls jobdata/queries/1a.sql    # 113 queries
 ```
 
-### Installing the deployed binaries
-To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the
-`allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. Some examples:
+### When to use which script
+
+Two complementary tools live in `scripts/`:
+
+- **`test_job.sh`** — *correctness-first dev loop tool.* Wraps the `duckdb` CLI, loads the extension dynamically with `LOAD`, diffs baseline vs Robust output to catch correctness regressions, and reports per-query wall-clock + geomean speedup. Use this on every PR / dev iteration. No special build flag needed; just a plain release build.
+- **`bench_job.sh`** — *authoritative measurement tool.* Wraps DuckDB's in-tree `benchmark_runner` (build with `BUILD_BENCHMARK=1`), runs all queries in a single process for lower measurement noise, and explicitly skips the cold first run. Use this for numbers that go into the README, a paper, or PR perf claims. No correctness check — pair with `test_job.sh` if you've changed the optimizer.
+
+The TPC-H counterpart of `bench_job.sh` is `bench_tpch.sh` (only the 9 queries where Robust currently inserts BFs and runs correctly).
+
+### Correctness + wall-clock comparison
+
+`./scripts/test_job.sh` runs every JOB query with and without the extension, diffs results, and reports per-query timing + geomean speedup.
 
-CLI:
-```shell
-duckdb -unsigned
+**Recommended invocation:**
+
+```bash
+./scripts/test_job.sh --heuristic join_order --timing --runs 5
 ```
 
-Python:
-```python
-con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'})
+For each of the 113 JOB queries it runs the query 5 times against the baseline (DuckDB stock optimizer) and 5 times with the Robust extension loaded, takes the minimum wall-clock from each side to suppress timer noise and cold-cache jitter, diffs the two result sets to confirm correctness, and prints per-query speedup plus a geometric mean across all queries. Output is also persisted to `job_test_results/summary.txt`.
+
+Other useful forms:
+
+```bash
+./scripts/test_job.sh                          # correctness only, no timing
+./scripts/test_job.sh --timing                 # single run per side
+./scripts/test_job.sh --query 7c --timing      # one query
+./scripts/test_job.sh --timing --limit 10      # first N queries
 ```
 
-NodeJS:
-```js
-db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"});
+Summary written to `job_test_results/summary.txt`.
+
+### DuckDB benchmark-runner suites
+
+`scripts/bench_job.sh` drives the in-tree DuckDB benchmark runner against the `imdb` (baseline) and `imdb_robust*` (extension) suites and writes a side-by-side comparison.
+
+```bash
+# all 113 queries, baseline + Robust, min of repeated runs
+./scripts/bench_job.sh
+
+# subset
+./scripts/bench_job.sh --pattern '07.*'
+
+# Robust with the largest_root heuristic (vs baseline)
+./scripts/bench_job.sh --heuristic largest_root
+
+# Robust forward-only (no backward equivalence-class broadcast)
+./scripts/bench_job.sh --forward-only
+
+# re-aggregate already-collected raw results without re-running
+./scripts/bench_job.sh --no-run
+
+# run Robust suite first (default is baseline first; flips the order to control thermals)
+./scripts/bench_job.sh --robust-first
 ```
 
-Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension
-you want to install. To do this run the following SQL query in DuckDB:
-```sql
-SET custom_extension_repository='bucket.s3.eu-west-1.amazonaws.com/<your_extension_name>/latest';
+Output: `benchmark_results/{baseline_raw.tsv, robust_raw.tsv, comparison.tsv}`.
+
+### How the benchmark harness is wired
+
+`bench_job.sh` and `bench_tpch.sh` invoke DuckDB's in-tree `benchmark_runner` against suites that need to live inside the DuckDB submodule (`duckdb/benchmark/imdb_robust*`, `duckdb/benchmark/tpch_*`). Those suites — plus two small upstream patches needed to load an unsigned dev extension and to work around a debug-build issue (see `wip_docs/features/13-debug-build-verify-op-failure.md`) — are kept inside this repo at:
+
+- `bench_suites/` — vendored suites (source-of-truth, tracked here)
+- `patches/` — duckdb-submodule patches (tracked here)
+
+`make release` and `make debug` automatically run `scripts/vendor_duckdb_bench.sh`, which copies the suites into `duckdb/benchmark/` and applies the patches. Re-running is idempotent. If a patch fails to apply (typically because the DuckDB submodule was bumped and upstream rewrote the patched region), the build aborts with a clear error.
+
+### Per-query profiling
+
+`scripts/profile_query.sh` runs a single query under both baseline and Robust, captures DuckDB's JSON profile, and prints a breakdown by operator class (HASH_JOIN, SEQ_SCAN, CREATE_FILTER, PROBE_FILTER, ...).
+
+```bash
+# JOB query 7c
+./scripts/profile_query.sh 7c
+
+# TPCH query 3
+./scripts/profile_query.sh --workload tpch 3
+
+# disable DuckDB's join_filter_pushdown for Robust (isolates Robust's contribution)
+./scripts/profile_query.sh --no-jfp robust 7c
+
+# compare both Robust heuristics against baseline
+./scripts/profile_query.sh --heuristic all 7c
+
+# inline SQL
+./scripts/profile_query.sh --sql "SELECT count(*) FROM t1 JOIN t2 ON t1.id = t2.id"
 ```
-Note that the `/latest` path will allow you to install the latest extension version available for your current version of
-DuckDB. To specify a specific version, you can pass the version instead.
 
-After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB:
-```sql
-INSTALL rpt
-LOAD rpt
+### Full metrics sweep + plotting
+
+`scripts/bench_metrics.sh` sweeps all JOB queries and extracts six metrics from each profile JSON (memory allocated, rows scanned, cumulative cardinality, peak buffer, sum/max of HASH_JOIN cardinality). `scripts/plot_results.py` consumes the resulting CSV and emits the figures in `docs/figures/`.
+
+```bash
+./scripts/bench_metrics.sh                      # all queries → benchmark_results/metrics.csv
+./scripts/bench_metrics.sh --pattern '13.*'     # subset
+./scripts/bench_metrics.sh --query 13a          # single query
+
+./scripts/plot_results.py speedup benchmark_results/comparison.tsv --out fig.pdf
+./scripts/plot_results.py metric memory benchmark_results/metrics.csv --out memory_ratio.pdf
+./scripts/plot_results.py pairs hj_card_sum benchmark_results/metrics.csv \
+    --style line --out hj_card_sum_pairs_line.pdf
 ```
+
+## How predicate transfer works
+
+1. **Build DAG.** The optimizer extracts equality joins, builds equivalence classes over join columns (union-find), and constructs a DAG over base tables with filtered tables as roots.
+2. **Forward pass (leaves → root).** For each edge, the smaller side builds a filter (bloom filter + min/max + optional `IN`-list when the build side has few distinct values). The filter is applied to the larger side via a `PROBE_FILTER` operator inserted above the scan.
+3. **Backward pass (root → leaves).** Each filter is broadcast across its equivalence class. If tables A, B, C all join on the same key and a filter was built from C, it's pushed to A and B as well — even though they never directly joined with C.
+4. **Scan pushdown.** Built filters are pushed into DuckDB's `dynamic_filters` infrastructure via `BFTableFilter` + `SelectivityOptionalFilter`, so the scan can skip rows/segments before they're decompressed.
+
+
+## Bloom filter implementation
+
+The extension uses DuckDB's native [`BloomFilter`](duckdb/src/include/duckdb/planner/filter/bloom_filter.hpp) (`duckdb/planner/filter/bloom_filter.hpp`) as the underlying implementation:
+
+- 12 bits per key
+- `InsertHashes` uses atomic `fetch_or` for lock-free parallel building
+- `LookupHashes` returns a `SelectionVector` directly (no separate bit-vector pass)
+
+`src/include/bloom_filter.hpp` defines a thin wrapper, `PTBloomFilter`, which adds the glue DuckDB's native API doesn't provide:
+
+- `Insert(DataChunk&, cols)` and `LookupSel(DataChunk&, sel, cols, buf)` — hashes are computed from the chunk via `VectorOperations::Hash` + `CombineHash` before being passed to the native filter.
+- `ReinitializeAndRehash(actual_rows, data, cols)` — resizes the filter once the true build-side row count is known.
+- `IsEmpty()` / `finalized_` — lifecycle flags read by `PhysicalProbeFilter`.
+
+The bloom filter is one of three filter types pushed in a single `CREATE_FILTER` operation; min/max bounds and `IN`-lists (when the build side has ≤ 50 distinct keys) are emitted alongside it via DuckDB's `ConstantFilter` / `InFilter` infrastructure.
+
+## Pinned dependencies
+
+| Dependency | Pin | Notes |
+|---|---|---|
+| `duckdb` submodule | [`v1.5.3`](https://github.com/duckdb/duckdb/releases/tag/v1.5.3) | release tag |
+| `extension-ci-tools` submodule | [`v1.5.3`](https://github.com/duckdb/extension-ci-tools/tree/v1.5.3) | `v1.5.3` branch tip |
+| OpenSSL | 3.5.3+ via vcpkg | dependency of DuckDB build |
+
+CI pins are kept in sync with submodule pins in [`.github/workflows/MainDistributionPipeline.yml`](.github/workflows/MainDistributionPipeline.yml).
+
+## License
+
+Based on the [DuckDB Extension Template](https://github.com/duckdb/extension-template) (MIT).
diff --git a/bench_suites/imdb_robust/01a.benchmark b/bench_suites/imdb_robust/01a.benchmark
new file mode 100644
index 0000000..ef57148
--- /dev/null
+++ b/bench_suites/imdb_robust/01a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=01a
+QUERY_NUMBER_PADDED=01a
diff --git a/bench_suites/imdb_robust/01b.benchmark b/bench_suites/imdb_robust/01b.benchmark
new file mode 100644
index 0000000..adfa592
--- /dev/null
+++ b/bench_suites/imdb_robust/01b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=01b
+QUERY_NUMBER_PADDED=01b
diff --git a/bench_suites/imdb_robust/01c.benchmark b/bench_suites/imdb_robust/01c.benchmark
new file mode 100644
index 0000000..001ffcf
--- /dev/null
+++ b/bench_suites/imdb_robust/01c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=01c
+QUERY_NUMBER_PADDED=01c
diff --git a/bench_suites/imdb_robust/01d.benchmark b/bench_suites/imdb_robust/01d.benchmark
new file mode 100644
index 0000000..d0c35fd
--- /dev/null
+++ b/bench_suites/imdb_robust/01d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=01d
+QUERY_NUMBER_PADDED=01d
diff --git a/bench_suites/imdb_robust/02a.benchmark b/bench_suites/imdb_robust/02a.benchmark
new file mode 100644
index 0000000..e9e1ae2
--- /dev/null
+++ b/bench_suites/imdb_robust/02a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=02a
+QUERY_NUMBER_PADDED=02a
diff --git a/bench_suites/imdb_robust/02b.benchmark b/bench_suites/imdb_robust/02b.benchmark
new file mode 100644
index 0000000..96326df
--- /dev/null
+++ b/bench_suites/imdb_robust/02b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=02b
+QUERY_NUMBER_PADDED=02b
diff --git a/bench_suites/imdb_robust/02c.benchmark b/bench_suites/imdb_robust/02c.benchmark
new file mode 100644
index 0000000..89e56fc
--- /dev/null
+++ b/bench_suites/imdb_robust/02c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=02c
+QUERY_NUMBER_PADDED=02c
diff --git a/bench_suites/imdb_robust/02d.benchmark b/bench_suites/imdb_robust/02d.benchmark
new file mode 100644
index 0000000..962ffe5
--- /dev/null
+++ b/bench_suites/imdb_robust/02d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=02d
+QUERY_NUMBER_PADDED=02d
diff --git a/bench_suites/imdb_robust/03a.benchmark b/bench_suites/imdb_robust/03a.benchmark
new file mode 100644
index 0000000..da562fb
--- /dev/null
+++ b/bench_suites/imdb_robust/03a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=03a
+QUERY_NUMBER_PADDED=03a
diff --git a/bench_suites/imdb_robust/03b.benchmark b/bench_suites/imdb_robust/03b.benchmark
new file mode 100644
index 0000000..90d783d
--- /dev/null
+++ b/bench_suites/imdb_robust/03b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=03b
+QUERY_NUMBER_PADDED=03b
diff --git a/bench_suites/imdb_robust/03c.benchmark b/bench_suites/imdb_robust/03c.benchmark
new file mode 100644
index 0000000..4204dd2
--- /dev/null
+++ b/bench_suites/imdb_robust/03c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=03c
+QUERY_NUMBER_PADDED=03c
diff --git a/bench_suites/imdb_robust/04a.benchmark b/bench_suites/imdb_robust/04a.benchmark
new file mode 100644
index 0000000..a3b96ab
--- /dev/null
+++ b/bench_suites/imdb_robust/04a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=04a
+QUERY_NUMBER_PADDED=04a
diff --git a/bench_suites/imdb_robust/04b.benchmark b/bench_suites/imdb_robust/04b.benchmark
new file mode 100644
index 0000000..7a7f942
--- /dev/null
+++ b/bench_suites/imdb_robust/04b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=04b
+QUERY_NUMBER_PADDED=04b
diff --git a/bench_suites/imdb_robust/04c.benchmark b/bench_suites/imdb_robust/04c.benchmark
new file mode 100644
index 0000000..424dae6
--- /dev/null
+++ b/bench_suites/imdb_robust/04c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=04c
+QUERY_NUMBER_PADDED=04c
diff --git a/bench_suites/imdb_robust/05a.benchmark b/bench_suites/imdb_robust/05a.benchmark
new file mode 100644
index 0000000..74feb89
--- /dev/null
+++ b/bench_suites/imdb_robust/05a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=05a
+QUERY_NUMBER_PADDED=05a
diff --git a/bench_suites/imdb_robust/05b.benchmark b/bench_suites/imdb_robust/05b.benchmark
new file mode 100644
index 0000000..6f286ee
--- /dev/null
+++ b/bench_suites/imdb_robust/05b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=05b
+QUERY_NUMBER_PADDED=05b
diff --git a/bench_suites/imdb_robust/05c.benchmark b/bench_suites/imdb_robust/05c.benchmark
new file mode 100644
index 0000000..97c3a8b
--- /dev/null
+++ b/bench_suites/imdb_robust/05c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=05c
+QUERY_NUMBER_PADDED=05c
diff --git a/bench_suites/imdb_robust/06a.benchmark b/bench_suites/imdb_robust/06a.benchmark
new file mode 100644
index 0000000..cc11896
--- /dev/null
+++ b/bench_suites/imdb_robust/06a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=06a
+QUERY_NUMBER_PADDED=06a
diff --git a/bench_suites/imdb_robust/06b.benchmark b/bench_suites/imdb_robust/06b.benchmark
new file mode 100644
index 0000000..7c24cf9
--- /dev/null
+++ b/bench_suites/imdb_robust/06b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=06b
+QUERY_NUMBER_PADDED=06b
diff --git a/bench_suites/imdb_robust/06c.benchmark b/bench_suites/imdb_robust/06c.benchmark
new file mode 100644
index 0000000..af44f54
--- /dev/null
+++ b/bench_suites/imdb_robust/06c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=06c
+QUERY_NUMBER_PADDED=06c
diff --git a/bench_suites/imdb_robust/06d.benchmark b/bench_suites/imdb_robust/06d.benchmark
new file mode 100644
index 0000000..3574274
--- /dev/null
+++ b/bench_suites/imdb_robust/06d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=06d
+QUERY_NUMBER_PADDED=06d
diff --git a/bench_suites/imdb_robust/06e.benchmark b/bench_suites/imdb_robust/06e.benchmark
new file mode 100644
index 0000000..f383d07
--- /dev/null
+++ b/bench_suites/imdb_robust/06e.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=06e
+QUERY_NUMBER_PADDED=06e
diff --git a/bench_suites/imdb_robust/06f.benchmark b/bench_suites/imdb_robust/06f.benchmark
new file mode 100644
index 0000000..f060dd0
--- /dev/null
+++ b/bench_suites/imdb_robust/06f.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=06f
+QUERY_NUMBER_PADDED=06f
diff --git a/bench_suites/imdb_robust/07a.benchmark b/bench_suites/imdb_robust/07a.benchmark
new file mode 100644
index 0000000..6a7969d
--- /dev/null
+++ b/bench_suites/imdb_robust/07a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=07a
+QUERY_NUMBER_PADDED=07a
diff --git a/bench_suites/imdb_robust/07b.benchmark b/bench_suites/imdb_robust/07b.benchmark
new file mode 100644
index 0000000..1fbe839
--- /dev/null
+++ b/bench_suites/imdb_robust/07b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=07b
+QUERY_NUMBER_PADDED=07b
diff --git a/bench_suites/imdb_robust/07c.benchmark b/bench_suites/imdb_robust/07c.benchmark
new file mode 100644
index 0000000..862effe
--- /dev/null
+++ b/bench_suites/imdb_robust/07c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=07c
+QUERY_NUMBER_PADDED=07c
diff --git a/bench_suites/imdb_robust/08a.benchmark b/bench_suites/imdb_robust/08a.benchmark
new file mode 100644
index 0000000..8935010
--- /dev/null
+++ b/bench_suites/imdb_robust/08a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=08a
+QUERY_NUMBER_PADDED=08a
diff --git a/bench_suites/imdb_robust/08b.benchmark b/bench_suites/imdb_robust/08b.benchmark
new file mode 100644
index 0000000..cd1f1ca
--- /dev/null
+++ b/bench_suites/imdb_robust/08b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=08b
+QUERY_NUMBER_PADDED=08b
diff --git a/bench_suites/imdb_robust/08c.benchmark b/bench_suites/imdb_robust/08c.benchmark
new file mode 100644
index 0000000..cae9b83
--- /dev/null
+++ b/bench_suites/imdb_robust/08c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=08c
+QUERY_NUMBER_PADDED=08c
diff --git a/bench_suites/imdb_robust/08d.benchmark b/bench_suites/imdb_robust/08d.benchmark
new file mode 100644
index 0000000..147e1c0
--- /dev/null
+++ b/bench_suites/imdb_robust/08d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=08d
+QUERY_NUMBER_PADDED=08d
diff --git a/bench_suites/imdb_robust/09a.benchmark b/bench_suites/imdb_robust/09a.benchmark
new file mode 100644
index 0000000..e426c0c
--- /dev/null
+++ b/bench_suites/imdb_robust/09a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=09a
+QUERY_NUMBER_PADDED=09a
diff --git a/bench_suites/imdb_robust/09b.benchmark b/bench_suites/imdb_robust/09b.benchmark
new file mode 100644
index 0000000..6d6c008
--- /dev/null
+++ b/bench_suites/imdb_robust/09b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=09b
+QUERY_NUMBER_PADDED=09b
diff --git a/bench_suites/imdb_robust/09c.benchmark b/bench_suites/imdb_robust/09c.benchmark
new file mode 100644
index 0000000..d54aa9d
--- /dev/null
+++ b/bench_suites/imdb_robust/09c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=09c
+QUERY_NUMBER_PADDED=09c
diff --git a/bench_suites/imdb_robust/09d.benchmark b/bench_suites/imdb_robust/09d.benchmark
new file mode 100644
index 0000000..f5135bf
--- /dev/null
+++ b/bench_suites/imdb_robust/09d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=09d
+QUERY_NUMBER_PADDED=09d
diff --git a/bench_suites/imdb_robust/10a.benchmark b/bench_suites/imdb_robust/10a.benchmark
new file mode 100644
index 0000000..921d68c
--- /dev/null
+++ b/bench_suites/imdb_robust/10a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=10a
+QUERY_NUMBER_PADDED=10a
diff --git a/bench_suites/imdb_robust/10b.benchmark b/bench_suites/imdb_robust/10b.benchmark
new file mode 100644
index 0000000..f7082e8
--- /dev/null
+++ b/bench_suites/imdb_robust/10b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=10b
+QUERY_NUMBER_PADDED=10b
diff --git a/bench_suites/imdb_robust/10c.benchmark b/bench_suites/imdb_robust/10c.benchmark
new file mode 100644
index 0000000..5888469
--- /dev/null
+++ b/bench_suites/imdb_robust/10c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=10c
+QUERY_NUMBER_PADDED=10c
diff --git a/bench_suites/imdb_robust/11a.benchmark b/bench_suites/imdb_robust/11a.benchmark
new file mode 100644
index 0000000..dcb5eab
--- /dev/null
+++ b/bench_suites/imdb_robust/11a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=11a
+QUERY_NUMBER_PADDED=11a
diff --git a/bench_suites/imdb_robust/11b.benchmark b/bench_suites/imdb_robust/11b.benchmark
new file mode 100644
index 0000000..8d148bf
--- /dev/null
+++ b/bench_suites/imdb_robust/11b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=11b
+QUERY_NUMBER_PADDED=11b
diff --git a/bench_suites/imdb_robust/11c.benchmark b/bench_suites/imdb_robust/11c.benchmark
new file mode 100644
index 0000000..a1b830d
--- /dev/null
+++ b/bench_suites/imdb_robust/11c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=11c
+QUERY_NUMBER_PADDED=11c
diff --git a/bench_suites/imdb_robust/11d.benchmark b/bench_suites/imdb_robust/11d.benchmark
new file mode 100644
index 0000000..b91a255
--- /dev/null
+++ b/bench_suites/imdb_robust/11d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=11d
+QUERY_NUMBER_PADDED=11d
diff --git a/bench_suites/imdb_robust/12a.benchmark b/bench_suites/imdb_robust/12a.benchmark
new file mode 100644
index 0000000..3552bc4
--- /dev/null
+++ b/bench_suites/imdb_robust/12a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=12a
+QUERY_NUMBER_PADDED=12a
diff --git a/bench_suites/imdb_robust/12b.benchmark b/bench_suites/imdb_robust/12b.benchmark
new file mode 100644
index 0000000..1baa115
--- /dev/null
+++ b/bench_suites/imdb_robust/12b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=12b
+QUERY_NUMBER_PADDED=12b
diff --git a/bench_suites/imdb_robust/12c.benchmark b/bench_suites/imdb_robust/12c.benchmark
new file mode 100644
index 0000000..ab9d433
--- /dev/null
+++ b/bench_suites/imdb_robust/12c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=12c
+QUERY_NUMBER_PADDED=12c
diff --git a/bench_suites/imdb_robust/13a.benchmark b/bench_suites/imdb_robust/13a.benchmark
new file mode 100644
index 0000000..8d28624
--- /dev/null
+++ b/bench_suites/imdb_robust/13a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=13a
+QUERY_NUMBER_PADDED=13a
diff --git a/bench_suites/imdb_robust/13b.benchmark b/bench_suites/imdb_robust/13b.benchmark
new file mode 100644
index 0000000..44eedfc
--- /dev/null
+++ b/bench_suites/imdb_robust/13b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=13b
+QUERY_NUMBER_PADDED=13b
diff --git a/bench_suites/imdb_robust/13c.benchmark b/bench_suites/imdb_robust/13c.benchmark
new file mode 100644
index 0000000..fe46c55
--- /dev/null
+++ b/bench_suites/imdb_robust/13c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=13c
+QUERY_NUMBER_PADDED=13c
diff --git a/bench_suites/imdb_robust/13d.benchmark b/bench_suites/imdb_robust/13d.benchmark
new file mode 100644
index 0000000..2aca1ba
--- /dev/null
+++ b/bench_suites/imdb_robust/13d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=13d
+QUERY_NUMBER_PADDED=13d
diff --git a/bench_suites/imdb_robust/14a.benchmark b/bench_suites/imdb_robust/14a.benchmark
new file mode 100644
index 0000000..713a139
--- /dev/null
+++ b/bench_suites/imdb_robust/14a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=14a
+QUERY_NUMBER_PADDED=14a
diff --git a/bench_suites/imdb_robust/14b.benchmark b/bench_suites/imdb_robust/14b.benchmark
new file mode 100644
index 0000000..d328f45
--- /dev/null
+++ b/bench_suites/imdb_robust/14b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=14b
+QUERY_NUMBER_PADDED=14b
diff --git a/bench_suites/imdb_robust/14c.benchmark b/bench_suites/imdb_robust/14c.benchmark
new file mode 100644
index 0000000..86f88ac
--- /dev/null
+++ b/bench_suites/imdb_robust/14c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=14c
+QUERY_NUMBER_PADDED=14c
diff --git a/bench_suites/imdb_robust/15a.benchmark b/bench_suites/imdb_robust/15a.benchmark
new file mode 100644
index 0000000..7eaceef
--- /dev/null
+++ b/bench_suites/imdb_robust/15a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=15a
+QUERY_NUMBER_PADDED=15a
diff --git a/bench_suites/imdb_robust/15b.benchmark b/bench_suites/imdb_robust/15b.benchmark
new file mode 100644
index 0000000..09cf1bd
--- /dev/null
+++ b/bench_suites/imdb_robust/15b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=15b
+QUERY_NUMBER_PADDED=15b
diff --git a/bench_suites/imdb_robust/15c.benchmark b/bench_suites/imdb_robust/15c.benchmark
new file mode 100644
index 0000000..9170281
--- /dev/null
+++ b/bench_suites/imdb_robust/15c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=15c
+QUERY_NUMBER_PADDED=15c
diff --git a/bench_suites/imdb_robust/15d.benchmark b/bench_suites/imdb_robust/15d.benchmark
new file mode 100644
index 0000000..0e57f39
--- /dev/null
+++ b/bench_suites/imdb_robust/15d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=15d
+QUERY_NUMBER_PADDED=15d
diff --git a/bench_suites/imdb_robust/16a.benchmark b/bench_suites/imdb_robust/16a.benchmark
new file mode 100644
index 0000000..7fbfce2
--- /dev/null
+++ b/bench_suites/imdb_robust/16a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=16a
+QUERY_NUMBER_PADDED=16a
diff --git a/bench_suites/imdb_robust/16b.benchmark b/bench_suites/imdb_robust/16b.benchmark
new file mode 100644
index 0000000..ed5818b
--- /dev/null
+++ b/bench_suites/imdb_robust/16b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=16b
+QUERY_NUMBER_PADDED=16b
diff --git a/bench_suites/imdb_robust/16c.benchmark b/bench_suites/imdb_robust/16c.benchmark
new file mode 100644
index 0000000..8da8514
--- /dev/null
+++ b/bench_suites/imdb_robust/16c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=16c
+QUERY_NUMBER_PADDED=16c
diff --git a/bench_suites/imdb_robust/16d.benchmark b/bench_suites/imdb_robust/16d.benchmark
new file mode 100644
index 0000000..fd34099
--- /dev/null
+++ b/bench_suites/imdb_robust/16d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=16d
+QUERY_NUMBER_PADDED=16d
diff --git a/bench_suites/imdb_robust/17a.benchmark b/bench_suites/imdb_robust/17a.benchmark
new file mode 100644
index 0000000..7ee2888
--- /dev/null
+++ b/bench_suites/imdb_robust/17a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=17a
+QUERY_NUMBER_PADDED=17a
diff --git a/bench_suites/imdb_robust/17b.benchmark b/bench_suites/imdb_robust/17b.benchmark
new file mode 100644
index 0000000..eebcb82
--- /dev/null
+++ b/bench_suites/imdb_robust/17b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=17b
+QUERY_NUMBER_PADDED=17b
diff --git a/bench_suites/imdb_robust/17c.benchmark b/bench_suites/imdb_robust/17c.benchmark
new file mode 100644
index 0000000..5595ae9
--- /dev/null
+++ b/bench_suites/imdb_robust/17c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=17c
+QUERY_NUMBER_PADDED=17c
diff --git a/bench_suites/imdb_robust/17d.benchmark b/bench_suites/imdb_robust/17d.benchmark
new file mode 100644
index 0000000..a1585c2
--- /dev/null
+++ b/bench_suites/imdb_robust/17d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=17d
+QUERY_NUMBER_PADDED=17d
diff --git a/bench_suites/imdb_robust/17e.benchmark b/bench_suites/imdb_robust/17e.benchmark
new file mode 100644
index 0000000..30dcfa7
--- /dev/null
+++ b/bench_suites/imdb_robust/17e.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=17e
+QUERY_NUMBER_PADDED=17e
diff --git a/bench_suites/imdb_robust/17f.benchmark b/bench_suites/imdb_robust/17f.benchmark
new file mode 100644
index 0000000..45758f4
--- /dev/null
+++ b/bench_suites/imdb_robust/17f.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=17f
+QUERY_NUMBER_PADDED=17f
diff --git a/bench_suites/imdb_robust/18a.benchmark b/bench_suites/imdb_robust/18a.benchmark
new file mode 100644
index 0000000..e7edd16
--- /dev/null
+++ b/bench_suites/imdb_robust/18a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=18a
+QUERY_NUMBER_PADDED=18a
diff --git a/bench_suites/imdb_robust/18b.benchmark b/bench_suites/imdb_robust/18b.benchmark
new file mode 100644
index 0000000..b87afb9
--- /dev/null
+++ b/bench_suites/imdb_robust/18b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=18b
+QUERY_NUMBER_PADDED=18b
diff --git a/bench_suites/imdb_robust/18c.benchmark b/bench_suites/imdb_robust/18c.benchmark
new file mode 100644
index 0000000..b99d9dd
--- /dev/null
+++ b/bench_suites/imdb_robust/18c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=18c
+QUERY_NUMBER_PADDED=18c
diff --git a/bench_suites/imdb_robust/19a.benchmark b/bench_suites/imdb_robust/19a.benchmark
new file mode 100644
index 0000000..17ba373
--- /dev/null
+++ b/bench_suites/imdb_robust/19a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=19a
+QUERY_NUMBER_PADDED=19a
diff --git a/bench_suites/imdb_robust/19b.benchmark b/bench_suites/imdb_robust/19b.benchmark
new file mode 100644
index 0000000..30529ec
--- /dev/null
+++ b/bench_suites/imdb_robust/19b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=19b
+QUERY_NUMBER_PADDED=19b
diff --git a/bench_suites/imdb_robust/19c.benchmark b/bench_suites/imdb_robust/19c.benchmark
new file mode 100644
index 0000000..f7a1896
--- /dev/null
+++ b/bench_suites/imdb_robust/19c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=19c
+QUERY_NUMBER_PADDED=19c
diff --git a/bench_suites/imdb_robust/19d.benchmark b/bench_suites/imdb_robust/19d.benchmark
new file mode 100644
index 0000000..02e317a
--- /dev/null
+++ b/bench_suites/imdb_robust/19d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=19d
+QUERY_NUMBER_PADDED=19d
diff --git a/bench_suites/imdb_robust/20a.benchmark b/bench_suites/imdb_robust/20a.benchmark
new file mode 100644
index 0000000..7a4811f
--- /dev/null
+++ b/bench_suites/imdb_robust/20a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=20a
+QUERY_NUMBER_PADDED=20a
diff --git a/bench_suites/imdb_robust/20b.benchmark b/bench_suites/imdb_robust/20b.benchmark
new file mode 100644
index 0000000..e1c636f
--- /dev/null
+++ b/bench_suites/imdb_robust/20b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=20b
+QUERY_NUMBER_PADDED=20b
diff --git a/bench_suites/imdb_robust/20c.benchmark b/bench_suites/imdb_robust/20c.benchmark
new file mode 100644
index 0000000..718916c
--- /dev/null
+++ b/bench_suites/imdb_robust/20c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=20c
+QUERY_NUMBER_PADDED=20c
diff --git a/bench_suites/imdb_robust/21a.benchmark b/bench_suites/imdb_robust/21a.benchmark
new file mode 100644
index 0000000..4d0d704
--- /dev/null
+++ b/bench_suites/imdb_robust/21a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=21a
+QUERY_NUMBER_PADDED=21a
diff --git a/bench_suites/imdb_robust/21b.benchmark b/bench_suites/imdb_robust/21b.benchmark
new file mode 100644
index 0000000..02755d1
--- /dev/null
+++ b/bench_suites/imdb_robust/21b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=21b
+QUERY_NUMBER_PADDED=21b
diff --git a/bench_suites/imdb_robust/21c.benchmark b/bench_suites/imdb_robust/21c.benchmark
new file mode 100644
index 0000000..8adc804
--- /dev/null
+++ b/bench_suites/imdb_robust/21c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=21c
+QUERY_NUMBER_PADDED=21c
diff --git a/bench_suites/imdb_robust/22a.benchmark b/bench_suites/imdb_robust/22a.benchmark
new file mode 100644
index 0000000..75701e2
--- /dev/null
+++ b/bench_suites/imdb_robust/22a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=22a
+QUERY_NUMBER_PADDED=22a
diff --git a/bench_suites/imdb_robust/22b.benchmark b/bench_suites/imdb_robust/22b.benchmark
new file mode 100644
index 0000000..cce99f0
--- /dev/null
+++ b/bench_suites/imdb_robust/22b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=22b
+QUERY_NUMBER_PADDED=22b
diff --git a/bench_suites/imdb_robust/22c.benchmark b/bench_suites/imdb_robust/22c.benchmark
new file mode 100644
index 0000000..fca5f28
--- /dev/null
+++ b/bench_suites/imdb_robust/22c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=22c
+QUERY_NUMBER_PADDED=22c
diff --git a/bench_suites/imdb_robust/22d.benchmark b/bench_suites/imdb_robust/22d.benchmark
new file mode 100644
index 0000000..27fc253
--- /dev/null
+++ b/bench_suites/imdb_robust/22d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=22d
+QUERY_NUMBER_PADDED=22d
diff --git a/bench_suites/imdb_robust/23a.benchmark b/bench_suites/imdb_robust/23a.benchmark
new file mode 100644
index 0000000..aadb57c
--- /dev/null
+++ b/bench_suites/imdb_robust/23a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=23a
+QUERY_NUMBER_PADDED=23a
diff --git a/bench_suites/imdb_robust/23b.benchmark b/bench_suites/imdb_robust/23b.benchmark
new file mode 100644
index 0000000..883c50a
--- /dev/null
+++ b/bench_suites/imdb_robust/23b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=23b
+QUERY_NUMBER_PADDED=23b
diff --git a/bench_suites/imdb_robust/23c.benchmark b/bench_suites/imdb_robust/23c.benchmark
new file mode 100644
index 0000000..d5b110f
--- /dev/null
+++ b/bench_suites/imdb_robust/23c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=23c
+QUERY_NUMBER_PADDED=23c
diff --git a/bench_suites/imdb_robust/24a.benchmark b/bench_suites/imdb_robust/24a.benchmark
new file mode 100644
index 0000000..7cd43e2
--- /dev/null
+++ b/bench_suites/imdb_robust/24a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=24a
+QUERY_NUMBER_PADDED=24a
diff --git a/bench_suites/imdb_robust/24b.benchmark b/bench_suites/imdb_robust/24b.benchmark
new file mode 100644
index 0000000..b431b18
--- /dev/null
+++ b/bench_suites/imdb_robust/24b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=24b
+QUERY_NUMBER_PADDED=24b
diff --git a/bench_suites/imdb_robust/25a.benchmark b/bench_suites/imdb_robust/25a.benchmark
new file mode 100644
index 0000000..7dbd144
--- /dev/null
+++ b/bench_suites/imdb_robust/25a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=25a
+QUERY_NUMBER_PADDED=25a
diff --git a/bench_suites/imdb_robust/25b.benchmark b/bench_suites/imdb_robust/25b.benchmark
new file mode 100644
index 0000000..2970156
--- /dev/null
+++ b/bench_suites/imdb_robust/25b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=25b
+QUERY_NUMBER_PADDED=25b
diff --git a/bench_suites/imdb_robust/25c.benchmark b/bench_suites/imdb_robust/25c.benchmark
new file mode 100644
index 0000000..dafb6f8
--- /dev/null
+++ b/bench_suites/imdb_robust/25c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=25c
+QUERY_NUMBER_PADDED=25c
diff --git a/bench_suites/imdb_robust/26a.benchmark b/bench_suites/imdb_robust/26a.benchmark
new file mode 100644
index 0000000..d7707ec
--- /dev/null
+++ b/bench_suites/imdb_robust/26a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=26a
+QUERY_NUMBER_PADDED=26a
diff --git a/bench_suites/imdb_robust/26b.benchmark b/bench_suites/imdb_robust/26b.benchmark
new file mode 100644
index 0000000..7967bc9
--- /dev/null
+++ b/bench_suites/imdb_robust/26b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=26b
+QUERY_NUMBER_PADDED=26b
diff --git a/bench_suites/imdb_robust/26c.benchmark b/bench_suites/imdb_robust/26c.benchmark
new file mode 100644
index 0000000..089a3cc
--- /dev/null
+++ b/bench_suites/imdb_robust/26c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=26c
+QUERY_NUMBER_PADDED=26c
diff --git a/bench_suites/imdb_robust/27a.benchmark b/bench_suites/imdb_robust/27a.benchmark
new file mode 100644
index 0000000..3da2791
--- /dev/null
+++ b/bench_suites/imdb_robust/27a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=27a
+QUERY_NUMBER_PADDED=27a
diff --git a/bench_suites/imdb_robust/27b.benchmark b/bench_suites/imdb_robust/27b.benchmark
new file mode 100644
index 0000000..d3e8437
--- /dev/null
+++ b/bench_suites/imdb_robust/27b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=27b
+QUERY_NUMBER_PADDED=27b
diff --git a/bench_suites/imdb_robust/27c.benchmark b/bench_suites/imdb_robust/27c.benchmark
new file mode 100644
index 0000000..0295004
--- /dev/null
+++ b/bench_suites/imdb_robust/27c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=27c
+QUERY_NUMBER_PADDED=27c
diff --git a/bench_suites/imdb_robust/28a.benchmark b/bench_suites/imdb_robust/28a.benchmark
new file mode 100644
index 0000000..ba95625
--- /dev/null
+++ b/bench_suites/imdb_robust/28a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=28a
+QUERY_NUMBER_PADDED=28a
diff --git a/bench_suites/imdb_robust/28b.benchmark b/bench_suites/imdb_robust/28b.benchmark
new file mode 100644
index 0000000..6c37162
--- /dev/null
+++ b/bench_suites/imdb_robust/28b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=28b
+QUERY_NUMBER_PADDED=28b
diff --git a/bench_suites/imdb_robust/28c.benchmark b/bench_suites/imdb_robust/28c.benchmark
new file mode 100644
index 0000000..27ce3ce
--- /dev/null
+++ b/bench_suites/imdb_robust/28c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=28c
+QUERY_NUMBER_PADDED=28c
diff --git a/bench_suites/imdb_robust/29a.benchmark b/bench_suites/imdb_robust/29a.benchmark
new file mode 100644
index 0000000..18fe4c8
--- /dev/null
+++ b/bench_suites/imdb_robust/29a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=29a
+QUERY_NUMBER_PADDED=29a
diff --git a/bench_suites/imdb_robust/29b.benchmark b/bench_suites/imdb_robust/29b.benchmark
new file mode 100644
index 0000000..a284908
--- /dev/null
+++ b/bench_suites/imdb_robust/29b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=29b
+QUERY_NUMBER_PADDED=29b
diff --git a/bench_suites/imdb_robust/29c.benchmark b/bench_suites/imdb_robust/29c.benchmark
new file mode 100644
index 0000000..2505801
--- /dev/null
+++ b/bench_suites/imdb_robust/29c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=29c
+QUERY_NUMBER_PADDED=29c
diff --git a/bench_suites/imdb_robust/30a.benchmark b/bench_suites/imdb_robust/30a.benchmark
new file mode 100644
index 0000000..9a95171
--- /dev/null
+++ b/bench_suites/imdb_robust/30a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=30a
+QUERY_NUMBER_PADDED=30a
diff --git a/bench_suites/imdb_robust/30b.benchmark b/bench_suites/imdb_robust/30b.benchmark
new file mode 100644
index 0000000..1e76241
--- /dev/null
+++ b/bench_suites/imdb_robust/30b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=30b
+QUERY_NUMBER_PADDED=30b
diff --git a/bench_suites/imdb_robust/30c.benchmark b/bench_suites/imdb_robust/30c.benchmark
new file mode 100644
index 0000000..8a9e3f9
--- /dev/null
+++ b/bench_suites/imdb_robust/30c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=30c
+QUERY_NUMBER_PADDED=30c
diff --git a/bench_suites/imdb_robust/31a.benchmark b/bench_suites/imdb_robust/31a.benchmark
new file mode 100644
index 0000000..6419e16
--- /dev/null
+++ b/bench_suites/imdb_robust/31a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=31a
+QUERY_NUMBER_PADDED=31a
diff --git a/bench_suites/imdb_robust/31b.benchmark b/bench_suites/imdb_robust/31b.benchmark
new file mode 100644
index 0000000..1eaf37a
--- /dev/null
+++ b/bench_suites/imdb_robust/31b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=31b
+QUERY_NUMBER_PADDED=31b
diff --git a/bench_suites/imdb_robust/31c.benchmark b/bench_suites/imdb_robust/31c.benchmark
new file mode 100644
index 0000000..af03594
--- /dev/null
+++ b/bench_suites/imdb_robust/31c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=31c
+QUERY_NUMBER_PADDED=31c
diff --git a/bench_suites/imdb_robust/32a.benchmark b/bench_suites/imdb_robust/32a.benchmark
new file mode 100644
index 0000000..0b3d985
--- /dev/null
+++ b/bench_suites/imdb_robust/32a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=32a
+QUERY_NUMBER_PADDED=32a
diff --git a/bench_suites/imdb_robust/32b.benchmark b/bench_suites/imdb_robust/32b.benchmark
new file mode 100644
index 0000000..e1c7dd9
--- /dev/null
+++ b/bench_suites/imdb_robust/32b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=32b
+QUERY_NUMBER_PADDED=32b
diff --git a/bench_suites/imdb_robust/33a.benchmark b/bench_suites/imdb_robust/33a.benchmark
new file mode 100644
index 0000000..f9baa46
--- /dev/null
+++ b/bench_suites/imdb_robust/33a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=33a
+QUERY_NUMBER_PADDED=33a
diff --git a/bench_suites/imdb_robust/33b.benchmark b/bench_suites/imdb_robust/33b.benchmark
new file mode 100644
index 0000000..555479b
--- /dev/null
+++ b/bench_suites/imdb_robust/33b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=33b
+QUERY_NUMBER_PADDED=33b
diff --git a/bench_suites/imdb_robust/33c.benchmark b/bench_suites/imdb_robust/33c.benchmark
new file mode 100644
index 0000000..db9d51c
--- /dev/null
+++ b/bench_suites/imdb_robust/33c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust/imdb_robust.benchmark.in
+QUERY_NUMBER=33c
+QUERY_NUMBER_PADDED=33c
diff --git a/bench_suites/imdb_robust/imdb_robust.benchmark.in b/bench_suites/imdb_robust/imdb_robust.benchmark.in
new file mode 100644
index 0000000..fef8d10
--- /dev/null
+++ b/bench_suites/imdb_robust/imdb_robust.benchmark.in
@@ -0,0 +1,15 @@
+# name: ${FILE_PATH}
+# description: JOB query ${QUERY_NUMBER_PADDED} with Robust extension (default heuristic: join_order)
+# group: [imdb_robust]
+
+name Q${QUERY_NUMBER_PADDED}_Robust
+group imdb_robust
+
+cache imdb.duckdb
+
+init
+LOAD 'build/release/extension/robust/robust.duckdb_extension';
+SET disabled_optimizers = 'join_filter_pushdown';
+SET robust_heuristic = 'join_order';
+
+run benchmark/imdb_plan_cost/queries/${QUERY_NUMBER_PADDED}.sql
diff --git a/bench_suites/imdb_robust_fwd/01a.benchmark b/bench_suites/imdb_robust_fwd/01a.benchmark
new file mode 100644
index 0000000..63ce6a5
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/01a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=01a
+QUERY_NUMBER_PADDED=01a
diff --git a/bench_suites/imdb_robust_fwd/01b.benchmark b/bench_suites/imdb_robust_fwd/01b.benchmark
new file mode 100644
index 0000000..564546b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/01b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=01b
+QUERY_NUMBER_PADDED=01b
diff --git a/bench_suites/imdb_robust_fwd/01c.benchmark b/bench_suites/imdb_robust_fwd/01c.benchmark
new file mode 100644
index 0000000..ef14ae4
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/01c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=01c
+QUERY_NUMBER_PADDED=01c
diff --git a/bench_suites/imdb_robust_fwd/01d.benchmark b/bench_suites/imdb_robust_fwd/01d.benchmark
new file mode 100644
index 0000000..459ce8f
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/01d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=01d
+QUERY_NUMBER_PADDED=01d
diff --git a/bench_suites/imdb_robust_fwd/02a.benchmark b/bench_suites/imdb_robust_fwd/02a.benchmark
new file mode 100644
index 0000000..b10f416
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/02a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=02a
+QUERY_NUMBER_PADDED=02a
diff --git a/bench_suites/imdb_robust_fwd/02b.benchmark b/bench_suites/imdb_robust_fwd/02b.benchmark
new file mode 100644
index 0000000..f89d8e9
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/02b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=02b
+QUERY_NUMBER_PADDED=02b
diff --git a/bench_suites/imdb_robust_fwd/02c.benchmark b/bench_suites/imdb_robust_fwd/02c.benchmark
new file mode 100644
index 0000000..eebc387
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/02c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=02c
+QUERY_NUMBER_PADDED=02c
diff --git a/bench_suites/imdb_robust_fwd/02d.benchmark b/bench_suites/imdb_robust_fwd/02d.benchmark
new file mode 100644
index 0000000..35a5574
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/02d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=02d
+QUERY_NUMBER_PADDED=02d
diff --git a/bench_suites/imdb_robust_fwd/03a.benchmark b/bench_suites/imdb_robust_fwd/03a.benchmark
new file mode 100644
index 0000000..e28a929
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/03a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=03a
+QUERY_NUMBER_PADDED=03a
diff --git a/bench_suites/imdb_robust_fwd/03b.benchmark b/bench_suites/imdb_robust_fwd/03b.benchmark
new file mode 100644
index 0000000..ba7408b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/03b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=03b
+QUERY_NUMBER_PADDED=03b
diff --git a/bench_suites/imdb_robust_fwd/03c.benchmark b/bench_suites/imdb_robust_fwd/03c.benchmark
new file mode 100644
index 0000000..e65d37b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/03c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=03c
+QUERY_NUMBER_PADDED=03c
diff --git a/bench_suites/imdb_robust_fwd/04a.benchmark b/bench_suites/imdb_robust_fwd/04a.benchmark
new file mode 100644
index 0000000..9d29d97
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/04a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=04a
+QUERY_NUMBER_PADDED=04a
diff --git a/bench_suites/imdb_robust_fwd/04b.benchmark b/bench_suites/imdb_robust_fwd/04b.benchmark
new file mode 100644
index 0000000..77394ce
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/04b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=04b
+QUERY_NUMBER_PADDED=04b
diff --git a/bench_suites/imdb_robust_fwd/04c.benchmark b/bench_suites/imdb_robust_fwd/04c.benchmark
new file mode 100644
index 0000000..4310f7c
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/04c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=04c
+QUERY_NUMBER_PADDED=04c
diff --git a/bench_suites/imdb_robust_fwd/05a.benchmark b/bench_suites/imdb_robust_fwd/05a.benchmark
new file mode 100644
index 0000000..9255c99
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/05a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=05a
+QUERY_NUMBER_PADDED=05a
diff --git a/bench_suites/imdb_robust_fwd/05b.benchmark b/bench_suites/imdb_robust_fwd/05b.benchmark
new file mode 100644
index 0000000..37f3fab
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/05b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=05b
+QUERY_NUMBER_PADDED=05b
diff --git a/bench_suites/imdb_robust_fwd/05c.benchmark b/bench_suites/imdb_robust_fwd/05c.benchmark
new file mode 100644
index 0000000..75fbda9
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/05c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=05c
+QUERY_NUMBER_PADDED=05c
diff --git a/bench_suites/imdb_robust_fwd/06a.benchmark b/bench_suites/imdb_robust_fwd/06a.benchmark
new file mode 100644
index 0000000..b01d8b4
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/06a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=06a
+QUERY_NUMBER_PADDED=06a
diff --git a/bench_suites/imdb_robust_fwd/06b.benchmark b/bench_suites/imdb_robust_fwd/06b.benchmark
new file mode 100644
index 0000000..8b5b533
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/06b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=06b
+QUERY_NUMBER_PADDED=06b
diff --git a/bench_suites/imdb_robust_fwd/06c.benchmark b/bench_suites/imdb_robust_fwd/06c.benchmark
new file mode 100644
index 0000000..5a6706b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/06c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=06c
+QUERY_NUMBER_PADDED=06c
diff --git a/bench_suites/imdb_robust_fwd/06d.benchmark b/bench_suites/imdb_robust_fwd/06d.benchmark
new file mode 100644
index 0000000..d63fb14
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/06d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=06d
+QUERY_NUMBER_PADDED=06d
diff --git a/bench_suites/imdb_robust_fwd/06e.benchmark b/bench_suites/imdb_robust_fwd/06e.benchmark
new file mode 100644
index 0000000..595599b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/06e.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=06e
+QUERY_NUMBER_PADDED=06e
diff --git a/bench_suites/imdb_robust_fwd/06f.benchmark b/bench_suites/imdb_robust_fwd/06f.benchmark
new file mode 100644
index 0000000..0758df3
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/06f.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=06f
+QUERY_NUMBER_PADDED=06f
diff --git a/bench_suites/imdb_robust_fwd/07a.benchmark b/bench_suites/imdb_robust_fwd/07a.benchmark
new file mode 100644
index 0000000..fad5cff
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/07a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=07a
+QUERY_NUMBER_PADDED=07a
diff --git a/bench_suites/imdb_robust_fwd/07b.benchmark b/bench_suites/imdb_robust_fwd/07b.benchmark
new file mode 100644
index 0000000..e1a8ef5
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/07b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=07b
+QUERY_NUMBER_PADDED=07b
diff --git a/bench_suites/imdb_robust_fwd/07c.benchmark b/bench_suites/imdb_robust_fwd/07c.benchmark
new file mode 100644
index 0000000..5f7b1d2
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/07c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=07c
+QUERY_NUMBER_PADDED=07c
diff --git a/bench_suites/imdb_robust_fwd/08a.benchmark b/bench_suites/imdb_robust_fwd/08a.benchmark
new file mode 100644
index 0000000..2a98d79
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/08a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=08a
+QUERY_NUMBER_PADDED=08a
diff --git a/bench_suites/imdb_robust_fwd/08b.benchmark b/bench_suites/imdb_robust_fwd/08b.benchmark
new file mode 100644
index 0000000..5bbd9c6
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/08b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=08b
+QUERY_NUMBER_PADDED=08b
diff --git a/bench_suites/imdb_robust_fwd/08c.benchmark b/bench_suites/imdb_robust_fwd/08c.benchmark
new file mode 100644
index 0000000..6784570
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/08c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=08c
+QUERY_NUMBER_PADDED=08c
diff --git a/bench_suites/imdb_robust_fwd/08d.benchmark b/bench_suites/imdb_robust_fwd/08d.benchmark
new file mode 100644
index 0000000..e39cb34
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/08d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=08d
+QUERY_NUMBER_PADDED=08d
diff --git a/bench_suites/imdb_robust_fwd/09a.benchmark b/bench_suites/imdb_robust_fwd/09a.benchmark
new file mode 100644
index 0000000..74d8d4c
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/09a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=09a
+QUERY_NUMBER_PADDED=09a
diff --git a/bench_suites/imdb_robust_fwd/09b.benchmark b/bench_suites/imdb_robust_fwd/09b.benchmark
new file mode 100644
index 0000000..f6d5a75
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/09b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=09b
+QUERY_NUMBER_PADDED=09b
diff --git a/bench_suites/imdb_robust_fwd/09c.benchmark b/bench_suites/imdb_robust_fwd/09c.benchmark
new file mode 100644
index 0000000..1fb0f58
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/09c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=09c
+QUERY_NUMBER_PADDED=09c
diff --git a/bench_suites/imdb_robust_fwd/09d.benchmark b/bench_suites/imdb_robust_fwd/09d.benchmark
new file mode 100644
index 0000000..55a6a87
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/09d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=09d
+QUERY_NUMBER_PADDED=09d
diff --git a/bench_suites/imdb_robust_fwd/10a.benchmark b/bench_suites/imdb_robust_fwd/10a.benchmark
new file mode 100644
index 0000000..ba0a285
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/10a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=10a
+QUERY_NUMBER_PADDED=10a
diff --git a/bench_suites/imdb_robust_fwd/10b.benchmark b/bench_suites/imdb_robust_fwd/10b.benchmark
new file mode 100644
index 0000000..6612c45
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/10b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=10b
+QUERY_NUMBER_PADDED=10b
diff --git a/bench_suites/imdb_robust_fwd/10c.benchmark b/bench_suites/imdb_robust_fwd/10c.benchmark
new file mode 100644
index 0000000..c8fc0bd
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/10c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=10c
+QUERY_NUMBER_PADDED=10c
diff --git a/bench_suites/imdb_robust_fwd/11a.benchmark b/bench_suites/imdb_robust_fwd/11a.benchmark
new file mode 100644
index 0000000..89bd18c
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/11a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=11a
+QUERY_NUMBER_PADDED=11a
diff --git a/bench_suites/imdb_robust_fwd/11b.benchmark b/bench_suites/imdb_robust_fwd/11b.benchmark
new file mode 100644
index 0000000..def5a65
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/11b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=11b
+QUERY_NUMBER_PADDED=11b
diff --git a/bench_suites/imdb_robust_fwd/11c.benchmark b/bench_suites/imdb_robust_fwd/11c.benchmark
new file mode 100644
index 0000000..ed63277
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/11c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=11c
+QUERY_NUMBER_PADDED=11c
diff --git a/bench_suites/imdb_robust_fwd/11d.benchmark b/bench_suites/imdb_robust_fwd/11d.benchmark
new file mode 100644
index 0000000..721e655
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/11d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=11d
+QUERY_NUMBER_PADDED=11d
diff --git a/bench_suites/imdb_robust_fwd/12a.benchmark b/bench_suites/imdb_robust_fwd/12a.benchmark
new file mode 100644
index 0000000..e4bd9ac
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/12a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=12a
+QUERY_NUMBER_PADDED=12a
diff --git a/bench_suites/imdb_robust_fwd/12b.benchmark b/bench_suites/imdb_robust_fwd/12b.benchmark
new file mode 100644
index 0000000..e0c2d7e
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/12b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=12b
+QUERY_NUMBER_PADDED=12b
diff --git a/bench_suites/imdb_robust_fwd/12c.benchmark b/bench_suites/imdb_robust_fwd/12c.benchmark
new file mode 100644
index 0000000..e921254
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/12c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=12c
+QUERY_NUMBER_PADDED=12c
diff --git a/bench_suites/imdb_robust_fwd/13a.benchmark b/bench_suites/imdb_robust_fwd/13a.benchmark
new file mode 100644
index 0000000..ec6febc
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/13a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=13a
+QUERY_NUMBER_PADDED=13a
diff --git a/bench_suites/imdb_robust_fwd/13b.benchmark b/bench_suites/imdb_robust_fwd/13b.benchmark
new file mode 100644
index 0000000..82c2748
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/13b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=13b
+QUERY_NUMBER_PADDED=13b
diff --git a/bench_suites/imdb_robust_fwd/13c.benchmark b/bench_suites/imdb_robust_fwd/13c.benchmark
new file mode 100644
index 0000000..8cf8b1a
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/13c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=13c
+QUERY_NUMBER_PADDED=13c
diff --git a/bench_suites/imdb_robust_fwd/13d.benchmark b/bench_suites/imdb_robust_fwd/13d.benchmark
new file mode 100644
index 0000000..a6c9807
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/13d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=13d
+QUERY_NUMBER_PADDED=13d
diff --git a/bench_suites/imdb_robust_fwd/14a.benchmark b/bench_suites/imdb_robust_fwd/14a.benchmark
new file mode 100644
index 0000000..919f038
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/14a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=14a
+QUERY_NUMBER_PADDED=14a
diff --git a/bench_suites/imdb_robust_fwd/14b.benchmark b/bench_suites/imdb_robust_fwd/14b.benchmark
new file mode 100644
index 0000000..0e33827
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/14b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=14b
+QUERY_NUMBER_PADDED=14b
diff --git a/bench_suites/imdb_robust_fwd/14c.benchmark b/bench_suites/imdb_robust_fwd/14c.benchmark
new file mode 100644
index 0000000..5295e90
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/14c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=14c
+QUERY_NUMBER_PADDED=14c
diff --git a/bench_suites/imdb_robust_fwd/15a.benchmark b/bench_suites/imdb_robust_fwd/15a.benchmark
new file mode 100644
index 0000000..65e1045
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/15a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=15a
+QUERY_NUMBER_PADDED=15a
diff --git a/bench_suites/imdb_robust_fwd/15b.benchmark b/bench_suites/imdb_robust_fwd/15b.benchmark
new file mode 100644
index 0000000..7fd0a80
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/15b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=15b
+QUERY_NUMBER_PADDED=15b
diff --git a/bench_suites/imdb_robust_fwd/15c.benchmark b/bench_suites/imdb_robust_fwd/15c.benchmark
new file mode 100644
index 0000000..064d7be
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/15c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=15c
+QUERY_NUMBER_PADDED=15c
diff --git a/bench_suites/imdb_robust_fwd/15d.benchmark b/bench_suites/imdb_robust_fwd/15d.benchmark
new file mode 100644
index 0000000..f9c5a4b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/15d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=15d
+QUERY_NUMBER_PADDED=15d
diff --git a/bench_suites/imdb_robust_fwd/16a.benchmark b/bench_suites/imdb_robust_fwd/16a.benchmark
new file mode 100644
index 0000000..b363927
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/16a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=16a
+QUERY_NUMBER_PADDED=16a
diff --git a/bench_suites/imdb_robust_fwd/16b.benchmark b/bench_suites/imdb_robust_fwd/16b.benchmark
new file mode 100644
index 0000000..df84b7a
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/16b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=16b
+QUERY_NUMBER_PADDED=16b
diff --git a/bench_suites/imdb_robust_fwd/16c.benchmark b/bench_suites/imdb_robust_fwd/16c.benchmark
new file mode 100644
index 0000000..4e8b5dd
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/16c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=16c
+QUERY_NUMBER_PADDED=16c
diff --git a/bench_suites/imdb_robust_fwd/16d.benchmark b/bench_suites/imdb_robust_fwd/16d.benchmark
new file mode 100644
index 0000000..329c396
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/16d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=16d
+QUERY_NUMBER_PADDED=16d
diff --git a/bench_suites/imdb_robust_fwd/17a.benchmark b/bench_suites/imdb_robust_fwd/17a.benchmark
new file mode 100644
index 0000000..a9bce2a
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/17a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=17a
+QUERY_NUMBER_PADDED=17a
diff --git a/bench_suites/imdb_robust_fwd/17b.benchmark b/bench_suites/imdb_robust_fwd/17b.benchmark
new file mode 100644
index 0000000..23d791b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/17b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=17b
+QUERY_NUMBER_PADDED=17b
diff --git a/bench_suites/imdb_robust_fwd/17c.benchmark b/bench_suites/imdb_robust_fwd/17c.benchmark
new file mode 100644
index 0000000..0da9cfb
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/17c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=17c
+QUERY_NUMBER_PADDED=17c
diff --git a/bench_suites/imdb_robust_fwd/17d.benchmark b/bench_suites/imdb_robust_fwd/17d.benchmark
new file mode 100644
index 0000000..6bef0d3
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/17d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=17d
+QUERY_NUMBER_PADDED=17d
diff --git a/bench_suites/imdb_robust_fwd/17e.benchmark b/bench_suites/imdb_robust_fwd/17e.benchmark
new file mode 100644
index 0000000..605909a
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/17e.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=17e
+QUERY_NUMBER_PADDED=17e
diff --git a/bench_suites/imdb_robust_fwd/17f.benchmark b/bench_suites/imdb_robust_fwd/17f.benchmark
new file mode 100644
index 0000000..3e442ba
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/17f.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=17f
+QUERY_NUMBER_PADDED=17f
diff --git a/bench_suites/imdb_robust_fwd/18a.benchmark b/bench_suites/imdb_robust_fwd/18a.benchmark
new file mode 100644
index 0000000..8b23ffa
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/18a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=18a
+QUERY_NUMBER_PADDED=18a
diff --git a/bench_suites/imdb_robust_fwd/18b.benchmark b/bench_suites/imdb_robust_fwd/18b.benchmark
new file mode 100644
index 0000000..e629ae2
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/18b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=18b
+QUERY_NUMBER_PADDED=18b
diff --git a/bench_suites/imdb_robust_fwd/18c.benchmark b/bench_suites/imdb_robust_fwd/18c.benchmark
new file mode 100644
index 0000000..4ec72fe
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/18c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=18c
+QUERY_NUMBER_PADDED=18c
diff --git a/bench_suites/imdb_robust_fwd/19a.benchmark b/bench_suites/imdb_robust_fwd/19a.benchmark
new file mode 100644
index 0000000..383ef33
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/19a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=19a
+QUERY_NUMBER_PADDED=19a
diff --git a/bench_suites/imdb_robust_fwd/19b.benchmark b/bench_suites/imdb_robust_fwd/19b.benchmark
new file mode 100644
index 0000000..ddbd991
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/19b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=19b
+QUERY_NUMBER_PADDED=19b
diff --git a/bench_suites/imdb_robust_fwd/19c.benchmark b/bench_suites/imdb_robust_fwd/19c.benchmark
new file mode 100644
index 0000000..ea56a85
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/19c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=19c
+QUERY_NUMBER_PADDED=19c
diff --git a/bench_suites/imdb_robust_fwd/19d.benchmark b/bench_suites/imdb_robust_fwd/19d.benchmark
new file mode 100644
index 0000000..299bba9
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/19d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=19d
+QUERY_NUMBER_PADDED=19d
diff --git a/bench_suites/imdb_robust_fwd/20a.benchmark b/bench_suites/imdb_robust_fwd/20a.benchmark
new file mode 100644
index 0000000..75df93b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/20a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=20a
+QUERY_NUMBER_PADDED=20a
diff --git a/bench_suites/imdb_robust_fwd/20b.benchmark b/bench_suites/imdb_robust_fwd/20b.benchmark
new file mode 100644
index 0000000..1bfa689
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/20b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=20b
+QUERY_NUMBER_PADDED=20b
diff --git a/bench_suites/imdb_robust_fwd/20c.benchmark b/bench_suites/imdb_robust_fwd/20c.benchmark
new file mode 100644
index 0000000..d8b89e0
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/20c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=20c
+QUERY_NUMBER_PADDED=20c
diff --git a/bench_suites/imdb_robust_fwd/21a.benchmark b/bench_suites/imdb_robust_fwd/21a.benchmark
new file mode 100644
index 0000000..8e9b826
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/21a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=21a
+QUERY_NUMBER_PADDED=21a
diff --git a/bench_suites/imdb_robust_fwd/21b.benchmark b/bench_suites/imdb_robust_fwd/21b.benchmark
new file mode 100644
index 0000000..b638ef4
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/21b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=21b
+QUERY_NUMBER_PADDED=21b
diff --git a/bench_suites/imdb_robust_fwd/21c.benchmark b/bench_suites/imdb_robust_fwd/21c.benchmark
new file mode 100644
index 0000000..6759863
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/21c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=21c
+QUERY_NUMBER_PADDED=21c
diff --git a/bench_suites/imdb_robust_fwd/22a.benchmark b/bench_suites/imdb_robust_fwd/22a.benchmark
new file mode 100644
index 0000000..71ac749
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/22a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=22a
+QUERY_NUMBER_PADDED=22a
diff --git a/bench_suites/imdb_robust_fwd/22b.benchmark b/bench_suites/imdb_robust_fwd/22b.benchmark
new file mode 100644
index 0000000..ea3582b
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/22b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=22b
+QUERY_NUMBER_PADDED=22b
diff --git a/bench_suites/imdb_robust_fwd/22c.benchmark b/bench_suites/imdb_robust_fwd/22c.benchmark
new file mode 100644
index 0000000..fa781ff
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/22c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=22c
+QUERY_NUMBER_PADDED=22c
diff --git a/bench_suites/imdb_robust_fwd/22d.benchmark b/bench_suites/imdb_robust_fwd/22d.benchmark
new file mode 100644
index 0000000..80f370a
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/22d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=22d
+QUERY_NUMBER_PADDED=22d
diff --git a/bench_suites/imdb_robust_fwd/23a.benchmark b/bench_suites/imdb_robust_fwd/23a.benchmark
new file mode 100644
index 0000000..22e249f
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/23a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=23a
+QUERY_NUMBER_PADDED=23a
diff --git a/bench_suites/imdb_robust_fwd/23b.benchmark b/bench_suites/imdb_robust_fwd/23b.benchmark
new file mode 100644
index 0000000..9575ff4
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/23b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=23b
+QUERY_NUMBER_PADDED=23b
diff --git a/bench_suites/imdb_robust_fwd/23c.benchmark b/bench_suites/imdb_robust_fwd/23c.benchmark
new file mode 100644
index 0000000..96e43c5
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/23c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=23c
+QUERY_NUMBER_PADDED=23c
diff --git a/bench_suites/imdb_robust_fwd/24a.benchmark b/bench_suites/imdb_robust_fwd/24a.benchmark
new file mode 100644
index 0000000..a4c7275
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/24a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=24a
+QUERY_NUMBER_PADDED=24a
diff --git a/bench_suites/imdb_robust_fwd/24b.benchmark b/bench_suites/imdb_robust_fwd/24b.benchmark
new file mode 100644
index 0000000..88f309e
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/24b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=24b
+QUERY_NUMBER_PADDED=24b
diff --git a/bench_suites/imdb_robust_fwd/25a.benchmark b/bench_suites/imdb_robust_fwd/25a.benchmark
new file mode 100644
index 0000000..76c3ef7
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/25a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=25a
+QUERY_NUMBER_PADDED=25a
diff --git a/bench_suites/imdb_robust_fwd/25b.benchmark b/bench_suites/imdb_robust_fwd/25b.benchmark
new file mode 100644
index 0000000..fa970b9
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/25b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=25b
+QUERY_NUMBER_PADDED=25b
diff --git a/bench_suites/imdb_robust_fwd/25c.benchmark b/bench_suites/imdb_robust_fwd/25c.benchmark
new file mode 100644
index 0000000..ac53f4c
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/25c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=25c
+QUERY_NUMBER_PADDED=25c
diff --git a/bench_suites/imdb_robust_fwd/26a.benchmark b/bench_suites/imdb_robust_fwd/26a.benchmark
new file mode 100644
index 0000000..574c324
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/26a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=26a
+QUERY_NUMBER_PADDED=26a
diff --git a/bench_suites/imdb_robust_fwd/26b.benchmark b/bench_suites/imdb_robust_fwd/26b.benchmark
new file mode 100644
index 0000000..594d811
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/26b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=26b
+QUERY_NUMBER_PADDED=26b
diff --git a/bench_suites/imdb_robust_fwd/26c.benchmark b/bench_suites/imdb_robust_fwd/26c.benchmark
new file mode 100644
index 0000000..c19f998
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/26c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=26c
+QUERY_NUMBER_PADDED=26c
diff --git a/bench_suites/imdb_robust_fwd/27a.benchmark b/bench_suites/imdb_robust_fwd/27a.benchmark
new file mode 100644
index 0000000..cdf8a60
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/27a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=27a
+QUERY_NUMBER_PADDED=27a
diff --git a/bench_suites/imdb_robust_fwd/27b.benchmark b/bench_suites/imdb_robust_fwd/27b.benchmark
new file mode 100644
index 0000000..98efd66
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/27b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=27b
+QUERY_NUMBER_PADDED=27b
diff --git a/bench_suites/imdb_robust_fwd/27c.benchmark b/bench_suites/imdb_robust_fwd/27c.benchmark
new file mode 100644
index 0000000..31ed3b7
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/27c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=27c
+QUERY_NUMBER_PADDED=27c
diff --git a/bench_suites/imdb_robust_fwd/28a.benchmark b/bench_suites/imdb_robust_fwd/28a.benchmark
new file mode 100644
index 0000000..3010486
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/28a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=28a
+QUERY_NUMBER_PADDED=28a
diff --git a/bench_suites/imdb_robust_fwd/28b.benchmark b/bench_suites/imdb_robust_fwd/28b.benchmark
new file mode 100644
index 0000000..59a08d2
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/28b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=28b
+QUERY_NUMBER_PADDED=28b
diff --git a/bench_suites/imdb_robust_fwd/28c.benchmark b/bench_suites/imdb_robust_fwd/28c.benchmark
new file mode 100644
index 0000000..ffbe287
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/28c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=28c
+QUERY_NUMBER_PADDED=28c
diff --git a/bench_suites/imdb_robust_fwd/29a.benchmark b/bench_suites/imdb_robust_fwd/29a.benchmark
new file mode 100644
index 0000000..66797ee
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/29a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=29a
+QUERY_NUMBER_PADDED=29a
diff --git a/bench_suites/imdb_robust_fwd/29b.benchmark b/bench_suites/imdb_robust_fwd/29b.benchmark
new file mode 100644
index 0000000..10c48b7
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/29b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=29b
+QUERY_NUMBER_PADDED=29b
diff --git a/bench_suites/imdb_robust_fwd/29c.benchmark b/bench_suites/imdb_robust_fwd/29c.benchmark
new file mode 100644
index 0000000..b0415cd
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/29c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=29c
+QUERY_NUMBER_PADDED=29c
diff --git a/bench_suites/imdb_robust_fwd/30a.benchmark b/bench_suites/imdb_robust_fwd/30a.benchmark
new file mode 100644
index 0000000..b3d6322
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/30a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=30a
+QUERY_NUMBER_PADDED=30a
diff --git a/bench_suites/imdb_robust_fwd/30b.benchmark b/bench_suites/imdb_robust_fwd/30b.benchmark
new file mode 100644
index 0000000..a2a8ad6
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/30b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=30b
+QUERY_NUMBER_PADDED=30b
diff --git a/bench_suites/imdb_robust_fwd/30c.benchmark b/bench_suites/imdb_robust_fwd/30c.benchmark
new file mode 100644
index 0000000..5e03d96
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/30c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=30c
+QUERY_NUMBER_PADDED=30c
diff --git a/bench_suites/imdb_robust_fwd/31a.benchmark b/bench_suites/imdb_robust_fwd/31a.benchmark
new file mode 100644
index 0000000..80fe28e
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/31a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=31a
+QUERY_NUMBER_PADDED=31a
diff --git a/bench_suites/imdb_robust_fwd/31b.benchmark b/bench_suites/imdb_robust_fwd/31b.benchmark
new file mode 100644
index 0000000..e497bad
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/31b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=31b
+QUERY_NUMBER_PADDED=31b
diff --git a/bench_suites/imdb_robust_fwd/31c.benchmark b/bench_suites/imdb_robust_fwd/31c.benchmark
new file mode 100644
index 0000000..4960d82
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/31c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=31c
+QUERY_NUMBER_PADDED=31c
diff --git a/bench_suites/imdb_robust_fwd/32a.benchmark b/bench_suites/imdb_robust_fwd/32a.benchmark
new file mode 100644
index 0000000..dd89194
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/32a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=32a
+QUERY_NUMBER_PADDED=32a
diff --git a/bench_suites/imdb_robust_fwd/32b.benchmark b/bench_suites/imdb_robust_fwd/32b.benchmark
new file mode 100644
index 0000000..c462433
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/32b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=32b
+QUERY_NUMBER_PADDED=32b
diff --git a/bench_suites/imdb_robust_fwd/33a.benchmark b/bench_suites/imdb_robust_fwd/33a.benchmark
new file mode 100644
index 0000000..c9b4f1d
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/33a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=33a
+QUERY_NUMBER_PADDED=33a
diff --git a/bench_suites/imdb_robust_fwd/33b.benchmark b/bench_suites/imdb_robust_fwd/33b.benchmark
new file mode 100644
index 0000000..ff21ed7
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/33b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=33b
+QUERY_NUMBER_PADDED=33b
diff --git a/bench_suites/imdb_robust_fwd/33c.benchmark b/bench_suites/imdb_robust_fwd/33c.benchmark
new file mode 100644
index 0000000..8e92f55
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/33c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
+QUERY_NUMBER=33c
+QUERY_NUMBER_PADDED=33c
diff --git a/bench_suites/imdb_robust_fwd/imdb_robust_fwd.benchmark.in b/bench_suites/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
new file mode 100644
index 0000000..59e1f32
--- /dev/null
+++ b/bench_suites/imdb_robust_fwd/imdb_robust_fwd.benchmark.in
@@ -0,0 +1,15 @@
+# name: ${FILE_PATH}
+# description: JOB query ${QUERY_NUMBER_PADDED} with Robust extension (forward only)
+# group: [imdb_robust_fwd]
+
+name Q${QUERY_NUMBER_PADDED}_Robust_FWD
+group imdb_robust_fwd
+
+cache imdb.duckdb
+
+init
+LOAD 'build/release/extension/robust/robust.duckdb_extension';
+SET disabled_optimizers = 'join_filter_pushdown';
+SET robust_pass_mode = 'forward_only';
+
+run benchmark/imdb_plan_cost/queries/${QUERY_NUMBER_PADDED}.sql
diff --git a/bench_suites/imdb_robust_lr/01a.benchmark b/bench_suites/imdb_robust_lr/01a.benchmark
new file mode 100644
index 0000000..ce52bb0
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/01a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=01a
+QUERY_NUMBER_PADDED=01a
diff --git a/bench_suites/imdb_robust_lr/01b.benchmark b/bench_suites/imdb_robust_lr/01b.benchmark
new file mode 100644
index 0000000..4fc7ceb
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/01b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=01b
+QUERY_NUMBER_PADDED=01b
diff --git a/bench_suites/imdb_robust_lr/01c.benchmark b/bench_suites/imdb_robust_lr/01c.benchmark
new file mode 100644
index 0000000..88f351a
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/01c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=01c
+QUERY_NUMBER_PADDED=01c
diff --git a/bench_suites/imdb_robust_lr/01d.benchmark b/bench_suites/imdb_robust_lr/01d.benchmark
new file mode 100644
index 0000000..afabb64
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/01d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=01d
+QUERY_NUMBER_PADDED=01d
diff --git a/bench_suites/imdb_robust_lr/02a.benchmark b/bench_suites/imdb_robust_lr/02a.benchmark
new file mode 100644
index 0000000..195bce6
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/02a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=02a
+QUERY_NUMBER_PADDED=02a
diff --git a/bench_suites/imdb_robust_lr/02b.benchmark b/bench_suites/imdb_robust_lr/02b.benchmark
new file mode 100644
index 0000000..d442eb5
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/02b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=02b
+QUERY_NUMBER_PADDED=02b
diff --git a/bench_suites/imdb_robust_lr/02c.benchmark b/bench_suites/imdb_robust_lr/02c.benchmark
new file mode 100644
index 0000000..88c3b13
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/02c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=02c
+QUERY_NUMBER_PADDED=02c
diff --git a/bench_suites/imdb_robust_lr/02d.benchmark b/bench_suites/imdb_robust_lr/02d.benchmark
new file mode 100644
index 0000000..c521376
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/02d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=02d
+QUERY_NUMBER_PADDED=02d
diff --git a/bench_suites/imdb_robust_lr/03a.benchmark b/bench_suites/imdb_robust_lr/03a.benchmark
new file mode 100644
index 0000000..84fa4d9
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/03a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=03a
+QUERY_NUMBER_PADDED=03a
diff --git a/bench_suites/imdb_robust_lr/03b.benchmark b/bench_suites/imdb_robust_lr/03b.benchmark
new file mode 100644
index 0000000..7c8f8e8
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/03b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=03b
+QUERY_NUMBER_PADDED=03b
diff --git a/bench_suites/imdb_robust_lr/03c.benchmark b/bench_suites/imdb_robust_lr/03c.benchmark
new file mode 100644
index 0000000..a15e5cb
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/03c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=03c
+QUERY_NUMBER_PADDED=03c
diff --git a/bench_suites/imdb_robust_lr/04a.benchmark b/bench_suites/imdb_robust_lr/04a.benchmark
new file mode 100644
index 0000000..8a15a82
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/04a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=04a
+QUERY_NUMBER_PADDED=04a
diff --git a/bench_suites/imdb_robust_lr/04b.benchmark b/bench_suites/imdb_robust_lr/04b.benchmark
new file mode 100644
index 0000000..3822138
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/04b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=04b
+QUERY_NUMBER_PADDED=04b
diff --git a/bench_suites/imdb_robust_lr/04c.benchmark b/bench_suites/imdb_robust_lr/04c.benchmark
new file mode 100644
index 0000000..d6d8769
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/04c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=04c
+QUERY_NUMBER_PADDED=04c
diff --git a/bench_suites/imdb_robust_lr/05a.benchmark b/bench_suites/imdb_robust_lr/05a.benchmark
new file mode 100644
index 0000000..6ad9054
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/05a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=05a
+QUERY_NUMBER_PADDED=05a
diff --git a/bench_suites/imdb_robust_lr/05b.benchmark b/bench_suites/imdb_robust_lr/05b.benchmark
new file mode 100644
index 0000000..190210c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/05b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=05b
+QUERY_NUMBER_PADDED=05b
diff --git a/bench_suites/imdb_robust_lr/05c.benchmark b/bench_suites/imdb_robust_lr/05c.benchmark
new file mode 100644
index 0000000..110a39e
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/05c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=05c
+QUERY_NUMBER_PADDED=05c
diff --git a/bench_suites/imdb_robust_lr/06a.benchmark b/bench_suites/imdb_robust_lr/06a.benchmark
new file mode 100644
index 0000000..442b93c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/06a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=06a
+QUERY_NUMBER_PADDED=06a
diff --git a/bench_suites/imdb_robust_lr/06b.benchmark b/bench_suites/imdb_robust_lr/06b.benchmark
new file mode 100644
index 0000000..eaba6ff
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/06b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=06b
+QUERY_NUMBER_PADDED=06b
diff --git a/bench_suites/imdb_robust_lr/06c.benchmark b/bench_suites/imdb_robust_lr/06c.benchmark
new file mode 100644
index 0000000..32ebf41
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/06c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=06c
+QUERY_NUMBER_PADDED=06c
diff --git a/bench_suites/imdb_robust_lr/06d.benchmark b/bench_suites/imdb_robust_lr/06d.benchmark
new file mode 100644
index 0000000..1f2f635
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/06d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=06d
+QUERY_NUMBER_PADDED=06d
diff --git a/bench_suites/imdb_robust_lr/06e.benchmark b/bench_suites/imdb_robust_lr/06e.benchmark
new file mode 100644
index 0000000..312cd17
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/06e.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=06e
+QUERY_NUMBER_PADDED=06e
diff --git a/bench_suites/imdb_robust_lr/06f.benchmark b/bench_suites/imdb_robust_lr/06f.benchmark
new file mode 100644
index 0000000..58849e3
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/06f.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=06f
+QUERY_NUMBER_PADDED=06f
diff --git a/bench_suites/imdb_robust_lr/07a.benchmark b/bench_suites/imdb_robust_lr/07a.benchmark
new file mode 100644
index 0000000..da1f356
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/07a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=07a
+QUERY_NUMBER_PADDED=07a
diff --git a/bench_suites/imdb_robust_lr/07b.benchmark b/bench_suites/imdb_robust_lr/07b.benchmark
new file mode 100644
index 0000000..ee02b10
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/07b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=07b
+QUERY_NUMBER_PADDED=07b
diff --git a/bench_suites/imdb_robust_lr/07c.benchmark b/bench_suites/imdb_robust_lr/07c.benchmark
new file mode 100644
index 0000000..7162bf9
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/07c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=07c
+QUERY_NUMBER_PADDED=07c
diff --git a/bench_suites/imdb_robust_lr/08a.benchmark b/bench_suites/imdb_robust_lr/08a.benchmark
new file mode 100644
index 0000000..105d4e0
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/08a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=08a
+QUERY_NUMBER_PADDED=08a
diff --git a/bench_suites/imdb_robust_lr/08b.benchmark b/bench_suites/imdb_robust_lr/08b.benchmark
new file mode 100644
index 0000000..52422ff
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/08b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=08b
+QUERY_NUMBER_PADDED=08b
diff --git a/bench_suites/imdb_robust_lr/08c.benchmark b/bench_suites/imdb_robust_lr/08c.benchmark
new file mode 100644
index 0000000..36978b7
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/08c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=08c
+QUERY_NUMBER_PADDED=08c
diff --git a/bench_suites/imdb_robust_lr/08d.benchmark b/bench_suites/imdb_robust_lr/08d.benchmark
new file mode 100644
index 0000000..86fbd68
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/08d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=08d
+QUERY_NUMBER_PADDED=08d
diff --git a/bench_suites/imdb_robust_lr/09a.benchmark b/bench_suites/imdb_robust_lr/09a.benchmark
new file mode 100644
index 0000000..8bff9f1
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/09a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=09a
+QUERY_NUMBER_PADDED=09a
diff --git a/bench_suites/imdb_robust_lr/09b.benchmark b/bench_suites/imdb_robust_lr/09b.benchmark
new file mode 100644
index 0000000..48e42cb
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/09b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=09b
+QUERY_NUMBER_PADDED=09b
diff --git a/bench_suites/imdb_robust_lr/09c.benchmark b/bench_suites/imdb_robust_lr/09c.benchmark
new file mode 100644
index 0000000..eecaea6
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/09c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=09c
+QUERY_NUMBER_PADDED=09c
diff --git a/bench_suites/imdb_robust_lr/09d.benchmark b/bench_suites/imdb_robust_lr/09d.benchmark
new file mode 100644
index 0000000..5d5459c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/09d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=09d
+QUERY_NUMBER_PADDED=09d
diff --git a/bench_suites/imdb_robust_lr/10a.benchmark b/bench_suites/imdb_robust_lr/10a.benchmark
new file mode 100644
index 0000000..4d5de22
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/10a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=10a
+QUERY_NUMBER_PADDED=10a
diff --git a/bench_suites/imdb_robust_lr/10b.benchmark b/bench_suites/imdb_robust_lr/10b.benchmark
new file mode 100644
index 0000000..c12ddca
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/10b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=10b
+QUERY_NUMBER_PADDED=10b
diff --git a/bench_suites/imdb_robust_lr/10c.benchmark b/bench_suites/imdb_robust_lr/10c.benchmark
new file mode 100644
index 0000000..81518b8
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/10c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=10c
+QUERY_NUMBER_PADDED=10c
diff --git a/bench_suites/imdb_robust_lr/11a.benchmark b/bench_suites/imdb_robust_lr/11a.benchmark
new file mode 100644
index 0000000..9ebc4f7
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/11a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=11a
+QUERY_NUMBER_PADDED=11a
diff --git a/bench_suites/imdb_robust_lr/11b.benchmark b/bench_suites/imdb_robust_lr/11b.benchmark
new file mode 100644
index 0000000..713b97b
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/11b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=11b
+QUERY_NUMBER_PADDED=11b
diff --git a/bench_suites/imdb_robust_lr/11c.benchmark b/bench_suites/imdb_robust_lr/11c.benchmark
new file mode 100644
index 0000000..87fce5e
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/11c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=11c
+QUERY_NUMBER_PADDED=11c
diff --git a/bench_suites/imdb_robust_lr/11d.benchmark b/bench_suites/imdb_robust_lr/11d.benchmark
new file mode 100644
index 0000000..8df29bc
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/11d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=11d
+QUERY_NUMBER_PADDED=11d
diff --git a/bench_suites/imdb_robust_lr/12a.benchmark b/bench_suites/imdb_robust_lr/12a.benchmark
new file mode 100644
index 0000000..81d4e7c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/12a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=12a
+QUERY_NUMBER_PADDED=12a
diff --git a/bench_suites/imdb_robust_lr/12b.benchmark b/bench_suites/imdb_robust_lr/12b.benchmark
new file mode 100644
index 0000000..3f5d3ff
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/12b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=12b
+QUERY_NUMBER_PADDED=12b
diff --git a/bench_suites/imdb_robust_lr/12c.benchmark b/bench_suites/imdb_robust_lr/12c.benchmark
new file mode 100644
index 0000000..0a9f572
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/12c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=12c
+QUERY_NUMBER_PADDED=12c
diff --git a/bench_suites/imdb_robust_lr/13a.benchmark b/bench_suites/imdb_robust_lr/13a.benchmark
new file mode 100644
index 0000000..926dea4
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/13a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=13a
+QUERY_NUMBER_PADDED=13a
diff --git a/bench_suites/imdb_robust_lr/13b.benchmark b/bench_suites/imdb_robust_lr/13b.benchmark
new file mode 100644
index 0000000..897fdcc
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/13b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=13b
+QUERY_NUMBER_PADDED=13b
diff --git a/bench_suites/imdb_robust_lr/13c.benchmark b/bench_suites/imdb_robust_lr/13c.benchmark
new file mode 100644
index 0000000..534c0ff
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/13c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=13c
+QUERY_NUMBER_PADDED=13c
diff --git a/bench_suites/imdb_robust_lr/13d.benchmark b/bench_suites/imdb_robust_lr/13d.benchmark
new file mode 100644
index 0000000..4973804
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/13d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=13d
+QUERY_NUMBER_PADDED=13d
diff --git a/bench_suites/imdb_robust_lr/14a.benchmark b/bench_suites/imdb_robust_lr/14a.benchmark
new file mode 100644
index 0000000..edc2729
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/14a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=14a
+QUERY_NUMBER_PADDED=14a
diff --git a/bench_suites/imdb_robust_lr/14b.benchmark b/bench_suites/imdb_robust_lr/14b.benchmark
new file mode 100644
index 0000000..efc8ad3
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/14b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=14b
+QUERY_NUMBER_PADDED=14b
diff --git a/bench_suites/imdb_robust_lr/14c.benchmark b/bench_suites/imdb_robust_lr/14c.benchmark
new file mode 100644
index 0000000..33b01ff
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/14c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=14c
+QUERY_NUMBER_PADDED=14c
diff --git a/bench_suites/imdb_robust_lr/15a.benchmark b/bench_suites/imdb_robust_lr/15a.benchmark
new file mode 100644
index 0000000..d794854
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/15a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=15a
+QUERY_NUMBER_PADDED=15a
diff --git a/bench_suites/imdb_robust_lr/15b.benchmark b/bench_suites/imdb_robust_lr/15b.benchmark
new file mode 100644
index 0000000..bae3586
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/15b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=15b
+QUERY_NUMBER_PADDED=15b
diff --git a/bench_suites/imdb_robust_lr/15c.benchmark b/bench_suites/imdb_robust_lr/15c.benchmark
new file mode 100644
index 0000000..1dda56c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/15c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=15c
+QUERY_NUMBER_PADDED=15c
diff --git a/bench_suites/imdb_robust_lr/15d.benchmark b/bench_suites/imdb_robust_lr/15d.benchmark
new file mode 100644
index 0000000..bace9c1
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/15d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=15d
+QUERY_NUMBER_PADDED=15d
diff --git a/bench_suites/imdb_robust_lr/16a.benchmark b/bench_suites/imdb_robust_lr/16a.benchmark
new file mode 100644
index 0000000..b348e4c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/16a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=16a
+QUERY_NUMBER_PADDED=16a
diff --git a/bench_suites/imdb_robust_lr/16b.benchmark b/bench_suites/imdb_robust_lr/16b.benchmark
new file mode 100644
index 0000000..eb8b50c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/16b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=16b
+QUERY_NUMBER_PADDED=16b
diff --git a/bench_suites/imdb_robust_lr/16c.benchmark b/bench_suites/imdb_robust_lr/16c.benchmark
new file mode 100644
index 0000000..355688b
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/16c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=16c
+QUERY_NUMBER_PADDED=16c
diff --git a/bench_suites/imdb_robust_lr/16d.benchmark b/bench_suites/imdb_robust_lr/16d.benchmark
new file mode 100644
index 0000000..ff92970
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/16d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=16d
+QUERY_NUMBER_PADDED=16d
diff --git a/bench_suites/imdb_robust_lr/17a.benchmark b/bench_suites/imdb_robust_lr/17a.benchmark
new file mode 100644
index 0000000..6f2e6c0
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/17a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=17a
+QUERY_NUMBER_PADDED=17a
diff --git a/bench_suites/imdb_robust_lr/17b.benchmark b/bench_suites/imdb_robust_lr/17b.benchmark
new file mode 100644
index 0000000..a95d7e5
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/17b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=17b
+QUERY_NUMBER_PADDED=17b
diff --git a/bench_suites/imdb_robust_lr/17c.benchmark b/bench_suites/imdb_robust_lr/17c.benchmark
new file mode 100644
index 0000000..d08a776
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/17c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=17c
+QUERY_NUMBER_PADDED=17c
diff --git a/bench_suites/imdb_robust_lr/17d.benchmark b/bench_suites/imdb_robust_lr/17d.benchmark
new file mode 100644
index 0000000..4b2418c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/17d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=17d
+QUERY_NUMBER_PADDED=17d
diff --git a/bench_suites/imdb_robust_lr/17e.benchmark b/bench_suites/imdb_robust_lr/17e.benchmark
new file mode 100644
index 0000000..9febec8
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/17e.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=17e
+QUERY_NUMBER_PADDED=17e
diff --git a/bench_suites/imdb_robust_lr/17f.benchmark b/bench_suites/imdb_robust_lr/17f.benchmark
new file mode 100644
index 0000000..b38d69a
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/17f.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=17f
+QUERY_NUMBER_PADDED=17f
diff --git a/bench_suites/imdb_robust_lr/18a.benchmark b/bench_suites/imdb_robust_lr/18a.benchmark
new file mode 100644
index 0000000..2fafce4
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/18a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=18a
+QUERY_NUMBER_PADDED=18a
diff --git a/bench_suites/imdb_robust_lr/18b.benchmark b/bench_suites/imdb_robust_lr/18b.benchmark
new file mode 100644
index 0000000..2b7bb92
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/18b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=18b
+QUERY_NUMBER_PADDED=18b
diff --git a/bench_suites/imdb_robust_lr/18c.benchmark b/bench_suites/imdb_robust_lr/18c.benchmark
new file mode 100644
index 0000000..9a7198e
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/18c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=18c
+QUERY_NUMBER_PADDED=18c
diff --git a/bench_suites/imdb_robust_lr/19a.benchmark b/bench_suites/imdb_robust_lr/19a.benchmark
new file mode 100644
index 0000000..7b2f3db
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/19a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=19a
+QUERY_NUMBER_PADDED=19a
diff --git a/bench_suites/imdb_robust_lr/19b.benchmark b/bench_suites/imdb_robust_lr/19b.benchmark
new file mode 100644
index 0000000..7e7b56f
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/19b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=19b
+QUERY_NUMBER_PADDED=19b
diff --git a/bench_suites/imdb_robust_lr/19c.benchmark b/bench_suites/imdb_robust_lr/19c.benchmark
new file mode 100644
index 0000000..6ce01a4
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/19c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=19c
+QUERY_NUMBER_PADDED=19c
diff --git a/bench_suites/imdb_robust_lr/19d.benchmark b/bench_suites/imdb_robust_lr/19d.benchmark
new file mode 100644
index 0000000..2cdc17b
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/19d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=19d
+QUERY_NUMBER_PADDED=19d
diff --git a/bench_suites/imdb_robust_lr/20a.benchmark b/bench_suites/imdb_robust_lr/20a.benchmark
new file mode 100644
index 0000000..a47e2da
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/20a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=20a
+QUERY_NUMBER_PADDED=20a
diff --git a/bench_suites/imdb_robust_lr/20b.benchmark b/bench_suites/imdb_robust_lr/20b.benchmark
new file mode 100644
index 0000000..57034d1
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/20b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=20b
+QUERY_NUMBER_PADDED=20b
diff --git a/bench_suites/imdb_robust_lr/20c.benchmark b/bench_suites/imdb_robust_lr/20c.benchmark
new file mode 100644
index 0000000..91a819a
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/20c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=20c
+QUERY_NUMBER_PADDED=20c
diff --git a/bench_suites/imdb_robust_lr/21a.benchmark b/bench_suites/imdb_robust_lr/21a.benchmark
new file mode 100644
index 0000000..848baa4
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/21a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=21a
+QUERY_NUMBER_PADDED=21a
diff --git a/bench_suites/imdb_robust_lr/21b.benchmark b/bench_suites/imdb_robust_lr/21b.benchmark
new file mode 100644
index 0000000..ed4d09c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/21b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=21b
+QUERY_NUMBER_PADDED=21b
diff --git a/bench_suites/imdb_robust_lr/21c.benchmark b/bench_suites/imdb_robust_lr/21c.benchmark
new file mode 100644
index 0000000..e2781a8
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/21c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=21c
+QUERY_NUMBER_PADDED=21c
diff --git a/bench_suites/imdb_robust_lr/22a.benchmark b/bench_suites/imdb_robust_lr/22a.benchmark
new file mode 100644
index 0000000..000ed29
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/22a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=22a
+QUERY_NUMBER_PADDED=22a
diff --git a/bench_suites/imdb_robust_lr/22b.benchmark b/bench_suites/imdb_robust_lr/22b.benchmark
new file mode 100644
index 0000000..7c3a980
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/22b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=22b
+QUERY_NUMBER_PADDED=22b
diff --git a/bench_suites/imdb_robust_lr/22c.benchmark b/bench_suites/imdb_robust_lr/22c.benchmark
new file mode 100644
index 0000000..83861d7
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/22c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=22c
+QUERY_NUMBER_PADDED=22c
diff --git a/bench_suites/imdb_robust_lr/22d.benchmark b/bench_suites/imdb_robust_lr/22d.benchmark
new file mode 100644
index 0000000..f75ae9a
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/22d.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=22d
+QUERY_NUMBER_PADDED=22d
diff --git a/bench_suites/imdb_robust_lr/23a.benchmark b/bench_suites/imdb_robust_lr/23a.benchmark
new file mode 100644
index 0000000..f120716
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/23a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=23a
+QUERY_NUMBER_PADDED=23a
diff --git a/bench_suites/imdb_robust_lr/23b.benchmark b/bench_suites/imdb_robust_lr/23b.benchmark
new file mode 100644
index 0000000..3117ff7
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/23b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=23b
+QUERY_NUMBER_PADDED=23b
diff --git a/bench_suites/imdb_robust_lr/23c.benchmark b/bench_suites/imdb_robust_lr/23c.benchmark
new file mode 100644
index 0000000..6c2a96d
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/23c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=23c
+QUERY_NUMBER_PADDED=23c
diff --git a/bench_suites/imdb_robust_lr/24a.benchmark b/bench_suites/imdb_robust_lr/24a.benchmark
new file mode 100644
index 0000000..638ceb7
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/24a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=24a
+QUERY_NUMBER_PADDED=24a
diff --git a/bench_suites/imdb_robust_lr/24b.benchmark b/bench_suites/imdb_robust_lr/24b.benchmark
new file mode 100644
index 0000000..e63cee7
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/24b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=24b
+QUERY_NUMBER_PADDED=24b
diff --git a/bench_suites/imdb_robust_lr/25a.benchmark b/bench_suites/imdb_robust_lr/25a.benchmark
new file mode 100644
index 0000000..86c258f
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/25a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=25a
+QUERY_NUMBER_PADDED=25a
diff --git a/bench_suites/imdb_robust_lr/25b.benchmark b/bench_suites/imdb_robust_lr/25b.benchmark
new file mode 100644
index 0000000..6611857
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/25b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=25b
+QUERY_NUMBER_PADDED=25b
diff --git a/bench_suites/imdb_robust_lr/25c.benchmark b/bench_suites/imdb_robust_lr/25c.benchmark
new file mode 100644
index 0000000..41646b1
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/25c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=25c
+QUERY_NUMBER_PADDED=25c
diff --git a/bench_suites/imdb_robust_lr/26a.benchmark b/bench_suites/imdb_robust_lr/26a.benchmark
new file mode 100644
index 0000000..477bc38
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/26a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=26a
+QUERY_NUMBER_PADDED=26a
diff --git a/bench_suites/imdb_robust_lr/26b.benchmark b/bench_suites/imdb_robust_lr/26b.benchmark
new file mode 100644
index 0000000..1e5f2e8
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/26b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=26b
+QUERY_NUMBER_PADDED=26b
diff --git a/bench_suites/imdb_robust_lr/26c.benchmark b/bench_suites/imdb_robust_lr/26c.benchmark
new file mode 100644
index 0000000..c0039e5
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/26c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=26c
+QUERY_NUMBER_PADDED=26c
diff --git a/bench_suites/imdb_robust_lr/27a.benchmark b/bench_suites/imdb_robust_lr/27a.benchmark
new file mode 100644
index 0000000..67be07e
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/27a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=27a
+QUERY_NUMBER_PADDED=27a
diff --git a/bench_suites/imdb_robust_lr/27b.benchmark b/bench_suites/imdb_robust_lr/27b.benchmark
new file mode 100644
index 0000000..16924d2
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/27b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=27b
+QUERY_NUMBER_PADDED=27b
diff --git a/bench_suites/imdb_robust_lr/27c.benchmark b/bench_suites/imdb_robust_lr/27c.benchmark
new file mode 100644
index 0000000..ca3c51b
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/27c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=27c
+QUERY_NUMBER_PADDED=27c
diff --git a/bench_suites/imdb_robust_lr/28a.benchmark b/bench_suites/imdb_robust_lr/28a.benchmark
new file mode 100644
index 0000000..cb40788
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/28a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=28a
+QUERY_NUMBER_PADDED=28a
diff --git a/bench_suites/imdb_robust_lr/28b.benchmark b/bench_suites/imdb_robust_lr/28b.benchmark
new file mode 100644
index 0000000..fe6c8ac
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/28b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=28b
+QUERY_NUMBER_PADDED=28b
diff --git a/bench_suites/imdb_robust_lr/28c.benchmark b/bench_suites/imdb_robust_lr/28c.benchmark
new file mode 100644
index 0000000..a24e347
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/28c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=28c
+QUERY_NUMBER_PADDED=28c
diff --git a/bench_suites/imdb_robust_lr/29a.benchmark b/bench_suites/imdb_robust_lr/29a.benchmark
new file mode 100644
index 0000000..2825c67
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/29a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=29a
+QUERY_NUMBER_PADDED=29a
diff --git a/bench_suites/imdb_robust_lr/29b.benchmark b/bench_suites/imdb_robust_lr/29b.benchmark
new file mode 100644
index 0000000..1b213bd
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/29b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=29b
+QUERY_NUMBER_PADDED=29b
diff --git a/bench_suites/imdb_robust_lr/29c.benchmark b/bench_suites/imdb_robust_lr/29c.benchmark
new file mode 100644
index 0000000..6ac006c
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/29c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=29c
+QUERY_NUMBER_PADDED=29c
diff --git a/bench_suites/imdb_robust_lr/30a.benchmark b/bench_suites/imdb_robust_lr/30a.benchmark
new file mode 100644
index 0000000..57b2918
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/30a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=30a
+QUERY_NUMBER_PADDED=30a
diff --git a/bench_suites/imdb_robust_lr/30b.benchmark b/bench_suites/imdb_robust_lr/30b.benchmark
new file mode 100644
index 0000000..8214624
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/30b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=30b
+QUERY_NUMBER_PADDED=30b
diff --git a/bench_suites/imdb_robust_lr/30c.benchmark b/bench_suites/imdb_robust_lr/30c.benchmark
new file mode 100644
index 0000000..369e7b2
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/30c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=30c
+QUERY_NUMBER_PADDED=30c
diff --git a/bench_suites/imdb_robust_lr/31a.benchmark b/bench_suites/imdb_robust_lr/31a.benchmark
new file mode 100644
index 0000000..7d349cb
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/31a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=31a
+QUERY_NUMBER_PADDED=31a
diff --git a/bench_suites/imdb_robust_lr/31b.benchmark b/bench_suites/imdb_robust_lr/31b.benchmark
new file mode 100644
index 0000000..0b8c0bb
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/31b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=31b
+QUERY_NUMBER_PADDED=31b
diff --git a/bench_suites/imdb_robust_lr/31c.benchmark b/bench_suites/imdb_robust_lr/31c.benchmark
new file mode 100644
index 0000000..4fb2d9a
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/31c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=31c
+QUERY_NUMBER_PADDED=31c
diff --git a/bench_suites/imdb_robust_lr/32a.benchmark b/bench_suites/imdb_robust_lr/32a.benchmark
new file mode 100644
index 0000000..8d233ee
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/32a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=32a
+QUERY_NUMBER_PADDED=32a
diff --git a/bench_suites/imdb_robust_lr/32b.benchmark b/bench_suites/imdb_robust_lr/32b.benchmark
new file mode 100644
index 0000000..e4557a4
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/32b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=32b
+QUERY_NUMBER_PADDED=32b
diff --git a/bench_suites/imdb_robust_lr/33a.benchmark b/bench_suites/imdb_robust_lr/33a.benchmark
new file mode 100644
index 0000000..f863d02
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/33a.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=33a
+QUERY_NUMBER_PADDED=33a
diff --git a/bench_suites/imdb_robust_lr/33b.benchmark b/bench_suites/imdb_robust_lr/33b.benchmark
new file mode 100644
index 0000000..f70f625
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/33b.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=33b
+QUERY_NUMBER_PADDED=33b
diff --git a/bench_suites/imdb_robust_lr/33c.benchmark b/bench_suites/imdb_robust_lr/33c.benchmark
new file mode 100644
index 0000000..6a3a14d
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/33c.benchmark
@@ -0,0 +1,3 @@
+template benchmark/imdb_robust_lr/imdb_robust_lr.benchmark.in
+QUERY_NUMBER=33c
+QUERY_NUMBER_PADDED=33c
diff --git a/bench_suites/imdb_robust_lr/imdb_robust_lr.benchmark.in b/bench_suites/imdb_robust_lr/imdb_robust_lr.benchmark.in
new file mode 100644
index 0000000..b789606
--- /dev/null
+++ b/bench_suites/imdb_robust_lr/imdb_robust_lr.benchmark.in
@@ -0,0 +1,15 @@
+# name: ${FILE_PATH}
+# description: JOB query ${QUERY_NUMBER_PADDED} with Robust extension (largest_root heuristic)
+# group: [imdb_robust_lr]
+
+name Q${QUERY_NUMBER_PADDED}_Robust_LR
+group imdb_robust_lr
+
+cache imdb.duckdb
+
+init
+LOAD 'build/release/extension/robust/robust.duckdb_extension';
+SET disabled_optimizers = 'join_filter_pushdown';
+SET robust_heuristic = 'largest_root';
+
+run benchmark/imdb_plan_cost/queries/${QUERY_NUMBER_PADDED}.sql
diff --git a/bench_suites/tpch_baseline/q02.benchmark b/bench_suites/tpch_baseline/q02.benchmark
new file mode 100644
index 0000000..34162b8
--- /dev/null
+++ b/bench_suites/tpch_baseline/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q02.benchmark
+# description: Run TPCH query 02 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/bench_suites/tpch_baseline/q03.benchmark b/bench_suites/tpch_baseline/q03.benchmark
new file mode 100644
index 0000000..982959f
--- /dev/null
+++ b/bench_suites/tpch_baseline/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q03.benchmark
+# description: Run TPCH query 03 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/bench_suites/tpch_baseline/q07.benchmark b/bench_suites/tpch_baseline/q07.benchmark
new file mode 100644
index 0000000..7c09532
--- /dev/null
+++ b/bench_suites/tpch_baseline/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q07.benchmark
+# description: Run TPCH query 07 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/bench_suites/tpch_baseline/q08.benchmark b/bench_suites/tpch_baseline/q08.benchmark
new file mode 100644
index 0000000..4387eb4
--- /dev/null
+++ b/bench_suites/tpch_baseline/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q08.benchmark
+# description: Run TPCH query 08 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/bench_suites/tpch_baseline/q10.benchmark b/bench_suites/tpch_baseline/q10.benchmark
new file mode 100644
index 0000000..3cf0833
--- /dev/null
+++ b/bench_suites/tpch_baseline/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q10.benchmark
+# description: Run TPCH query 10 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/bench_suites/tpch_baseline/q11.benchmark b/bench_suites/tpch_baseline/q11.benchmark
new file mode 100644
index 0000000..d0e5e85
--- /dev/null
+++ b/bench_suites/tpch_baseline/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q11.benchmark
+# description: Run TPCH query 11 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/bench_suites/tpch_baseline/q17.benchmark b/bench_suites/tpch_baseline/q17.benchmark
new file mode 100644
index 0000000..a497c52
--- /dev/null
+++ b/bench_suites/tpch_baseline/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q17.benchmark
+# description: Run TPCH query 17 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/bench_suites/tpch_baseline/q18.benchmark b/bench_suites/tpch_baseline/q18.benchmark
new file mode 100644
index 0000000..38c6ea5
--- /dev/null
+++ b/bench_suites/tpch_baseline/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q18.benchmark
+# description: Run TPCH query 18 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/bench_suites/tpch_baseline/q21.benchmark b/bench_suites/tpch_baseline/q21.benchmark
new file mode 100644
index 0000000..87ef345
--- /dev/null
+++ b/bench_suites/tpch_baseline/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_baseline/q21.benchmark
+# description: Run TPCH query 21 (baseline, single-threaded)
+# group: [tpch_baseline]
+
+template benchmark/tpch_baseline/tpch_baseline.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/bench_suites/tpch_baseline/tpch_baseline.benchmark.in b/bench_suites/tpch_baseline/tpch_baseline.benchmark.in
new file mode 100644
index 0000000..fd6fb05
--- /dev/null
+++ b/bench_suites/tpch_baseline/tpch_baseline.benchmark.in
@@ -0,0 +1,20 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [tpch_baseline]
+
+argument sf 1
+
+require tpch
+
+cache tpch_sf${sf}.duckdb
+
+load
+CALL dbgen(sf=${sf});
+
+name Q${QUERY_NUMBER_PADDED}_baseline
+group tpch_baseline
+subgroup sf${sf}
+
+run tpchdata/queries/q${QUERY_NUMBER_PADDED}.sql
+
+result tpchdata/answers/sf1/q${QUERY_NUMBER_PADDED}.csv sf=1
diff --git a/bench_suites/tpch_robust/q02.benchmark b/bench_suites/tpch_robust/q02.benchmark
new file mode 100644
index 0000000..4a736fe
--- /dev/null
+++ b/bench_suites/tpch_robust/q02.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q02.benchmark
+# description: Run TPCH query 02 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=2
+QUERY_NUMBER_PADDED=02
diff --git a/bench_suites/tpch_robust/q03.benchmark b/bench_suites/tpch_robust/q03.benchmark
new file mode 100644
index 0000000..1cb8669
--- /dev/null
+++ b/bench_suites/tpch_robust/q03.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q03.benchmark
+# description: Run TPCH query 03 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=3
+QUERY_NUMBER_PADDED=03
diff --git a/bench_suites/tpch_robust/q07.benchmark b/bench_suites/tpch_robust/q07.benchmark
new file mode 100644
index 0000000..2012738
--- /dev/null
+++ b/bench_suites/tpch_robust/q07.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q07.benchmark
+# description: Run TPCH query 07 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=7
+QUERY_NUMBER_PADDED=07
diff --git a/bench_suites/tpch_robust/q08.benchmark b/bench_suites/tpch_robust/q08.benchmark
new file mode 100644
index 0000000..c14b051
--- /dev/null
+++ b/bench_suites/tpch_robust/q08.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q08.benchmark
+# description: Run TPCH query 08 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=8
+QUERY_NUMBER_PADDED=08
diff --git a/bench_suites/tpch_robust/q10.benchmark b/bench_suites/tpch_robust/q10.benchmark
new file mode 100644
index 0000000..5c6f73f
--- /dev/null
+++ b/bench_suites/tpch_robust/q10.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q10.benchmark
+# description: Run TPCH query 10 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=10
+QUERY_NUMBER_PADDED=10
diff --git a/bench_suites/tpch_robust/q11.benchmark b/bench_suites/tpch_robust/q11.benchmark
new file mode 100644
index 0000000..b2204e8
--- /dev/null
+++ b/bench_suites/tpch_robust/q11.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q11.benchmark
+# description: Run TPCH query 11 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=11
+QUERY_NUMBER_PADDED=11
diff --git a/bench_suites/tpch_robust/q17.benchmark b/bench_suites/tpch_robust/q17.benchmark
new file mode 100644
index 0000000..7949a1f
--- /dev/null
+++ b/bench_suites/tpch_robust/q17.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q17.benchmark
+# description: Run TPCH query 17 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=17
+QUERY_NUMBER_PADDED=17
diff --git a/bench_suites/tpch_robust/q18.benchmark b/bench_suites/tpch_robust/q18.benchmark
new file mode 100644
index 0000000..b5d334c
--- /dev/null
+++ b/bench_suites/tpch_robust/q18.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q18.benchmark
+# description: Run TPCH query 18 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=18
+QUERY_NUMBER_PADDED=18
diff --git a/bench_suites/tpch_robust/q21.benchmark b/bench_suites/tpch_robust/q21.benchmark
new file mode 100644
index 0000000..54a123d
--- /dev/null
+++ b/bench_suites/tpch_robust/q21.benchmark
@@ -0,0 +1,7 @@
+# name: benchmark/tpch_robust/q21.benchmark
+# description: Run TPCH query 21 with Robust extension (single-threaded)
+# group: [tpch_robust]
+
+template benchmark/tpch_robust/tpch_robust.benchmark.in
+QUERY_NUMBER=21
+QUERY_NUMBER_PADDED=21
diff --git a/bench_suites/tpch_robust/tpch_robust.benchmark.in b/bench_suites/tpch_robust/tpch_robust.benchmark.in
new file mode 100644
index 0000000..82202d8
--- /dev/null
+++ b/bench_suites/tpch_robust/tpch_robust.benchmark.in
@@ -0,0 +1,25 @@
+# name: ${FILE_PATH}
+# description: ${DESCRIPTION}
+# group: [tpch_robust]
+
+argument sf 1
+
+require tpch
+
+cache tpch_sf${sf}.duckdb
+
+load
+CALL dbgen(sf=${sf});
+
+name Q${QUERY_NUMBER_PADDED}_Robust
+group tpch_robust
+subgroup sf${sf}
+
+init
+LOAD 'build/release/extension/robust/robust.duckdb_extension';
+SET disabled_optimizers = 'join_filter_pushdown';
+SET robust_heuristic = 'join_order';
+
+run tpchdata/queries/q${QUERY_NUMBER_PADDED}.sql
+
+result tpchdata/answers/sf1/q${QUERY_NUMBER_PADDED}.csv sf=1
diff --git a/docs/SCRIPTS.md b/docs/SCRIPTS.md
new file mode 100644
index 0000000..ff9e9cb
--- /dev/null
+++ b/docs/SCRIPTS.md
@@ -0,0 +1,200 @@
+# Scripts
+
+## Profiling & Analysis
+
+### profile_query.sh
+Profiles a single query with and without RPT. Shows operator tree, scan detail, and a comparison summary with CPU breakdown and per-table scan stats.
+
+```
+./profile_query.sh <query>           # e.g. ./profile_query.sh 1a
+./profile_query.sh --sql "SELECT…"   # inline SQL
+./profile_query.sh --rpt-only 1a     # skip baseline
+./profile_query.sh --no-jfp rpt 1a   # disable join_filter_pushdown for rpt/baseline/both
+```
+
+### warmup_bench.sh
+Runs a query N times in a single session (1 cold + N-1 warm) for both baseline and RPT. Outputs a CSV and a wall/CPU time plot.
+
+```
+./warmup_bench.sh 32a        # 5 runs (default)
+./warmup_bench.sh -n 10 32a  # 10 runs
+./warmup_bench.sh --sql "…"  # inline SQL
+```
+
+| Flag | Description |
+|------|-------------|
+| `-n <N>` | Number of iterations (default: 5) |
+| `--sql "…"` | Inline SQL instead of query name |
+
+Output: `profiling_results/<query>_warmup.csv`, `profiling_results/<query>_warmup.png`
+
+### scan_compare.sh
+Runs all JOB queries (N runs each, single session per mode), ranks by rows scanned / wall time / CPU time % increase (RPT vs baseline). Prints per-query stats live.
+
+```
+./scan_compare.sh                  # top 10 by scanned %, 5 runs
+./scan_compare.sh -n 20            # top 20
+./scan_compare.sh -r 3             # 3 runs per query
+./scan_compare.sh --sort wall      # sort by wall time % increase
+./scan_compare.sh --sort cpu       # sort by CPU time % increase
+```
+
+| Flag | Description |
+|------|-------------|
+| `-n <N>` | Show top N queries (default: 10) |
+| `-r <R>` | Runs per query (default: 5) |
+| `--sort <key>` | `scanned` (default), `wall`, `cpu` |
+
+Output: `profiling_results/scan_compare.csv`
+
+### time_breakdown.sh
+Runs all JOB queries, breaks down CPU time by operator category (SEQ_SCAN, HASH_JOIN, BF, Other) with percentages. Two lines per query (BASE + RPT).
+
+```
+./time_breakdown.sh                # all queries, 5 runs
+./time_breakdown.sh -r 3           # 3 runs
+./time_breakdown.sh -q 1a,2b,13a   # specific queries only
+```
+
+| Flag | Description |
+|------|-------------|
+| `-r <R>` | Runs per query (default: 5) |
+| `-q <list>` | Comma-separated query names (default: all) |
+
+Output: `profiling_results/time_breakdown.csv`
+
+## Benchmarking
+
+### bench_job.sh
+Benchmarks JOB queries with RPT, tracking wall time and RPT-specific profiling stats (sink, source, finalize, probe times, rows filtered). Appends to a summary CSV for tracking across commits.
+
+```
+./bench_job.sh                          # all queries, min of 3 runs
+./bench_job.sh --query 3a --runs 5      # single query, 5 runs
+./bench_job.sh --with-baseline          # include baseline comparison
+./bench_job.sh --agg median --limit 20  # median of runs, first 20 queries
+./bench_job.sh --no-save                # print only, no CSV output
+```
+
+| Flag | Description |
+|------|-------------|
+| `--runs N` | Runs per query (default: 3) |
+| `--agg <mode>` | `min` (default), `median`, `max` |
+| `--limit N` | Only first N queries |
+| `--query <name>` | Single query |
+| `--title <text>` | Custom title (default: commit subject) |
+| `--with-baseline` | Also run without RPT for comparison |
+| `--no-save` | Don't write CSV files |
+
+Output: `benchmark_results/<commit>_<title>_detail.csv`, `benchmark_results/summary.csv`
+
+### bench_commits.sh
+Runs `bench_job.sh` across recent git commits. Checks out each commit, rebuilds, and benchmarks. Useful for tracking performance regressions.
+
+```
+./bench_commits.sh                          # last 10 commits, query 1a
+./bench_commits.sh --query 3a --last 5      # last 5 commits, query 3a
+./bench_commits.sh --upto abc123 --runs 5   # from abc123 to HEAD
+./bench_commits.sh --reverse                # newest first
+```
+
+| Flag | Description |
+|------|-------------|
+| `--last N` | Last N commits (default: 10) |
+| `--upto <commit>` | From commit to HEAD |
+| `--query <name>` | Query to benchmark (default: 1a) |
+| `--runs N` | Runs per query (default: 3) |
+| `--agg <mode>` | `min` (default), `median`, `max` |
+| `--reverse` | Run newest to oldest |
+
+### bench_compare.sh
+Compares JOB performance between current and previous commit. Builds both, runs `scripts/test_job.sh --timing` 3 times each, reports average geometric mean speedup.
+
+```
+./bench_compare.sh
+```
+
+### bench_job.sh
+Runs DuckDB's built-in benchmark runner for baseline and RPT benchmark suites, then compares results side-by-side with speedup calculations.
+
+```
+./bench_job.sh                       # all queries
+./bench_job.sh --pattern "03.*"      # specific pattern
+./bench_job.sh --rpt-only            # RPT benchmarks only
+./bench_job.sh --no-run              # compare existing results
+```
+
+| Flag | Description |
+|------|-------------|
+| `--pattern <pat>` | Query name regex (default: all) |
+| `--baseline-only` | Run only baseline |
+| `--rpt-only` | Run only RPT |
+| `--no-run` | Skip running, compare existing results |
+| `--out <dir>` | Output directory (default: `benchmark_results`) |
+
+Requires: `BUILD_BENCHMARK=1 GEN=ninja make release`
+
+## Testing
+
+### scripts/test_job.sh
+Tests all JOB queries for correctness (baseline vs RPT result comparison) with optional timing and speedup reporting.
+
+```
+./scripts/test_job.sh                           # test all queries
+./scripts/test_job.sh --timing --runs 3         # with timing, min of 3
+./scripts/test_job.sh --query 1a --verbose      # single query, verbose
+./scripts/test_job.sh --generate-baseline       # regenerate baselines
+./scripts/test_job.sh --no-jfp both --limit 10  # disable JFP, first 10
+```
+
+| Flag | Description |
+|------|-------------|
+| `--timing` | Show wall times and speedup |
+| `--runs N` | Runs per query, take min (default: 1) |
+| `--query <name>` | Test single query |
+| `--verbose` | Show diff details on failure |
+| `--generate-baseline` | Generate baseline results only |
+| `--test-only` | Test against existing baselines |
+| `--limit N` | First N queries only |
+| `--no-jfp <target>` | Disable join_filter_pushdown: `rpt`, `baseline`, `both` |
+
+Output: `job_test_results/`
+
+### test_job3a.sh
+Quick smoke test — runs JOB query 3a with and without RPT, compares results.
+
+```
+./test_job3a.sh
+```
+
+## Other
+
+### debug_duckdb.sh
+Runs the debug build of DuckDB with `src/tests.sql`.
+
+```
+./debug_duckdb.sh
+```
+
+### run_benchmark.sh
+Compiles and runs the bloom filter microbenchmark (`src/benchmark/bloom_filter_benchmark.cpp`).
+
+```
+./run_benchmark.sh           # full benchmark
+./run_benchmark.sh -q        # quick mode
+./run_benchmark.sh -v        # verbose
+```
+
+### run_threshold_experiment.sh
+Compiles and runs the threshold experiment to find the optimal crossover point between single-threaded vs parallel bloom filter building.
+
+```
+./run_threshold_experiment.sh              # 8 threads (default)
+./run_threshold_experiment.sh -t 4         # 4 threads
+./run_threshold_experiment.sh -o out.csv   # custom output file
+```
+
+| Flag | Description |
+|------|-------------|
+| `-t <N>` | Number of threads (default: 8) |
+| `-o <file>` | Output CSV (default: `threshold_results.csv`) |
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..c815525
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,260 @@
+# Robust — Architecture
+
+This document explains what the `robust` DuckDB extension does, how it does it, and where to look in the code for each piece. It assumes you've read [the README](../README.md) and have some DuckDB internals familiarity (logical/physical operators, sinks, optimizer extensions).
+
+If you're trying to decide whether Robust is the right tool for a problem, skim §1. If you're trying to understand the algorithm, read §2–5. If you're modifying the code, read §6 onward.
+
+## Contents
+
+1. [Overview](#1-overview)
+2. [The rewrite at a glance](#2-the-rewrite-at-a-glance)
+3. [The join graph (DAG)](#3-the-join-graph-dag)
+4. [Forward pass](#4-forward-pass)
+5. [Backward pass](#5-backward-pass)
+6. [Operator internals](#6-operator-internals)
+7. [Filter types and the picker](#7-filter-types-and-the-picker)
+8. [Edge cases and tricks](#8-edge-cases-and-tricks)
+
+---
+
+## 1. Overview
+
+In a multi-join analytic query, most of the rows scanned at the storage layer will never appear in the final result. They get joined to something on one side, then thrown away by a hash join further up the tree. The work of reading them, decompressing them, building hash tables on them, probing those hash tables, and propagating their column values through intermediate operators is wasted.
+
+**Robust** addresses this by implementing **predicate transfer** — a technique that lifts filter information out of join keys, propagates it across the entire join graph, and pushes it down so that probe-side scans skip rows that can't survive downstream joins. The filters propagated are **bloom filters**, **min/max ranges**, and **IN-lists** today; the filter set is extensible (see §7).
+
+The technique is described in two papers:
+
+- [**Predicate Transfer: Efficient Pre-Filtering on Multi-Join Queries**](https://www.cidrdb.org/cidr2024/papers/p22-yang.pdf) — Yang et al., CIDR 2024. The original formulation; introduces the `largest_root` heuristic used by this extension.
+- [**Debunking the Myth of Join Ordering: Toward Robust SQL Analytics**](https://arxiv.org/pdf/2502.15181) — 2025. Follow-up work motivating the broader "robust analytics" framing this extension takes its name from.
+
+Robust runs as a DuckDB optimizer extension. It builds a **predicate transfer DAG** over the query's join graph, performs a **forward pass** to construct filter sources, then a **backward pass** to propagate those filters back across the graph. The resulting filters are installed both as new operators in the physical plan (`CREATE_FILTER` / `PROBE_FILTER`) and — for forward-pass filters — as dynamic scan filters on the underlying `SEQ_SCAN`.
+
+### How it differs from DuckDB's native join_filter_pushdown
+
+DuckDB already does a form of pre-filtering internally via the `join_filter_pushdown` optimizer rule (internal name **JFP**). As each hash join builds its hash table, JFP extracts filters from the join keys (bloom filters, min/max ranges, and IN-lists — the same filter types Robust uses) and pushes them down to the probe-side scan.
+
+JFP's pushed filters also chain forward: in an `A ⋈ B ⋈ C` plan where `C` is the deepest build side, JFP can build a filter at the `C ⋈ B` join, push it into the scan of `B`, and then build a fresh filter at the next join up that propagates further down to `A`. So the "single immediate probe side" framing is too narrow — JFP does cascade forward through linear chains.
+
+What JFP *doesn't* do, and what Robust adds, is:
+
+1. **Bushy plans dilute the forward chain.** JFP's forward chaining assumes a single spine of joins. In bushy plans — where multiple build pipelines run in parallel before merging at a higher join — JFP can't carry a filter from one branch into a sibling branch, because the filter has no join to ride. Robust's DAG sees the whole graph at once and propagates filters across branches that JFP can't.
+2. **No backward pass.** JFP only flows filters in the build → probe direction within the join order. If an early join has an intermediate-result explosion that a later, more-selective join would have prevented, JFP can't apply that later filter retroactively. Robust's backward pass does exactly that — filters discovered late in the plan get propagated back to scans early in the plan, shrinking intermediate results before they explode.
+
+The two systems are complementary. On baseline measurements we leave JFP on; on Robust runs we typically disable JFP to isolate Robust's contribution. The README's [Benchmarks](../README.md#benchmarks-job) section has the methodology.
+
+### When Robust engages
+
+- **≥ 2 joins required.** Single-join queries get no benefit from cross-graph propagation (JFP already handles them). `RobustOptimizerContextState::Optimize` checks `edges.size() <= 1` and returns the plan unchanged ([`src/optimizer/robust_optimizer.cpp:1603`](../src/optimizer/robust_optimizer.cpp)).
+- **Equality joins only.** Non-equality predicates aren't tracked. Range joins flow through but produce no filters.
+- **Acyclic graphs.** The current focus is on acyclic join graphs, where Robust's behaviour is well understood. Behaviour on cyclic join graphs is not characterised yet — they may work, may degrade, or may crash; characterising and handling them properly is on the near-term roadmap.
+
+---
+
+## 2. The rewrite at a glance
+
+![Baseline vs. Robust plan](figures/arch/d1-baseline-vs-robust.png)
+
+The diagram above shows a 4-way join (`orders ⋈ customers ⋈ products ⋈ categories`) under stock DuckDB on the left and under Robust on the right. Both plans return identical rows.
+
+The Robust plan inserts two kinds of new operator:
+
+- **`CREATE_FILTER`** sits above any base scan that contributes to a join. It's a parallel sink: it consumes the entire build-side chunk stream, builds the filters in thread-local state, merges them globally on combine, and finalizes them once all input is seen.
+- **`PROBE_FILTER`** sits above the *probe-side* scan of a join. It applies the filters produced by its paired `CREATE_FILTER` to incoming rows so that only rows with a chance of matching downstream make it into the hash join. The exact mechanism — whether the filter is evaluated per-row inside the operator or pushed all the way into the underlying scan as a dynamic filter — depends on which pass produced it, and is covered in §6.
+
+The salmon-shaded boxes in every diagram throughout this doc represent extension operators we add; outline-only boxes are stock DuckDB operators we don't modify. Where the `PROBE_FILTER` border is *dashed*, the filter has been pushed into the underlying scan as a dynamic filter rather than evaluated by the operator itself (see §6.3).
+
+Mental model in one paragraph: the rest of this doc explains (a) how we decide *which* `CREATE_FILTER`s and `PROBE_FILTER`s to insert and *where* — that's the DAG and the two passes; (b) what runs *inside* each operator — that's §6; and (c) which *filter types* it builds and pushes — that's §7. The rest is edge cases.
+
+---
+
+## 3. The join graph (DAG)
+
+### 3.1 Equivalence classes
+
+A query's equality joins partition all column bindings into **equivalence classes**: sets of `(table_index, column_index)` pairs that are transitively equal via join predicates. For example, in
+
+```sql
+FROM A JOIN B ON A.x = B.x
+       JOIN C ON B.x = C.y
+```
+
+the bindings `{A.x, B.x, C.y}` form a single equivalence class — any filter derived from any of them is semantically valid on the other two.
+
+Robust tracks these via a union-find structure built incrementally as the plan is traversed in `ExtractOperators` and `ExtractOperatorsRecursive` ([`src/optimizer/robust_optimizer.cpp`](../src/optimizer/robust_optimizer.cpp), around line 50 onward). DELIM_JOINs are treated identically to comparison joins for edge extraction, which lets decorrelated subqueries (TPC-H Q17, Q21) participate; MARK_JOINs have their probe child recursed into but their build child (a column-data scan of IN-list values) is skipped, since it's not a real base table.
+
+### 3.2 Heuristic A — `largest_root`
+
+The `largest_root` heuristic is the formulation from the **original predicate transfer paper** (Yang et al., CIDR 2024 — linked in §1). It builds a **minimum spanning tree** (Prim's algorithm) over the join edges, growing from the table with highest base cardinality. The MST is then rooted at that same table via BFS (`BuildRootedTree`).
+
+![DAG under largest_root](figures/arch/d2a-dag-largest-root.png)
+
+It tends to produce well-balanced trees where the largest table is the anchor and small dim tables become leaves. Forward and backward passes are generated by the tree-based `GenerateStageModifications`.
+
+### 3.3 Heuristic B — `join_order` (default)
+
+`join_order` is a **new heuristic introduced by this extension** — it does not appear in the original paper — and is what we ship as the default. Instead of building an MST, it mirrors *DuckDB's own join execution order* by running a build-first DFS over the logical plan: at each join, the right (build) child is visited before the left (probe) child. The resulting **DFS index** of each base-table node encodes execution order — a lower index means earlier (build-side) execution.
+
+For each equality join condition, both column bindings are resolved through `rename_col_bindings` (a map that tracks renames across projections and aggregates), union-find merges them, and a directed edge is added **from higher DFS index to lower DFS index** — i.e. from the later-executed table to its build-side feeder.
+
+![DAG under join_order](figures/arch/d2b-dag-join-order.png)
+
+**Why this works:** DuckDB has already done expensive cardinality estimation and plan enumeration before our optimizer runs. Its join order already reflects "build the smaller side, probe with the larger" decisions. By following the same order, our filter cascades fire in the same sequence as the hash-join pipelines, which maximises early pruning on linear join chains.
+
+The implementation is in `BuildPhysicalPlanDAG` and `GenerateStageModificationsFromDAG` ([`src/optimizer/robust_optimizer.cpp:1619–1648`](../src/optimizer/robust_optimizer.cpp)).
+
+### 3.4 Iterative root flipping
+
+A `join_order` DAG can have **multiple roots** — nodes with no parents. This happens when small dimensions are at the "top" of the join order. The problem: if a fact table sits one level down with a small-dim root above it, the fact table is on the probe side of its only edge but has no parent driving a filter into it. The largest table — the one we most want to filter — ends up doing the full scan.
+
+The fix is `FlipRootsToLeaves` ([`src/optimizer/robust_optimizer.cpp:718–813`](../src/optimizer/robust_optimizer.cpp)). It identifies all roots, keeps the one with highest cardinality as the **anchor root**, and iteratively reverses edges from every other root to its children until only the anchor has no parents.
+
+The *iterative* part matters: a single-pass flip can orphan intermediate nodes (flipping `name → aka_name` makes `aka_name` parentless, creating a new non-anchor root). The while-loop at [`robust_optimizer.cpp:741`](../src/optimizer/robust_optimizer.cpp) keeps flipping until the graph is stable. See [feature 5](../wip_docs/features/5-scan-table-filters-bypass.md) for the bug story.
+
+Gated by `robust_flip_roots` (default `true`). Set it to `false` to compare against pre-flip behaviour.
+
+### 3.5 What the optimizer accepts
+
+The DAG extractor walks past several DuckDB operator types it considers "transparent":
+
+| Operator | Treatment |
+|---|---|
+| `LOGICAL_COMPARISON_JOIN` | Edge source — extracts equality conditions |
+| `LOGICAL_DELIM_JOIN` | Same as above (decorrelated subqueries) |
+| `LOGICAL_FILTER`, `LOGICAL_PROJECTION` | Walked through; renames tracked |
+| `LOGICAL_MARK_JOIN` | Probe child recursed, build child skipped |
+| `LOGICAL_AGGREGATE` | Currently a barrier — [feature 11](../wip_docs/features/11-bf-above-aggregate-and-compress.md) plans to lift CREATE_FILTERs above |
+| `__internal_compress_integral_*` projections | Currently a barrier — same feature 11 plan |
+
+---
+
+## 4. Forward pass
+
+![Forward pass](figures/arch/d3-forward-pass.png)
+
+The forward pass walks the DAG **bottom-up** — from leaves to root. For each child node at depth L, for each edge to a parent: emit a `CREATE_FILTER` on the child (building filters from the child's join column) and a `PROBE_FILTER` on the parent (using those filters to filter the parent's join column).
+
+The output of the forward pass is a stream of `FilterOperation` records, each marked with `is_forward_pass = true` and a `sequence_number` preserving generation order (needed for stack insertion later).
+
+Where the filters are *built* — meaning what rows feed into the `CREATE_FILTER` sink — matters enormously. If a `CREATE_FILTER` sits below a `FILTER` operator, it sees the unfiltered rows and produces a too-broad filter. Two lift passes ensure the `CREATE_FILTER` sees the *post-filter* row set:
+
+- **`LiftCreateFilterAboveMarkJoin`** ([`robust_optimizer.cpp:1528–1559`](../src/optimizer/robust_optimizer.cpp)) detects `LOGICAL_FILTER(child = LOGICAL_MARK_JOIN)` patterns — DuckDB's representation of `IN` predicates — and moves the `CREATE_FILTER` stack from below the `MARK_JOIN`'s probe child up to just above the `FILTER`. Without this, the `cast_info` filter on JOB 18c builds a bloom filter from 8.2M rows instead of the 1.2M that survive the `IN` predicate ([feature 6](../wip_docs/features/6-lift-create-bf-above-mark-join.md)).
+- **`LiftCreateFilterAboveFilter`** ([`robust_optimizer.cpp:1561–1586`](../src/optimizer/robust_optimizer.cpp)) handles the plain-`FILTER` analogue — `CREATE_FILTER` lifts above any inline filter that sits between it and the underlying scan.
+
+After lifting, `LinkProbeFilterToCreateFilter` walks the plan twice to wire each `LogicalProbeFilter` to its matching `LogicalCreateFilter` via the `related_create_filter` and `related_probe_filter` vectors. This is what makes the backward pass's per-row `LookupSel` (see §6.2) able to find its filter.
+
+Finally, `SetupDynamicFilterPushdown` runs for forward-pass `CREATE_FILTER`s **only**: it locates each related probe's `LogicalGet`, attaches a `DynamicTableFilterSet`, and registers the `LogicalCreateFilter` to populate that set during `Finalize`. The forward-pass `PROBE_FILTER`s are marked `is_passthrough = true` — the real filtering happens at the scan now.
+
+---
+
+## 5. Backward pass
+
+![Backward pass](figures/arch/d4-backward-pass.png)
+
+The backward pass walks the DAG **top-down** — from root to leaves — and propagates filters discovered late in the plan back into scans earlier in the plan. Where the forward pass builds filters bottom-up from each leaf, the backward pass takes filters that already exist higher in the tree and reuses them downstream, shrinking intermediate results before they have a chance to explode.
+
+This is the main thing that distinguishes Robust from DuckDB's native JFP (§1) and is where the bulk of the JOB workload's wins come from. On `title`-anchored queries — where many tables join on `movie_id` — one filter derived from a highly selective `title` predicate ends up pruning a dozen other probe-side scans.
+
+### How it works under the hood
+
+For each child node, parents are sorted by **cardinality ascending** (smallest first — the most selective filter applies earliest). For each parent edge, the pass consults the union-find structure for the **equivalence class** of the edge's column (§3.1) — the set of `(table, column)` pairs the join graph has proven transitively equal:
+
+- If that class **already has a `CREATE_FILTER` source** from an ancestor traversal, only emit a `USE` operation on the child, pointing at the ancestor's existing filter. No new bloom filter is built; the existing filter is reused across the class.
+- Otherwise, emit both a `CREATE` on the parent (because that's where the filter is built) and a `USE` on the child, recording the parent as the new source for that equivalence class.
+
+Unlike the forward pass, the backward pass does **not** currently push its filters into scans as dynamic filters. Its filters apply per-row inside `PROBE_FILTER::Execute` via the bloom filter's `LookupSel` — see §6.2 for the operator side and §6.3 for the dynamic-pushdown mechanism the forward pass uses.
+
+---
+
+## 6. Operator internals
+
+### 6.1 `CREATE_FILTER` as a parallel sink
+
+![CREATE_FILTER sink lifecycle](figures/arch/d5-create-filter-lifecycle.png)
+
+`PhysicalCreateFilter` implements DuckDB's parallel sink interface (`IsSink() = true`, `ParallelSink() = true`):
+
+| Phase | What it does |
+|---|---|
+| `GetGlobalSinkState` | Allocates one `PTBloomFilter` per build column at the operator's `estimated_cardinality`. Resolves the shared `probe_empty_flag` from `ProbeEmptyRegistry` if the operator is in the forward pass. |
+| `Sink(chunk)` | Per-thread: checks `probe_empty_flag` and short-circuits if set; appends the chunk to a thread-local `ColumnDataCollection`; inserts hashes into the BF; (forward pass only) updates thread-local min/max and tracks distinct values up to `threshold + 1`. |
+| `Combine` | Under mutex: merges thread-local `ColumnDataCollection` / min-max / distinct sets into the global state. |
+| `Finalize` | Concatenates per-thread collections into a single `total_data` collection. Runs the **BF resize check**: if `actual_rows * 8 > allocated_bits` (i.e. we're under 8 bits/key, ≥ 2.3% FPR), tear down the BF and call `PTBloomFilter::ReinitializeAndRehash` to rebuild at the correct size. Sets `probe_empty_flag` to `true` if `actual_rows == 0`. Calls `PushDynamicFilters`. |
+| `PushDynamicFilters` (forward only) | For each `pushdown_target`, picks the right filter type (see §7) and installs it on the target `DynamicTableFilterSet`. |
+
+Two of the most material decisions inside this lifecycle:
+
+**BF resize at Finalize** ([`physical_create_filter.cpp:493–513`](../src/operators/physical_create_filter.cpp)). The BF is sized at plan time using optimizer cardinality estimates, which can be 5–10× wrong. If we under-allocate, the resulting bloom filter is so dense (1.68 bits/key in pathological cases) that its FPR approaches 65% — effectively a no-op. The resize at Finalize uses the actual row count, targeting ≥ 8 bits/key (≤ 2.3% FPR). On the JOB suite this single change moved geomean speedup from **1.132× to 1.193×** ([feature 7](../wip_docs/features/7-accurate-bf-sizing.md)).
+
+**Empty-probe short-circuit** ([feature 8](../wip_docs/features/8-short-circuit-empty-probe.md), [`physical_create_filter.cpp:199–203, 236–238, 524–526`](../src/operators/physical_create_filter.cpp)). Multiple `CREATE_FILTER`s often target the same probe-side table. If one of them finalizes with zero rows (the filter on the build side is so selective nothing survives), it sets a shared `atomic<bool>` on the `ProbeEmptyRegistry` for that probe. Sibling `CREATE_FILTER`s targeting the same probe check that flag at the top of every `Sink` call and immediately return `SinkResultType::FINISHED`, draining their pipelines without doing further build work. On JOB 32a this saved 3.6ms on a 186K-row scan that would have been entirely pointless.
+
+### 6.2 `PROBE_FILTER` — two execution modes
+
+`PhysicalProbeFilter` extends `CachingPhysicalOperator`. It has two distinct execution behaviours depending on which pass created it:
+
+**Forward-pass (`is_passthrough = true`).** The filtering work has been hoisted into the underlying scan's `DynamicTableFilterSet`. The operator itself does nothing useful at runtime — `ExecuteInternal` just calls `chunk.Reference(input)`. The operator still exists in the plan because it makes the dataflow legible in `EXPLAIN` output and because it's where the `LogicalProbeFilter ↔ LogicalCreateFilter` link is anchored.
+
+**Backward-pass.** The operator actually filters chunks per-row. On first call it lazily initialises a vector of `PTBloomFilter` references from `related_create_filter_vec` (because the backward pass can have multiple build sides feeding one probe). Each chunk passes through `PTBloomFilter::LookupSel`, which produces a `SelectionVector` of surviving rows; the operator then slices the chunk and emits.
+
+The dashed border on `PROBE_FILTER` boxes in every diagram throughout this doc marks the passthrough variant.
+
+### 6.3 Dynamic filter pushdown into scans
+
+![Dynamic filter pushdown](figures/arch/d6-dynamic-pushdown.png)
+
+For forward-pass `CREATE_FILTER`s, the optimizer installs the filter on `LogicalGet.dynamic_filters` during planning (`SetupDynamicFilterPushdown`). When DuckDB later constructs the physical `PhysicalTableScan`, it picks up `dynamic_filters` from the `LogicalGet` and consults them at scan time via `DynamicTableFilterSet::GetCombinedFilter`.
+
+The result: the scan does both **zonemap-based row-group pruning** (entire row groups skipped without reading) *and* **per-row filtering** as appropriate to the filter type. The `PROBE_FILTER` operator above doesn't have to do anything because the rows that would have been filtered never make it out of the scan.
+
+This pushdown is **forward-pass only**. Backward-pass filters apply at the `PROBE_FILTER` operator instead.
+
+---
+
+## 7. Filter types and the picker
+
+![Filter-type picker](figures/arch/d7-filter-picker.png)
+
+Robust ships four filter types today; the picker logic lives in `PhysicalCreateFilter::PushDynamicFilters` ([`physical_create_filter.cpp:364`](../src/operators/physical_create_filter.cpp)).
+
+| Filter | Implementation | Push form | When chosen |
+|---|---|---|---|
+| **Always-false** | n/a | `ConstantFilter(GREATERTHAN, MaximumValue(type))` | Build side has 0 rows |
+| **Equality constant** | n/a | `ConstantFilter(EQUAL, v)` | Exactly 1 distinct build value |
+| **IN-list** | `value_set_t` (capped, dynamic-or-filter) | `OptionalFilter(InFilter(values))` | Distinct count ≤ `robust_dynamic_or_filter_threshold` (default 50) |
+| **Bloom + min/max** | `PTBloomFilter` + `ColumnMinMax` | `SelectivityOptionalFilter(BFTableFilter)` plus two `ConstantFilter`s for min and max | Otherwise — the default path |
+
+The **IN-list path** ([feature 9](../wip_docs/features/9-in-filter-for-small-build-side.md)) is the most consequential addition beyond the original paper. When a build side has a small number of distinct values, pushing an exact `IN`-list enables tighter zonemap pruning than a bloom filter does — for PK-clustered columns this can mean landing in a single row group versus the BF's "could be anywhere in the cardinality range". On a full JOB sweep this change improved 37 queries with zero regressions and reduced total rows scanned by **−84.7M** in aggregate.
+
+**Filter-type override.** The `robust_filter_type` setting (default `"all"`) can force the picker to a subset: `"bf_only"`, `"minmax_only"`. Useful for ablation studies.
+
+### Bloom filter internals
+
+We wrap DuckDB's native `duckdb::BloomFilter` in `PTBloomFilter` ([`src/include/bloom_filter.hpp`](../src/include/bloom_filter.hpp), [`src/bloom_filter.cpp`](../src/bloom_filter.cpp)). The wrapper adds:
+
+- `DataChunk`-level `Insert` / `LookupSel` operations
+- Resize-and-rehash for the cardinality-mismatch case (§6.1)
+- An `IsEmpty` check (`!bf_.IsInitialized()`)
+
+Insertion uses `BloomFilter::InsertHashes`, which is atomic via `fetch_or` (safe for parallel sink threads). Lookup returns a `SelectionVector` directly via `LookupSel`, avoiding an intermediate `uint32_t` array allocation.
+
+We don't implement the bloom filter ourselves — that's DuckDB's split-block design with 12 bits/key and 4 bits set per insert. Replacing the earlier hand-rolled implementation with the native one was a significant cleanup; see `wip_docs/bloom_filter_comparison.md` for the migration notes.
+
+---
+
+## 8. Edge cases and tricks
+
+A non-exhaustive list of things the code handles explicitly because they bit us at some point:
+
+- **MARK_JOIN probe-only DFS** ([`robust_optimizer.cpp:549–551`](../src/optimizer/robust_optimizer.cpp)). MARK_JOIN's build child is a column-data scan of `IN`-list values, not a real table; recursing into it would corrupt the DAG. Only the probe child is walked.
+- **DELIM_JOIN acceptance** ([`robust_optimizer.cpp:50–51`](../src/optimizer/robust_optimizer.cpp)). Decorrelated subqueries (TPC-H Q17, Q21) use `LOGICAL_DELIM_JOIN`; treating it identically to `LOGICAL_COMPARISON_JOIN` for edge extraction lets these queries participate in transfer.
+- **Consecutive `CREATE` merging** ([`BuildStackedBFOperators`, robust_optimizer.cpp:1183–1214](../src/optimizer/robust_optimizer.cpp)). Multiple `FilterOperation`s targeting the same `build_table_idx` get merged into a single `LogicalCreateFilter` with multiple build/probe column pairs. Fewer pipeline breaks, fewer redundant materializations.
+- **Lift above MARK_JOIN / FILTER** (§4). Without these, `CREATE_FILTER`s build their filters from the wrong row set.
+- **Iterative root flipping** (§3.4). Single-pass flipping orphans intermediate nodes; the while-loop iterates until stable.
+- **Single-join early exit** ([`robust_optimizer.cpp:1603`](../src/optimizer/robust_optimizer.cpp)). `edges.size() <= 1` returns the plan unchanged — predicate transfer needs at least two joins to have anything to transfer.
+- **Cyclic join graph OOB.** Cyclic graphs (TPC-H Q5, Q9, Q20) produce equivalence classes that `LogicalCreateFilter::CreatePlan` can't fully resolve, causing an out-of-bounds in `bound_column_indices` at sink time. Currently handled by skip-listing those queries in the test harness; a proper detect-and-bail guard is [feature 10](../wip_docs/features/10-tpch-workload-integration.md) work.
+
+---
+
+---
+
+**Last updated:** 2026-05. Audit-of-record against current `main` (commits `08bdd01` and earlier).
diff --git a/docs/figures/hj_card_sum_pairs_line.png b/docs/figures/hj_card_sum_pairs_line.png
new file mode 100644
index 0000000..751007d
Binary files /dev/null and b/docs/figures/hj_card_sum_pairs_line.png differ
diff --git a/docs/figures/memory_ratio.png b/docs/figures/memory_ratio.png
new file mode 100644
index 0000000..fff2c66
Binary files /dev/null and b/docs/figures/memory_ratio.png differ
diff --git a/docs/figures/speedup_join_order.png b/docs/figures/speedup_join_order.png
new file mode 100644
index 0000000..78f2dac
Binary files /dev/null and b/docs/figures/speedup_join_order.png differ
diff --git a/duckdb b/duckdb
index 0b83e5d..14eca11 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit 0b83e5d2f68bc02dfefde74b846bd039f078affa
+Subproject commit 14eca11bd9d4a0de2ea0f078be588a9c1c5b279c
diff --git a/extension-ci-tools b/extension-ci-tools
index b808e51..4b3b37b 160000
--- a/extension-ci-tools
+++ b/extension-ci-tools
@@ -1 +1 @@
-Subproject commit b808e5130cb6e1341ab968a2a9c0f5f236dd2ec8
+Subproject commit 4b3b37b0c9de00da54e1765d65abfea3f94617f4
diff --git a/extension_config.cmake b/extension_config.cmake
index 0bdaa55..ea1b5e1 100644
--- a/extension_config.cmake
+++ b/extension_config.cmake
@@ -1,7 +1,7 @@
 # This file is included by DuckDB's build system. It specifies which extension to load
 
 # Extension from this repo - build as dynamic extension 
-duckdb_extension_load(rpt
+duckdb_extension_load(robust
     SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
     LOAD_TESTS
     DONT_LINK
diff --git a/patches/01-benchmark-runner-allow-unsigned-extensions.patch b/patches/01-benchmark-runner-allow-unsigned-extensions.patch
new file mode 100644
index 0000000..8a7d006
--- /dev/null
+++ b/patches/01-benchmark-runner-allow-unsigned-extensions.patch
@@ -0,0 +1,12 @@
+diff --git a/benchmark/interpreted_benchmark.cpp b/benchmark/interpreted_benchmark.cpp
+index 3510fe760e..e3333d6093 100644
+--- a/benchmark/interpreted_benchmark.cpp
++++ b/benchmark/interpreted_benchmark.cpp
+@@ -62,6 +62,7 @@ struct InterpretedBenchmarkState : public BenchmarkState {
+ 			result->options.serialization_compatibility = SerializationCompatibility::FromString(version);
+ 		}
+ 		result->options.load_extensions = false;
++		result->SetOptionByName("allow_unsigned_extensions", true);
+ 		return result;
+ 	}
+ };
diff --git a/patches/02-skip-verify-op-debug-workaround.patch b/patches/02-skip-verify-op-debug-workaround.patch
new file mode 100644
index 0000000..4a62a82
--- /dev/null
+++ b/patches/02-skip-verify-op-debug-workaround.patch
@@ -0,0 +1,13 @@
+diff --git a/src/optimizer/column_lifetime_analyzer.cpp b/src/optimizer/column_lifetime_analyzer.cpp
+index e31e9b4451..26048450a9 100644
+--- a/src/optimizer/column_lifetime_analyzer.cpp
++++ b/src/optimizer/column_lifetime_analyzer.cpp
+@@ -56,7 +56,7 @@ void ColumnLifetimeAnalyzer::ExtractColumnBindings(const Expression &expr, vecto
+ }
+ 
+ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
+-	Verify(op);
++	// Verify(op);
+ 	if (TopN::CanOptimize(op) && op.children[0]->type == LogicalOperatorType::LOGICAL_ORDER_BY) {
+ 		// Let's not mess with this, TopN is more important than projection maps
+ 		// TopN does not support a projection map like Order does
diff --git a/scripts/bench_job.sh b/scripts/bench_job.sh
new file mode 100755
index 0000000..db71e46
--- /dev/null
+++ b/scripts/bench_job.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# bench_job.sh - Run baseline and Robust JOB benchmarks via DuckDB's benchmark_runner, report side-by-side comparison
+#
+# Usage: ./scripts/bench_job.sh [options]
+#   --pattern <pat>   Query name pattern, e.g. "03.*" (default: .* = all)
+#   --baseline-only   Run only baseline benchmarks
+#   --robust-only        Run only Robust benchmarks
+#   --no-run          Skip running, just compare existing results
+#   --forward-only    Use forward-only pass mode for Robust
+#   --heuristic <name> Robust heuristic: join_order (default), largest_root
+#   --out <dir>       Output directory (default: benchmark_results)
+#   --metric <m>      Aggregation metric across runs: min (default), geomean
+#   --robust-first    Run Robust suite before baseline (default: baseline first)
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+cd "$PROJECT_ROOT"
+RUNNER="$PROJECT_ROOT/build/release/benchmark/benchmark_runner"
+PATTERN=".*"
+RUN_BASELINE=true
+RUN_ROBUST=true
+FORWARD_ONLY=false
+HEURISTIC=""
+OUT_DIR="$PROJECT_ROOT/results/benchmark_results"
+METRIC="min"
+ROBUST_FIRST=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --pattern) PATTERN="$2"; shift 2 ;;
+        --baseline-only) RUN_ROBUST=false; shift ;;
+        --robust-only) RUN_BASELINE=false; shift ;;
+        --no-run) RUN_BASELINE=false; RUN_ROBUST=false; shift ;;
+        --forward-only) FORWARD_ONLY=true; shift ;;
+        --heuristic) HEURISTIC="$2"; shift 2 ;;
+        --out) OUT_DIR="$2"; shift 2 ;;
+        --metric) METRIC="$2"; shift 2 ;;
+        --robust-first) ROBUST_FIRST=true; shift ;;
+        *) echo "Unknown option: $1"; exit 1 ;;
+    esac
+done
+
+mkdir -p "$OUT_DIR"
+
+if [ ! -f "$RUNNER" ]; then
+    echo "Error: benchmark_runner not found at $RUNNER"
+    echo "Build with: BUILD_BENCHMARK=1 GEN=ninja make release"
+    exit 1
+fi
+
+BASELINE_RAW="$OUT_DIR/baseline_raw.tsv"
+ROBUST_RAW="$OUT_DIR/robust_raw.tsv"
+
+run_baseline() {
+    if [ "$RUN_BASELINE" = true ]; then
+        echo "Running baseline benchmarks (pattern: $PATTERN)..."
+        "$RUNNER" "benchmark/imdb/$PATTERN" 2>&1 | tee "$BASELINE_RAW"
+        echo "Baseline done."
+    fi
+}
+
+run_robust() {
+    if [ "$RUN_ROBUST" = true ]; then
+        if [ "$HEURISTIC" = "largest_root" ]; then
+            ROBUST_SUITE="imdb_robust_lr"
+            echo "Running Robust largest_root benchmarks (pattern: $PATTERN)..."
+        elif [ "$FORWARD_ONLY" = true ]; then
+            ROBUST_SUITE="imdb_robust_fwd"
+            echo "Running Robust forward-only benchmarks (pattern: $PATTERN)..."
+        else
+            ROBUST_SUITE="imdb_robust"
+            echo "Running Robust default (join_order) benchmarks (pattern: $PATTERN)..."
+        fi
+        "$RUNNER" "benchmark/${ROBUST_SUITE}/$PATTERN" 2>&1 | tee "$ROBUST_RAW"
+        echo "Robust done."
+    fi
+}
+
+if [ "$ROBUST_FIRST" = true ]; then
+    run_robust
+    run_baseline
+else
+    run_baseline
+    run_robust
+fi
+
+if [ ! -f "$BASELINE_RAW" ] || [ ! -f "$ROBUST_RAW" ]; then
+    echo "Error: need both baseline and Robust results to compare"
+    exit 1
+fi
+
+python3 - "$BASELINE_RAW" "$ROBUST_RAW" "$OUT_DIR/comparison.tsv" "$METRIC" <<'PYEOF'
+import sys
+from collections import defaultdict
+import math
+
+def aggregate(ts, metric):
+    """Aggregate warm runs (skip first) using the given metric."""
+    warm = ts[1:] if len(ts) > 1 else ts
+    if metric == "geomean":
+        return math.exp(sum(math.log(t) for t in warm) / len(warm))
+    return min(warm)  # default: min
+
+def parse_results(path, metric):
+    """Parse benchmark runner TSV, return {query_name: aggregated_time}."""
+    times = defaultdict(list)
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith("name"):
+                continue
+            parts = line.split("\t")
+            if len(parts) != 3:
+                continue
+            name, run, t = parts
+            qname = name.split("/")[-1].replace(".benchmark", "")
+            times[qname].append(float(t))
+    return {qname: aggregate(ts, metric) for qname, ts in times.items()}
+
+metric = sys.argv[4] if len(sys.argv) > 4 else "min"
+baseline = parse_results(sys.argv[1], metric)
+robust = parse_results(sys.argv[2], metric)
+out_path = sys.argv[3]
+
+queries = sorted(set(baseline.keys()) & set(robust.keys()),
+                 key=lambda q: (int(''.join(c for c in q if c.isdigit()) or '0'),
+                                ''.join(c for c in q if c.isalpha())))
+
+if not queries:
+    print("No common queries found between baseline and Robust results.")
+    sys.exit(1)
+
+faster = []
+slower = []
+log_speedups = []
+
+metric_label = "geomean" if metric == "geomean" else "min"
+print(f"Metric: {metric_label} of warm runs (excluding first/cold run)\n")
+header = f"{'Query':<10} {'Baseline(s)':>12} {'Robust(s)':>12} {'Speedup':>10} {'Status':>8}"
+sep = "-" * len(header)
+
+lines = [header, sep]
+
+for q in queries:
+    b = baseline[q]
+    r = robust[q]
+    speedup = b / r if r > 0 else float('inf')
+    log_speedups.append(math.log(speedup))
+
+    if speedup > 1.05:
+        status = "FASTER"
+        faster.append((q, speedup))
+    elif speedup < 0.95:
+        status = "SLOWER"
+        slower.append((q, speedup))
+    else:
+        status = "~same"
+
+    lines.append(f"{q:<10} {b:>12.6f} {r:>12.6f} {speedup:>9.3f}x {status:>8}")
+
+geo_mean = math.exp(sum(log_speedups) / len(log_speedups)) if log_speedups else 1.0
+total_b = sum(baseline[q] for q in queries)
+total_r = sum(robust[q] for q in queries)
+
+lines.append(sep)
+lines.append(f"{'TOTAL':<10} {total_b:>12.6f} {total_r:>12.6f} {total_b/total_r:>9.3f}x")
+lines.append("")
+lines.append(f"Queries: {len(queries)}  |  Faster: {len(faster)}  |  Slower: {len(slower)}  |  Same: {len(queries)-len(faster)-len(slower)}")
+lines.append(f"Geometric mean speedup: {geo_mean:.3f}x")
+
+if faster:
+    lines.append("")
+    lines.append("Top Robust wins:")
+    for q, s in sorted(faster, key=lambda x: -x[1])[:10]:
+        lines.append(f"  {q}: {s:.3f}x")
+
+if slower:
+    lines.append("")
+    lines.append("Top Robust regressions:")
+    for q, s in sorted(slower, key=lambda x: x[1])[:10]:
+        lines.append(f"  {q}: {s:.3f}x")
+
+output = "\n".join(lines)
+print(output)
+
+with open(out_path, "w") as f:
+    f.write("query\tbaseline\trobust\tspeedup\n")
+    for q in queries:
+        b = baseline[q]
+        r = robust[q]
+        f.write(f"{q}\t{b:.6f}\t{r:.6f}\t{b/r:.3f}\n")
+
+print(f"\nTSV saved to {out_path}")
+PYEOF
diff --git a/scripts/bench_metrics.sh b/scripts/bench_metrics.sh
new file mode 100755
index 0000000..05b4eee
--- /dev/null
+++ b/scripts/bench_metrics.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# bench_metrics.sh — sweep JOB queries × {baseline, robust+join_order} and capture
+# six metrics from each profile JSON: total_memory_allocated, cumulative_rows_scanned,
+# cumulative_cardinality, system_peak_buffer_memory, sum-of-HASH_JOIN-cardinality
+# (deterministic), max-HASH_JOIN-cardinality (deterministic). Output: <out>/metrics.csv
+#
+# Note: system_peak_buffer_memory is parallelism/timing-sensitive — treat as advisory.
+# The HJ cardinality metrics are deterministic given the same plan.
+#
+# CSV columns:
+#   query, baseline_memory, robust_memory,
+#          baseline_rows_scanned, robust_rows_scanned,
+#          baseline_cardinality, robust_cardinality,
+#          baseline_peak_buffer, robust_peak_buffer,
+#          baseline_hj_card_sum, robust_hj_card_sum,
+#          baseline_hj_card_max, robust_hj_card_max
+#
+# Usage:
+#   ./scripts/bench_metrics.sh                             # all JOB queries
+#   ./scripts/bench_metrics.sh --pattern '13.*'            # subset
+#   ./scripts/bench_metrics.sh --query 13a                 # single query
+#   ./scripts/bench_metrics.sh --out benchmark_results     # output dir
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+cd "$PROJECT_ROOT"
+
+DUCKDB="$PROJECT_ROOT/build/release/duckdb"
+EXT="$PROJECT_ROOT/build/release/extension/robust/robust.duckdb_extension"
+DB="$PROJECT_ROOT/jobdata/imdb.duckdb"
+QUERIES_DIR="$PROJECT_ROOT/jobdata/queries"
+
+PATTERN=""
+SPECIFIC_QUERY=""
+OUT_DIR="$PROJECT_ROOT/benchmark_results"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --pattern) PATTERN="$2"; shift 2 ;;
+        --query)   SPECIFIC_QUERY="$2"; shift 2 ;;
+        --out)     OUT_DIR="$2"; shift 2 ;;
+        -h|--help) sed -n '2,/^$/p' "$0" | sed 's/^# \{0,1\}//'; exit 0 ;;
+        *) echo "Unknown option: $1"; exit 1 ;;
+    esac
+done
+
+for f in "$DUCKDB" "$EXT" "$DB"; do
+    [ -f "$f" ] || { echo "Missing: $f"; exit 1; }
+done
+
+mkdir -p "$OUT_DIR"
+OUT_CSV="$OUT_DIR/metrics.csv"
+TMP_BASE=$(mktemp /tmp/metrics_base_XXXXXX.json)
+TMP_ROBUST=$(mktemp /tmp/metrics_robust_XXXXXX.json)
+trap 'rm -f "$TMP_BASE" "$TMP_ROBUST" "$TMP_DB"' EXIT
+
+# if the db is locked, copy it to a temp file
+ACTUAL_DB="$DB"
+TMP_DB=""
+if ! "$DUCKDB" "$DB" -unsigned -readonly -c "SELECT 1;" > /dev/null 2>&1; then
+    TMP_DB=$(mktemp /tmp/job_metrics_XXXXXX.duckdb)
+    cp "$DB" "$TMP_DB"
+    ACTUAL_DB="$TMP_DB"
+    echo "(database locked, using temp copy)"
+fi
+
+# build query list
+if [ -n "$SPECIFIC_QUERY" ]; then
+    QUERY_FILES="$QUERIES_DIR/${SPECIFIC_QUERY}.sql"
+    [ -f "$QUERY_FILES" ] || { echo "Query not found: $QUERY_FILES"; exit 1; }
+elif [ -n "$PATTERN" ]; then
+    QUERY_FILES=$(ls -1 "$QUERIES_DIR"/*.sql | grep -E "$PATTERN" || true)
+else
+    QUERY_FILES=$(ls -1 "$QUERIES_DIR"/*.sql | sort -V)
+fi
+[ -n "$QUERY_FILES" ] || { echo "No queries matched."; exit 1; }
+
+echo "query,baseline_memory,robust_memory,baseline_rows_scanned,robust_rows_scanned,baseline_cardinality,robust_cardinality,baseline_peak_buffer,robust_peak_buffer,baseline_hj_card_sum,robust_hj_card_sum,baseline_hj_card_max,robust_hj_card_max" > "$OUT_CSV"
+
+# extract six metrics from a profile JSON: 4 top-level + 2 derived from walking the
+# operator tree (HASH_JOIN cardinality sum and max).
+extract_metrics() {
+    python3 -c "
+import json, sys
+d = json.load(open(sys.argv[1]))
+
+def hj_walk(node, agg):
+    name = node.get('operator_name') or node.get('operator_type')
+    if name == 'HASH_JOIN':
+        c = node.get('operator_cardinality', 0)
+        agg[0] += c
+        if c > agg[1]:
+            agg[1] = c
+    for ch in node.get('children', []):
+        hj_walk(ch, agg)
+
+root = d.get('children', [d])[0] if 'children' in d else d
+agg = [0, 0]
+hj_walk(root, agg)
+print(','.join(str(x) for x in (
+    d.get('total_memory_allocated', 0),
+    d.get('cumulative_rows_scanned', 0),
+    d.get('cumulative_cardinality', 0),
+    d.get('system_peak_buffer_memory', 0),
+    agg[0], agg[1])))
+" "$1"
+}
+
+TOTAL=$(echo "$QUERY_FILES" | wc -w | tr -d ' ')
+IDX=0
+for qf in $QUERY_FILES; do
+    qname=$(basename "$qf" .sql)
+    IDX=$((IDX + 1))
+    echo -n "[$IDX/$TOTAL] $qname ... "
+    QUERY_SQL=$(cat "$qf")
+
+    # baseline: jfp on, no extension
+    "$DUCKDB" "$ACTUAL_DB" -unsigned -readonly -c "
+PRAGMA enable_profiling='json';
+PRAGMA profiling_output='$TMP_BASE';
+$QUERY_SQL
+" > /dev/null 2>/dev/null || { echo "baseline failed"; continue; }
+
+    # robust: jfp off, extension loaded, join_order heuristic
+    "$DUCKDB" "$ACTUAL_DB" -unsigned -readonly -c "
+SET disabled_optimizers = 'join_filter_pushdown';
+LOAD '$EXT';
+SET robust_heuristic = 'join_order';
+PRAGMA enable_profiling='json';
+PRAGMA profiling_output='$TMP_ROBUST';
+$QUERY_SQL
+" > /dev/null 2>/dev/null || { echo "robust failed"; continue; }
+
+    # extract_metrics emits 6 fields: mem,rows,card,peak_buf,hj_sum,hj_max
+    IFS=',' read -r BMEM BROWS BCARD BPEAK BHJSUM BHJMAX <<< "$(extract_metrics "$TMP_BASE")"
+    IFS=',' read -r RMEM RROWS RCARD RPEAK RHJSUM RHJMAX <<< "$(extract_metrics "$TMP_ROBUST")"
+    echo "$qname,$BMEM,$RMEM,$BROWS,$RROWS,$BCARD,$RCARD,$BPEAK,$RPEAK,$BHJSUM,$RHJSUM,$BHJMAX,$RHJMAX" >> "$OUT_CSV"
+    echo "mem=${BMEM}/${RMEM}  hj_sum=${BHJSUM}/${RHJSUM}  hj_max=${BHJMAX}/${RHJMAX}"
+done
+
+echo ""
+echo "Done. Wrote $OUT_CSV"
+echo "  rows: $(($(wc -l < "$OUT_CSV") - 1))"
diff --git a/scripts/bench_rg_instrument.sh b/scripts/bench_rg_instrument.sh
new file mode 100755
index 0000000..9f71a9c
--- /dev/null
+++ b/scripts/bench_rg_instrument.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# bench_rg_instrument.sh — sweep JOB queries × {baseline, Robust-largest-root, Robust-join-order}
+# with DUCKDB_RG_INSTRUMENT=1 and emit a per-(query,config,table) CSV of row-group counters.
+#
+# Phase 1 of the row-group instrumentation lives in the duckdb submodule; each scan
+# thread emits `[rg_instr] table=<n> visited=N pruned_zonemap=N zero_emit=N ...`
+# on destruction. This script aggregates those lines per query/config and joins
+# them with total row-group counts pulled from pragma_storage_info().
+#
+# Output: rg_instrument_results/<timestamp>/results.csv
+#   columns: query, config, table, total_rgs, visited, pruned_zonemap,
+#            zero_emit, zero_emit_pct
+#
+# Usage:
+#   ./bench_rg_instrument.sh                 # run all JOB queries × all 3 configs
+#   ./bench_rg_instrument.sh --limit 5       # first 5 queries
+#   ./bench_rg_instrument.sh --query 13a     # single query
+#   ./bench_rg_instrument.sh --configs baseline,robust-largest-root
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+cd "$PROJECT_ROOT"
+DUCKDB="$PROJECT_ROOT/build/release/duckdb"
+EXT="$PROJECT_ROOT/build/release/extension/robust/robust.duckdb_extension"
+DB="$PROJECT_ROOT/jobdata/imdb.duckdb"
+QUERIES_DIR="$PROJECT_ROOT/jobdata/queries"
+AGG="$SCRIPT_DIR/rg_aggregate.py"
+
+LIMIT=0
+SPECIFIC_QUERY=""
+CONFIGS="baseline,robust-largest-root,robust-join-order"
+OUT_DIR=""
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --limit)   LIMIT="$2"; shift 2 ;;
+        --query)   SPECIFIC_QUERY="$2"; shift 2 ;;
+        --configs) CONFIGS="$2"; shift 2 ;;
+        --out)     OUT_DIR="$2"; shift 2 ;;
+        -h|--help)
+            sed -n '2,/^$/p' "$0" | sed 's/^# \{0,1\}//'
+            exit 0 ;;
+        *) echo "Unknown option: $1"; exit 1 ;;
+    esac
+done
+
+for f in "$DUCKDB" "$EXT" "$DB" "$AGG"; do
+    [ -e "$f" ] || { echo "Missing: $f"; exit 1; }
+done
+
+if [ -z "$OUT_DIR" ]; then
+    OUT_DIR="$PROJECT_ROOT/rg_instrument_results/$(date +%Y%m%d_%H%M%S)"
+fi
+mkdir -p "$OUT_DIR"
+PER_QUERY_CSV="$OUT_DIR/per_query.csv"
+TOTAL_CSV="$OUT_DIR/total_rgs.csv"
+RESULT_CSV="$OUT_DIR/results.csv"
+RAW_DIR="$OUT_DIR/raw"
+mkdir -p "$RAW_DIR"
+
+# if the db is locked, copy it to a temp file
+ACTUAL_DB="$DB"
+TMP_DB=""
+if ! "$DUCKDB" "$DB" -unsigned -readonly -c "SELECT 1;" > /dev/null 2>&1; then
+    TMP_DB=$(mktemp /tmp/job_rginstr_XXXXXX.duckdb)
+    cp "$DB" "$TMP_DB"
+    ACTUAL_DB="$TMP_DB"
+    echo "(database locked, using temp copy)"
+fi
+trap '[ -n "$TMP_DB" ] && rm -f "$TMP_DB"' EXIT
+
+# step 1: dump total row groups per table (loop in shell because
+# pragma_storage_info doesn't accept lateral column parameters)
+echo "Capturing total row groups per table..."
+echo "table,total_rgs" > "$TOTAL_CSV"
+TABLES=$("$DUCKDB" "$ACTUAL_DB" -unsigned -readonly -noheader -list -c \
+    "SELECT table_name FROM duckdb_tables() WHERE schema_name='main' ORDER BY table_name;")
+for tbl in $TABLES; do
+    rgs=$("$DUCKDB" "$ACTUAL_DB" -unsigned -readonly -noheader -list -c \
+        "SELECT COUNT(DISTINCT row_group_id) FROM pragma_storage_info('$tbl');")
+    echo "$tbl,$rgs" >> "$TOTAL_CSV"
+done
+echo "  wrote $(($(wc -l < "$TOTAL_CSV") - 1)) tables to $TOTAL_CSV"
+
+# step 2: build the query list
+if [ -n "$SPECIFIC_QUERY" ]; then
+    QUERY_FILES="$QUERIES_DIR/${SPECIFIC_QUERY}.sql"
+    [ -f "$QUERY_FILES" ] || { echo "Query not found: $QUERY_FILES"; exit 1; }
+else
+    QUERY_FILES=$(ls -1 "$QUERIES_DIR"/*.sql | sort -V)
+fi
+
+# step 3: per-config setup
+config_settings() {
+    case "$1" in
+        baseline)
+            # jfp on, no robust
+            echo "" ;;
+        robust-largest-root)
+            echo "SET disabled_optimizers = 'join_filter_pushdown'; LOAD '$EXT'; SET robust_heuristic = 'largest_root';" ;;
+        robust-join-order)
+            echo "SET disabled_optimizers = 'join_filter_pushdown'; LOAD '$EXT'; SET robust_heuristic = 'join_order';" ;;
+        *) echo "Unknown config: $1" >&2; return 1 ;;
+    esac
+}
+
+# init per-query CSV with header
+> "$PER_QUERY_CSV"
+echo "query,config,table,visited,pruned_zonemap,zero_emit,zero_emit_pct" > "$PER_QUERY_CSV"
+
+# step 4: sweep
+TOTAL_QUERIES=$(echo "$QUERY_FILES" | wc -w | tr -d ' ')
+if [ "$LIMIT" -gt 0 ] && [ "$LIMIT" -lt "$TOTAL_QUERIES" ]; then
+    TOTAL_QUERIES=$LIMIT
+fi
+IFS=',' read -ra CFG_ARR <<< "$CONFIGS"
+TOTAL_RUNS=$((TOTAL_QUERIES * ${#CFG_ARR[@]}))
+RUN_IDX=0
+COUNT=0
+
+for query_file in $QUERY_FILES; do
+    query_name=$(basename "$query_file" .sql)
+    ((COUNT++))
+    if [ "$LIMIT" -gt 0 ] && [ "$COUNT" -gt "$LIMIT" ]; then
+        break
+    fi
+    QUERY_SQL=$(cat "$query_file")
+
+    for cfg in "${CFG_ARR[@]}"; do
+        ((RUN_IDX++))
+        echo -n "[$RUN_IDX/$TOTAL_RUNS] $query_name @ $cfg ... "
+
+        SETTINGS=$(config_settings "$cfg")
+        STDERR_FILE="$RAW_DIR/${query_name}_${cfg}.stderr"
+
+        DUCKDB_RG_INSTRUMENT=1 "$DUCKDB" "$ACTUAL_DB" -unsigned -readonly \
+            -c "$SETTINGS $QUERY_SQL" \
+            > /dev/null 2> "$STDERR_FILE" || true
+
+        python3 "$AGG" \
+            --input "$STDERR_FILE" \
+            --query "$query_name" \
+            --config "$cfg" \
+            --out "$PER_QUERY_CSV"
+
+        rg_lines=$(grep -c '^\[rg_instr\]' "$STDERR_FILE" 2>/dev/null || echo 0)
+        echo "($rg_lines rg_instr lines)"
+    done
+done
+
+# step 5: join per-query totals with total_rgs and emit results.csv
+echo ""
+echo "Joining with total_rgs..."
+"$DUCKDB" -c "
+COPY (
+    SELECT
+        pq.query,
+        pq.config,
+        pq.\"table\",
+        COALESCE(t.total_rgs, 0) AS total_rgs,
+        pq.visited,
+        pq.pruned_zonemap,
+        pq.zero_emit,
+        pq.zero_emit_pct
+    FROM read_csv('$PER_QUERY_CSV', header=true, delim=',', auto_detect=false,
+                  columns={'query':'VARCHAR','config':'VARCHAR','table':'VARCHAR',
+                           'visited':'BIGINT','pruned_zonemap':'BIGINT',
+                           'zero_emit':'BIGINT','zero_emit_pct':'DOUBLE'}) pq
+    LEFT JOIN read_csv('$TOTAL_CSV', header=true, delim=',', auto_detect=false,
+                       columns={'table':'VARCHAR','total_rgs':'BIGINT'}) t
+      USING (\"table\")
+    ORDER BY pq.query, pq.config, pq.\"table\"
+) TO '$RESULT_CSV' (HEADER, DELIMITER ',');
+" > /dev/null
+
+echo ""
+echo "Done."
+echo "  per-query CSV:  $PER_QUERY_CSV"
+echo "  total_rgs CSV:  $TOTAL_CSV"
+echo "  joined CSV:     $RESULT_CSV"
+echo "  raw stderr:     $RAW_DIR/"
+echo ""
+echo "Quick view:"
+head -5 "$RESULT_CSV" | column -t -s,
diff --git a/scripts/bench_tpch.sh b/scripts/bench_tpch.sh
new file mode 100755
index 0000000..c606b6d
--- /dev/null
+++ b/scripts/bench_tpch.sh
@@ -0,0 +1,209 @@
+#!/bin/bash
+# bench_tpch.sh - Run baseline and Robust TPCH benchmarks via DuckDB's benchmark_runner, report side-by-side comparison.
+#
+# Suites used:
+#   benchmark/tpch_baseline/  (baseline, no extension)
+#   benchmark/tpch_robust/       (LOAD robust + jfp disabled)
+#
+# Only the 9 queries where Robust inserts bloom filters and runs correctly are wired up:
+#   Q02, Q03, Q07, Q08, Q10, Q11, Q17, Q18, Q21
+# The other queries are deliberately omitted: Q01/Q06 are single-table, Q12/Q13/Q14/Q19
+# have only one join (Robust's edges<=1 early-exit), Q04/Q15/Q16/Q22 currently insert no BFs,
+# and Q05/Q09/Q20 have cyclic join graphs that crash or miscompare under Robust.
+#
+# Usage: ./scripts/bench_tpch.sh [options]
+#   --pattern <pat>    Query name pattern, e.g. "q03" (default: all wired queries)
+#   --baseline-only    Run only baseline benchmarks
+#   --robust-only         Run only Robust benchmarks
+#   --no-run           Skip running, just compare existing results
+#   --out <dir>        Output directory (default: benchmark_results/tpch)
+#   --metric <m>       Aggregation metric across runs: min (default), geomean
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+cd "$PROJECT_ROOT"
+RUNNER="$PROJECT_ROOT/build/release/benchmark/benchmark_runner"
+PATTERN="q(02|03|07|08|10|11|17|18|21)\.benchmark"
+RUN_BASELINE=true
+RUN_ROBUST=true
+OUT_DIR="$PROJECT_ROOT/benchmark_results/tpch"
+METRIC="min"
+DISABLE_TIMEOUT=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --pattern) PATTERN="$2"; shift 2 ;;
+        --baseline-only) RUN_ROBUST=false; shift ;;
+        --robust-only) RUN_BASELINE=false; shift ;;
+        --no-run) RUN_BASELINE=false; RUN_ROBUST=false; shift ;;
+        --out) OUT_DIR="$2"; shift 2 ;;
+        --metric) METRIC="$2"; shift 2 ;;
+        --disable-timeout) DISABLE_TIMEOUT=true; shift ;;
+        *) echo "Unknown option: $1"; exit 1 ;;
+    esac
+done
+
+mkdir -p "$OUT_DIR"
+
+if [ ! -f "$RUNNER" ]; then
+    echo "Error: benchmark_runner not found at $RUNNER"
+    echo "Build with: BUILD_BENCHMARK=1 GEN=ninja make release"
+    exit 1
+fi
+
+BASELINE_RAW="$OUT_DIR/baseline_raw.tsv"
+ROBUST_RAW="$OUT_DIR/robust_raw.tsv"
+
+RUNNER_FLAGS=()
+if [ "$DISABLE_TIMEOUT" = true ]; then
+    RUNNER_FLAGS+=(--disable-timeout)
+fi
+
+if [ "$RUN_BASELINE" = true ]; then
+    echo "Running TPCH baseline benchmarks (pattern: $PATTERN)..."
+    "$RUNNER" "${RUNNER_FLAGS[@]}" "benchmark/tpch_baseline/$PATTERN" 2>&1 | tee "$BASELINE_RAW"
+    echo "Baseline done."
+fi
+
+if [ "$RUN_ROBUST" = true ]; then
+    echo "Running TPCH Robust benchmarks (pattern: $PATTERN)..."
+    "$RUNNER" "${RUNNER_FLAGS[@]}" "benchmark/tpch_robust/$PATTERN" 2>&1 | tee "$ROBUST_RAW"
+    echo "Robust done."
+fi
+
+if [ ! -f "$BASELINE_RAW" ] || [ ! -f "$ROBUST_RAW" ]; then
+    echo "Error: need both baseline and Robust results to compare"
+    exit 1
+fi
+
+python3 - "$BASELINE_RAW" "$ROBUST_RAW" "$OUT_DIR/comparison.tsv" "$METRIC" <<'PYEOF'
+import sys
+from collections import defaultdict
+import math
+
+def aggregate(ts, metric):
+    """Aggregate warm runs (skip first) using the given metric."""
+    warm = ts[1:] if len(ts) > 1 else ts
+    if metric == "geomean":
+        return math.exp(sum(math.log(t) for t in warm) / len(warm))
+    return min(warm)
+
+def parse_results(path, metric):
+    """Parse benchmark runner TSV, return {query_name: aggregated_time or None for timeouts}."""
+    times = defaultdict(list)
+    timed_out = set()
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith("name"):
+                continue
+            parts = line.split("\t")
+            if len(parts) != 3:
+                continue
+            name, run, t = parts
+            qname = name.split("/")[-1].replace(".benchmark", "")
+            try:
+                times[qname].append(float(t))
+            except ValueError:
+                # benchmark_runner emits a free-form error string when a run hits
+                # the timeout or otherwise fails. mark the query as timed-out.
+                timed_out.add(qname)
+    out = {}
+    for qname, ts in times.items():
+        out[qname] = aggregate(ts, metric) if ts else None
+    for qname in timed_out:
+        if qname not in out or out[qname] is None:
+            out[qname] = None
+    return out
+
+metric = sys.argv[4] if len(sys.argv) > 4 else "min"
+baseline = parse_results(sys.argv[1], metric)
+robust = parse_results(sys.argv[2], metric)
+out_path = sys.argv[3]
+
+queries = sorted(set(baseline.keys()) | set(robust.keys()),
+                 key=lambda q: int(''.join(c for c in q if c.isdigit()) or '0'))
+
+if not queries:
+    print("No queries found in either baseline or Robust results.")
+    sys.exit(1)
+
+faster = []
+slower = []
+timeouts = []
+log_speedups = []
+
+metric_label = "geomean" if metric == "geomean" else "min"
+print(f"Metric: {metric_label} of warm runs (excluding first/cold run)\n")
+header = f"{'Query':<10} {'Baseline(s)':>12} {'Robust(s)':>12} {'Speedup':>10} {'Status':>8}"
+sep = "-" * len(header)
+
+lines = [header, sep]
+
+for q in queries:
+    b = baseline.get(q)
+    r = robust.get(q)
+    if b is None or r is None:
+        b_str = f"{b:.6f}" if b is not None else "TIMEOUT"
+        r_str = f"{r:.6f}" if r is not None else "TIMEOUT"
+        lines.append(f"{q:<10} {b_str:>12} {r_str:>12} {'-':>10} {'TIMEOUT':>8}")
+        timeouts.append(q)
+        continue
+    speedup = b / r if r > 0 else float('inf')
+    log_speedups.append(math.log(speedup))
+
+    if speedup > 1.05:
+        status = "FASTER"
+        faster.append((q, speedup))
+    elif speedup < 0.95:
+        status = "SLOWER"
+        slower.append((q, speedup))
+    else:
+        status = "~same"
+
+    lines.append(f"{q:<10} {b:>12.6f} {r:>12.6f} {speedup:>9.3f}x {status:>8}")
+
+geo_mean = math.exp(sum(log_speedups) / len(log_speedups)) if log_speedups else float('nan')
+finished = [q for q in queries if baseline.get(q) is not None and robust.get(q) is not None]
+total_b = sum(baseline[q] for q in finished)
+total_r = sum(robust[q] for q in finished)
+
+lines.append(sep)
+if total_r > 0:
+    lines.append(f"{'TOTAL':<10} {total_b:>12.6f} {total_r:>12.6f} {total_b/total_r:>9.3f}x   (excl. timeouts)")
+lines.append("")
+same = len(finished) - len(faster) - len(slower)
+lines.append(f"Queries: {len(queries)}  |  Faster: {len(faster)}  |  Slower: {len(slower)}  |  Same: {same}  |  Timeouts: {len(timeouts)}")
+lines.append(f"Geometric mean speedup (over {len(finished)} finished queries): {geo_mean:.3f}x")
+if timeouts:
+    lines.append(f"Timed-out queries: {', '.join(timeouts)}")
+
+if faster:
+    lines.append("")
+    lines.append("Top Robust wins:")
+    for q, s in sorted(faster, key=lambda x: -x[1])[:10]:
+        lines.append(f"  {q}: {s:.3f}x")
+
+if slower:
+    lines.append("")
+    lines.append("Top Robust regressions:")
+    for q, s in sorted(slower, key=lambda x: x[1])[:10]:
+        lines.append(f"  {q}: {s:.3f}x")
+
+output = "\n".join(lines)
+print(output)
+
+with open(out_path, "w") as f:
+    f.write("query\tbaseline\trobust\tspeedup\n")
+    for q in queries:
+        b = baseline.get(q)
+        r = robust.get(q)
+        b_str = f"{b:.6f}" if b is not None else "TIMEOUT"
+        r_str = f"{r:.6f}" if r is not None else "TIMEOUT"
+        sp = f"{b/r:.3f}" if (b is not None and r is not None and r > 0) else "-"
+        f.write(f"{q}\t{b_str}\t{r_str}\t{sp}\n")
+
+print(f"\nTSV saved to {out_path}")
+PYEOF
diff --git a/scripts/plot_results.py b/scripts/plot_results.py
new file mode 100755
index 0000000..9205fcb
--- /dev/null
+++ b/scripts/plot_results.py
@@ -0,0 +1,472 @@
+#!/usr/bin/env python3
+"""Plot benchmark results in publication-ready style.
+
+Usage:
+    plot_results.py speedup <comparison.tsv> --out <file.pdf|png>
+    plot_results.py metric  <memory|rows_scanned|cardinality> <metrics.csv>
+                    --out-ratio <file> --out-totals <file>
+"""
+import argparse
+import csv
+import math
+import sys
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from matplotlib.ticker import FuncFormatter
+import numpy as np
+
+GREEN = "#558b2f"
+GREEN_FILL = "#7cb342"
+RED = "#b71c1c"
+RED_FILL = "#c62828"
+INK = "#1a1a1a"
+GRID = "#bdbdbd"
+
+
+def style():
+    plt.rcParams.update({
+        "font.family": "serif",
+        "font.serif": ["Times New Roman", "DejaVu Serif"],
+        "font.size": 11,
+        "axes.linewidth": 0.8,
+        "axes.edgecolor": INK,
+        "axes.labelcolor": INK,
+        "xtick.color": INK,
+        "ytick.color": INK,
+        "savefig.bbox": "tight",
+    })
+
+
+def parse_comparison(path):
+    rows = []
+    with open(path) as f:
+        for r in csv.DictReader(f, delimiter="\t"):
+            rows.append({
+                "query": r["query"],
+                "baseline": float(r["baseline"]),
+                "robust": float(r["robust"]),
+                "ratio": float(r["speedup"]),
+            })
+    return rows
+
+
+METRIC_CONFIG = {
+    "memory": {
+        "csv_baseline": "baseline_memory",
+        "csv_robust": "robust_memory",
+        "title": "Robust Memory — JOB Benchmark",
+        "ylabel": "Memory savings (Baseline / Robust)",
+        "totals_label": "Total Memory Allocated",
+        "totals_unit": "GB",
+        "totals_divisor": 1024 ** 3,
+    },
+    "rows_scanned": {
+        "csv_baseline": "baseline_rows_scanned",
+        "csv_robust": "robust_rows_scanned",
+        "title": "Robust Rows Scanned — JOB Benchmark",
+        "ylabel": "Scan reduction (Baseline / Robust)",
+        "totals_label": "Cumulative Rows Scanned",
+        "totals_unit": "M rows",
+        "totals_divisor": 1e6,
+    },
+    "cardinality": {
+        "csv_baseline": "baseline_cardinality",
+        "csv_robust": "robust_cardinality",
+        "title": "Robust Cardinality — JOB Benchmark",
+        "ylabel": "Cardinality reduction (Baseline / Robust)",
+        "totals_label": "Cumulative Cardinality",
+        "totals_unit": "M rows",
+        "totals_divisor": 1e6,
+    },
+    "peak_buffer": {
+        "csv_baseline": "baseline_peak_buffer",
+        "csv_robust": "robust_peak_buffer",
+        "title": "Robust Peak Buffer Memory — JOB Benchmark",
+        "ylabel": "Peak buffer ratio (Baseline / Robust)",
+        "totals_label": "Peak Buffer Memory",
+        "totals_unit": "GB",
+        "totals_divisor": 1024 ** 3,
+    },
+    "hj_card_sum": {
+        "csv_baseline": "baseline_hj_card_sum",
+        "csv_robust": "robust_hj_card_sum",
+        "title": "Robust HJ Output (sum) — JOB Benchmark",
+        "ylabel": "HJ output sum reduction (Baseline / Robust)",
+        "totals_label": "Sum of HASH_JOIN Output Cardinality",
+        "totals_unit": "M rows",
+        "totals_divisor": 1e6,
+    },
+    "hj_card_max": {
+        "csv_baseline": "baseline_hj_card_max",
+        "csv_robust": "robust_hj_card_max",
+        "title": "Robust HJ Output (peak) — JOB Benchmark",
+        "ylabel": "HJ peak reduction (Baseline / Robust)",
+        "totals_label": "Max HASH_JOIN Output Cardinality (sum across queries)",
+        "totals_unit": "M rows",
+        "totals_divisor": 1e6,
+    },
+}
+
+
+def parse_metric(path, baseline_col, robust_col):
+    rows = []
+    with open(path) as f:
+        for r in csv.DictReader(f):
+            b = float(r[baseline_col])
+            x = float(r[robust_col])
+            rows.append({
+                "query": r["query"],
+                "baseline": b,
+                "robust": x,
+                # ratio = baseline / robust → >1 means robust is leaner (matches speedup orientation)
+                "ratio": (b / x) if x > 0 else float("nan"),
+            })
+    return [r for r in rows if not math.isnan(r["ratio"])]
+
+
+def sorted_curve_plot(rows, out_path, *,
+                      title, ylabel,
+                      good_when_above=True,
+                      annotate_top=3, annotate_bot=3,
+                      summary_word_good="faster", summary_word_bad="slower",
+                      summary_metric_label="Geomean",
+                      yscale="log"):
+    """Plot a sorted curve with red/green fills around y=1.
+
+    good_when_above=True  → values >1 are green (good), <1 are red.
+    good_when_above=False → values <1 are green (good), >1 are red.
+    """
+    rows_sorted = sorted(rows, key=lambda r: r["ratio"])
+    ratios = np.array([r["ratio"] for r in rows_sorted])
+    n = len(ratios)
+    x = np.arange(n)
+
+    geomean = math.exp(np.mean(np.log(ratios)))
+    n_good = sum(1 for v in ratios if (v > 1.0) == good_when_above)
+    n_bad = sum(1 for v in ratios if (v < 1.0) == good_when_above)
+
+    good_color = GREEN_FILL
+    bad_color = RED_FILL
+    above_color = good_color if good_when_above else bad_color
+    below_color = bad_color if good_when_above else good_color
+    above_label_color = GREEN if good_when_above else RED
+    below_label_color = RED if good_when_above else GREEN
+
+    fig, ax = plt.subplots(figsize=(7.5, 4.0))
+
+    ax.plot(x, ratios, color=INK, linewidth=1.1)
+    ax.fill_between(x, 1.0, ratios, where=(ratios >= 1.0),
+                    color=above_color, alpha=0.35, interpolate=True, linewidth=0)
+    ax.fill_between(x, 1.0, ratios, where=(ratios <= 1.0),
+                    color=below_color, alpha=0.35, interpolate=True, linewidth=0)
+
+    ax.axhline(1.0, color="#666666", linewidth=0.6, linestyle="--", alpha=0.7)
+    ax.axhline(geomean, color=GREEN if (geomean > 1) == good_when_above else RED,
+               linewidth=0.9, linestyle="--", alpha=0.85)
+
+    # only annotate actual outliers in the right direction;
+    # stagger y offsets so close-together points don't overlap.
+    top = [r for r in rows_sorted if r["ratio"] > 1.0][-annotate_top:][::-1]
+    bot = [r for r in rows_sorted if r["ratio"] < 1.0][:annotate_bot]
+    # diagonal stagger so labels of close-together points don't overlap
+    for k, r in enumerate(top):
+        i = rows_sorted.index(r)
+        ax.annotate(f"{r['query']} {r['ratio']:.2f}×",
+                    xy=(i, r["ratio"]),
+                    xytext=(-12 - k * 32, 4 + k * 22), textcoords="offset points",
+                    fontsize=8, ha="right", va="bottom",
+                    color=above_label_color,
+                    arrowprops=dict(arrowstyle="-", color=above_label_color,
+                                    lw=0.5, alpha=0.6))
+    for k, r in enumerate(bot):
+        i = rows_sorted.index(r)
+        ax.annotate(f"{r['query']} {r['ratio']:.2f}×",
+                    xy=(i, r["ratio"]),
+                    xytext=(10 + k * 28, -6 - k * 16), textcoords="offset points",
+                    fontsize=8, ha="left", va="top",
+                    color=below_label_color,
+                    arrowprops=dict(arrowstyle="-", color=below_label_color,
+                                    lw=0.5, alpha=0.6))
+
+    geomean_label_color = GREEN if (geomean > 1) == good_when_above else RED
+    ax.text(n * 0.55, geomean, f"{summary_metric_label}: {geomean:.3f}×",
+            fontsize=9, va="bottom", ha="center", color=geomean_label_color,
+            backgroundcolor="white")
+
+    ax.set_xlabel("Queries (sorted)", fontsize=10)
+    ax.set_ylabel(ylabel, fontsize=10)
+    ax.set_title(title, fontsize=12, pad=10, color=INK)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    ax.set_xticks([])
+    ax.set_xlim(-1, n)
+    if yscale == "log":
+        ax.set_yscale("log")
+        ymin = max(ratios.min() * 0.85, 1e-3)
+        ymax = ratios.max() * 1.18
+        ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f"{y:g}×"))
+    else:
+        ymin = max(0, ratios.min() * 0.85)
+        ymax = max(ratios.max(), 1.05) * 1.15
+    ax.set_ylim(ymin, ymax)
+    ax.grid(axis="y", alpha=0.3, linewidth=0.5, color=GRID, which="both")
+
+    fig.text(0.5, 0.01,
+             f"{n} queries  •  {n_good} {summary_word_good}  •  "
+             f"{n_bad} {summary_word_bad}  •  {summary_metric_label}: {geomean:.3f}×",
+             ha="center", fontsize=9, color=INK)
+
+    fig.tight_layout(rect=[0, 0.03, 1, 1])
+    fig.savefig(out_path, dpi=200)
+    plt.close(fig)
+    print(f"wrote {out_path}")
+
+
+def paired_bar_plot(rows, out_path, *, title, ylabel, unit, divisor,
+                    sort_by="baseline", split=1):
+    """Back-to-back bars: Robust above the center axis, Baseline below it (inverted).
+    Both halves use log y-scale; sort queries by baseline magnitude (default) or by
+    ratio. `split` > 1 → N stacked panels of contiguous query chunks."""
+    import matplotlib.gridspec as gridspec
+
+    if sort_by == "ratio":
+        rows_sorted = sorted(rows, key=lambda r: r["ratio"])
+    else:
+        rows_sorted = sorted(rows, key=lambda r: -r["baseline"])
+    n = len(rows_sorted)
+    chunk = (n + split - 1) // split
+
+    fig = plt.figure(figsize=(7.8, 3.2 * split))
+    outer = gridspec.GridSpec(split, 1, figure=fig, hspace=0.45)
+
+    # consistent y-limits across panels: pick global max/min so all panels share scale
+    bvals_all = np.array([r["baseline"] / divisor for r in rows_sorted])
+    rvals_all = np.array([r["robust"] / divisor for r in rows_sorted])
+    ymax = max(bvals_all.max(), rvals_all.max()) * 1.15
+    pos_min = min(bvals_all[bvals_all > 0].min() if (bvals_all > 0).any() else 1,
+                  rvals_all[rvals_all > 0].min() if (rvals_all > 0).any() else 1)
+    ymin = pos_min * 0.7
+
+    for k in range(split):
+        a, b = k * chunk, min((k + 1) * chunk, n)
+        sub = rows_sorted[a:b]
+        nn = len(sub)
+        x = np.arange(nn)
+        bvals = np.array([r["baseline"] / divisor for r in sub])
+        rvals = np.array([r["robust"] / divisor for r in sub])
+
+        inner = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=outer[k], hspace=0)
+        ax_top = fig.add_subplot(inner[0])
+        ax_bot = fig.add_subplot(inner[1], sharex=ax_top)
+
+        ax_top.bar(x, rvals, color=GREEN, edgecolor="none", width=0.85, zorder=3)
+        ax_top.set_yscale("log")
+        ax_top.set_ylabel("Robust", fontsize=9, color=GREEN)
+        ax_top.tick_params(labelbottom=False, length=0)
+        ax_top.spines["top"].set_visible(False)
+        ax_top.spines["right"].set_visible(False)
+        ax_top.spines["bottom"].set_color(INK)
+        ax_top.spines["bottom"].set_linewidth(0.9)
+        ax_top.grid(axis="y", alpha=0.30, color=GRID, which="both", linewidth=0.4)
+        ax_top.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f"{y:g}"))
+
+        ax_bot.bar(x, bvals, color="#7a7a7a", edgecolor="none", width=0.85, zorder=3)
+        ax_bot.set_yscale("log")
+        ax_bot.invert_yaxis()
+        ax_bot.set_ylabel("Baseline", fontsize=9, color="#444444")
+        ax_bot.tick_params(length=0)
+        ax_bot.set_xticks([])
+        ax_bot.spines["top"].set_visible(False)
+        ax_bot.spines["bottom"].set_visible(False)
+        ax_bot.spines["right"].set_visible(False)
+        ax_bot.grid(axis="y", alpha=0.30, color=GRID, which="both", linewidth=0.4)
+        ax_bot.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f"{y:g}"))
+
+        ax_top.set_ylim(ymin, ymax)
+        ax_bot.set_ylim(ymax, ymin)
+        ax_top.set_xlim(-0.5, max(nn - 0.5, 0.5))
+
+        if split > 1:
+            ax_top.text(0.005, 0.92, f"queries {a+1}–{b} of {n}",
+                        transform=ax_top.transAxes, fontsize=8, color="#555555",
+                        va="top", ha="left")
+
+    fig.suptitle(title, fontsize=12, y=0.995)
+    sort_caption = ("sorted by Robust/Baseline ratio (regressions left)"
+                    if sort_by == "ratio"
+                    else "sorted by baseline magnitude (largest left)")
+    fig.text(0.5, 0.01, f"Queries — {sort_caption}   •   y-axis: {ylabel} ({unit})",
+             ha="center", fontsize=9, color=INK)
+    fig.tight_layout(rect=[0, 0.025, 1, 0.97])
+    fig.savefig(out_path, dpi=200)
+    plt.close(fig)
+    print(f"wrote {out_path}")
+
+
+def paired_curve_plot(rows, out_path, *, title, ylabel, unit, divisor,
+                      sort_by="baseline", split=1):
+    """Plot baseline and robust as two curves on shared axes, sorted by `sort_by`
+    descending. Fills between to show per-query gap. `split` > 1 → N stacked subplots
+    each covering a contiguous chunk of queries."""
+    if sort_by == "ratio":
+        rows_sorted = sorted(rows, key=lambda r: r["ratio"])  # asc: regressions left
+    else:
+        rows_sorted = sorted(rows, key=lambda r: -r["baseline"])  # desc: largest left
+    n = len(rows_sorted)
+    fig, axes = plt.subplots(split, 1, figsize=(7.5, 3.0 * split), squeeze=False)
+    axes = axes.flatten()
+    chunk = (n + split - 1) // split
+
+    for k, ax in enumerate(axes):
+        a, b = k * chunk, min((k + 1) * chunk, n)
+        sub = rows_sorted[a:b]
+        x = np.arange(b - a)
+        bvals = np.array([r["baseline"] / divisor for r in sub])
+        rvals = np.array([r["robust"] / divisor for r in sub])
+        bvals = np.where(bvals <= 0, 1e-12, bvals)
+        rvals = np.where(rvals <= 0, 1e-12, rvals)
+
+        ax.plot(x, bvals, color="#777777", linewidth=1.0, label="Baseline", zorder=3)
+        ax.plot(x, rvals, color=GREEN, linewidth=1.0, label="Robust", zorder=4)
+        ax.fill_between(x, bvals, rvals, where=(rvals < bvals),
+                        color=GREEN_FILL, alpha=0.30, interpolate=True, linewidth=0)
+        ax.fill_between(x, bvals, rvals, where=(rvals > bvals),
+                        color=RED_FILL, alpha=0.30, interpolate=True, linewidth=0)
+
+        ax.set_yscale("log")
+        ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f"{y:g}"))
+        ax.set_ylabel(f"{ylabel}\n({unit})", fontsize=9)
+        ax.set_xticks([])
+        ax.set_xlim(-1, max(b - a, 1))
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.grid(axis="y", alpha=0.3, linewidth=0.5, color=GRID, which="both")
+        if k == 0:
+            ax.legend(loc="upper right", frameon=False, fontsize=9)
+        if split > 1:
+            ax.text(0.01, 0.95, f"queries {a+1}–{b} of {n}",
+                    transform=ax.transAxes, fontsize=8, color="#555555",
+                    va="top", ha="left")
+
+    fig.suptitle(title, fontsize=12)
+    sort_caption = ("sorted by Robust/Baseline ratio" if sort_by == "ratio"
+                    else "sorted by baseline magnitude (largest left)")
+    fig.text(0.5, 0.01, f"Queries — {sort_caption}",
+             ha="center", fontsize=9, color=INK)
+    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
+    fig.savefig(out_path, dpi=200)
+    plt.close(fig)
+    print(f"wrote {out_path}")
+
+
+def metric_totals_plot(rows, out_path, *, label, unit, divisor):
+    """Bar chart of workload totals: baseline vs robust for any metric."""
+    base_total = sum(r["baseline"] for r in rows) / divisor
+    rob_total = sum(r["robust"] for r in rows) / divisor
+    delta_pct = (rob_total - base_total) / base_total * 100 if base_total else 0
+
+    fig, ax = plt.subplots(figsize=(4.0, 3.2))
+    bars = ax.bar(["Baseline", "Robust"],
+                  [base_total, rob_total],
+                  color=["#888888", GREEN if rob_total < base_total else RED],
+                  width=0.55, edgecolor=INK, linewidth=0.5)
+
+    for b, v in zip(bars, [base_total, rob_total]):
+        ax.text(b.get_x() + b.get_width() / 2, v,
+                f"{v:.2f} {unit}", ha="center", va="bottom", fontsize=10)
+
+    sign = "+" if delta_pct >= 0 else ""
+    ax.set_title(f"{label}  ({sign}{delta_pct:.1f}%)",
+                 fontsize=11, pad=10, color=INK)
+    ax.set_ylabel(f"{unit} across workload", fontsize=10)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    ax.grid(axis="y", alpha=0.3, linewidth=0.5, color=GRID)
+    ax.set_ylim(0, max(base_total, rob_total) * 1.18)
+
+    fig.tight_layout()
+    fig.savefig(out_path, dpi=200)
+    plt.close(fig)
+    print(f"wrote {out_path}")
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    sub = ap.add_subparsers(dest="cmd", required=True)
+
+    sp = sub.add_parser("speedup")
+    sp.add_argument("input")
+    sp.add_argument("--out", required=True)
+    sp.add_argument("--title", default="Robust Speedup — JOB Benchmark")
+    sp.add_argument("--yscale", choices=["log", "linear"], default="log")
+
+    mp = sub.add_parser("metric")
+    mp.add_argument("metric", choices=list(METRIC_CONFIG.keys()))
+    mp.add_argument("input")
+    mp.add_argument("--out-ratio", required=True)
+    mp.add_argument("--out-totals", required=True)
+    mp.add_argument("--title", default=None,
+                    help="override the default title for this metric")
+    mp.add_argument("--yscale", choices=["log", "linear"], default="log")
+
+    pp = sub.add_parser("pairs")
+    pp.add_argument("metric", choices=list(METRIC_CONFIG.keys()))
+    pp.add_argument("input")
+    pp.add_argument("--out", required=True)
+    pp.add_argument("--split", type=int, default=1,
+                    help="split queries into N stacked sub-panels (default 1)")
+    pp.add_argument("--sort-by", choices=["baseline", "ratio"], default="baseline",
+                    help="sort queries by baseline magnitude (default) or by ratio")
+    pp.add_argument("--style", choices=["bars", "line"], default="bars",
+                    help="back-to-back bars (default) or sorted dual lines")
+    pp.add_argument("--title", default=None)
+
+    args = ap.parse_args()
+    style()
+
+    if args.cmd == "speedup":
+        rows = parse_comparison(args.input)
+        sorted_curve_plot(
+            rows, args.out,
+            title=args.title,
+            ylabel="Speedup (Robust / Baseline)",
+            good_when_above=True,
+            summary_word_good="faster", summary_word_bad="slower",
+            summary_metric_label="Geomean",
+            yscale=args.yscale,
+        )
+    elif args.cmd == "metric":
+        cfg = METRIC_CONFIG[args.metric]
+        rows = parse_metric(args.input, cfg["csv_baseline"], cfg["csv_robust"])
+        sorted_curve_plot(
+            rows, args.out_ratio,
+            title=args.title or cfg["title"],
+            ylabel=cfg["ylabel"],
+            good_when_above=True,  # above 1 = robust is leaner = good
+            summary_word_good="leaner", summary_word_bad="heavier",
+            summary_metric_label="Geomean",
+            yscale=args.yscale,
+        )
+        metric_totals_plot(rows, args.out_totals,
+                           label=cfg["totals_label"],
+                           unit=cfg["totals_unit"],
+                           divisor=cfg["totals_divisor"])
+    elif args.cmd == "pairs":
+        cfg = METRIC_CONFIG[args.metric]
+        rows = parse_metric(args.input, cfg["csv_baseline"], cfg["csv_robust"])
+        plot_fn = paired_bar_plot if args.style == "bars" else paired_curve_plot
+        plot_fn(
+            rows, args.out,
+            title=args.title or cfg["title"],
+            ylabel=cfg["totals_label"],
+            unit=cfg["totals_unit"],
+            divisor=cfg["totals_divisor"],
+            sort_by=args.sort_by,
+            split=args.split,
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/profile_breakdown.py b/scripts/profile_breakdown.py
new file mode 100755
index 0000000..f15832c
--- /dev/null
+++ b/scripts/profile_breakdown.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""Parse DuckDB JSON profiling output and print operator breakdown.
+
+Usage:
+    profile_breakdown.py <baseline.json> <robust.json>
+    profile_breakdown.py <baseline.json> <robust.json> <join_order.json>
+    profile_breakdown.py <single.json>
+"""
+import json
+import sys
+
+
+def flatten_operators(node, depth=0):
+    ops = []
+    name = node.get("operator_name", node.get("operator_type", "?"))
+    ops.append({
+        "depth": depth,
+        "name": name,
+        "timing_ms": node.get("operator_timing", 0) * 1000,
+        "cardinality": node.get("operator_cardinality", 0),
+        "rows_scanned": node.get("operator_rows_scanned", 0),
+        "extra": node.get("extra_info", {}),
+    })
+    for child in node.get("children", []):
+        ops.extend(flatten_operators(child, depth + 1))
+    return ops
+
+
+def scan_map(ops):
+    """Map scans to unique keys (table, ordinal) so duplicates in the same query
+    (e.g. self-joins) are preserved rather than overwritten."""
+    counts = {}
+    m = {}
+    for o in ops:
+        if "SCAN" in o["name"]:
+            table = o["extra"].get("Table", "?")
+            idx = counts.get(table, 0)
+            counts[table] = idx + 1
+            m[(table, idx)] = o
+    return m
+
+
+def scan_label(key, multi_tables=None):
+    table, idx = key
+    if multi_tables is not None and table not in multi_tables:
+        return table
+    return f"{table}#{idx+1}"
+
+
+def multi_scan_tables(*scan_maps):
+    """Return set of table names that appear more than once in any scan map."""
+    multi = set()
+    for m in scan_maps:
+        counts = {}
+        for (t, _) in m.keys():
+            counts[t] = counts.get(t, 0) + 1
+        for t, c in counts.items():
+            if c > 1:
+                multi.add(t)
+    return multi
+
+
+def print_profile(label, data):
+    latency = data.get("latency", 0) * 1000
+    cpu = data.get("cpu_time", 0) * 1000
+    blocked = data.get("blocked_thread_time", 0) * 1000
+
+    root = data.get("children", [data])[0] if "children" in data else data
+    ops = flatten_operators(root)
+    total_op = sum(o["timing_ms"] for o in ops)
+
+    print(f"\n{'='*70}")
+    print(f" {label}")
+    print(f"{'='*70}")
+    print(f"  Wall (latency):         {latency:8.2f} ms")
+    print(f"  CPU time (all threads): {cpu:8.2f} ms")
+    print(f"  Blocked thread time:    {blocked:8.2f} ms")
+    print(f"  CPU / Wall ratio:       {cpu/latency:8.2f}x" if latency > 0 else "")
+    print()
+
+    # operator tree
+    print(f"  {'Operator':<35s} {'CPU(ms)':>10s} {'Rows':>12s} {'Scanned':>12s}")
+    print(f"  {'-'*35} {'-'*10} {'-'*12} {'-'*12}")
+    for o in ops:
+        indent = "  " * o["depth"]
+        name = indent + o["name"]
+        print(f"  {name:<35s} {o['timing_ms']:10.2f} {o['cardinality']:12,} {o['rows_scanned']:12,}")
+    print()
+
+    # scan detail
+    print(f"  --- SEQ_SCAN detail ---")
+    for o in ops:
+        if "SCAN" not in o["name"]:
+            continue
+        table = o["extra"].get("Table", "?")
+        projs = o["extra"].get("Projections", "?")
+        filters = o["extra"].get("Filters", "")
+        print(f"  {table:20s}  cpu={o['timing_ms']:8.2f}ms  rows_out={o['cardinality']:>10,}  scanned={o['rows_scanned']:>12,}")
+        print(f"    projections: {projs}")
+        if filters:
+            print(f"    filters: {filters}")
+
+    return ops, latency, cpu
+
+
+def print_comparison(baseline_data, robust_data):
+    def categorize(ops):
+        cats = {"SEQ_SCAN": 0, "HASH_JOIN": 0, "FILTER": 0, "Other": 0}
+        for o in ops:
+            if "SCAN" in o["name"]:
+                cats["SEQ_SCAN"] += o["timing_ms"]
+            elif "HASH_JOIN" in o["name"]:
+                cats["HASH_JOIN"] += o["timing_ms"]
+            elif "CREATE_FILTER" in o["name"] or "PROBE_FILTER" in o["name"]:
+                cats["FILTER"] += o["timing_ms"]
+            else:
+                cats["Other"] += o["timing_ms"]
+        return cats
+
+    b_ops, b_wall, b_cpu = print_profile("BASELINE", baseline_data)
+    r_ops, r_wall, r_cpu = print_profile("Robust", robust_data)
+
+    b_cats = categorize(b_ops)
+    r_cats = categorize(r_ops)
+
+    print(f"\n{'='*70}")
+    print(f" COMPARISON")
+    print(f"{'='*70}")
+    print(f"  {'':20s} {'Baseline':>10s} {'Robust':>10s} {'Delta':>10s}")
+    print(f"  {'-'*20} {'-'*10} {'-'*10} {'-'*10}")
+    print(f"  {'Wall time (ms)':<20s} {b_wall:10.1f} {r_wall:10.1f} {r_wall-b_wall:+10.1f}")
+    print(f"  {'Total CPU (ms)':<20s} {b_cpu:10.1f} {r_cpu:10.1f} {r_cpu-b_cpu:+10.1f}")
+    for cat in ["SEQ_SCAN", "HASH_JOIN", "FILTER", "Other"]:
+        print(f"  {'  ' + cat:<20s} {b_cats[cat]:10.1f} {r_cats[cat]:10.1f} {r_cats[cat]-b_cats[cat]:+10.1f}")
+
+    # scan-level comparison (preserves duplicates: same table can be scanned multiple times)
+    b_scans = scan_map(b_ops)
+    r_scans = scan_map(r_ops)
+    all_keys = sorted(set(list(b_scans.keys()) + list(r_scans.keys())))
+    multi = multi_scan_tables(b_scans, r_scans)
+
+    # compute totals for rows emitted and rows scanned
+    b_rows_total = sum((o["cardinality"] for o in b_scans.values()), 0)
+    r_rows_total = sum((o["cardinality"] for o in r_scans.values()), 0)
+    b_scanned_total = sum((o["rows_scanned"] for o in b_scans.values()), 0)
+    r_scanned_total = sum((o["rows_scanned"] for o in r_scans.values()), 0)
+
+    print(f"  {'Rows emitted':<20s} {b_rows_total:10,} {r_rows_total:10,} {r_rows_total-b_rows_total:+10,}")
+    print(f"  {'Rows scanned':<20s} {b_scanned_total:10,} {r_scanned_total:10,} {r_scanned_total-b_scanned_total:+10,}")
+
+    print(f"\n  --- Per-table scan comparison ---")
+    print(f"  {'Table':<28s} {'B cpu(ms)':>10s} {'R cpu(ms)':>10s} {'B rows':>12s} {'R rows':>12s} {'B scanned':>12s} {'R scanned':>12s}")
+    print(f"  {'-'*28} {'-'*10} {'-'*10} {'-'*12} {'-'*12} {'-'*12} {'-'*12}")
+    for k in all_keys:
+        b = b_scans.get(k)
+        r = r_scans.get(k)
+        label = scan_label(k, multi)
+        b_t = b["timing_ms"] if b else 0
+        r_t = r["timing_ms"] if r else 0
+        b_r = b["cardinality"] if b else 0
+        r_r = r["cardinality"] if r else 0
+        b_s = b["rows_scanned"] if b else 0
+        r_s = r["rows_scanned"] if r else 0
+        print(f"  {label:<28s} {b_t:10.2f} {r_t:10.2f} {b_r:12,} {r_r:12,} {b_s:12,} {r_s:12,}")
+    print()
+
+
+def print_comparison_3way(baseline_data, lr_data, jo_data):
+    """3-way comparison: baseline vs largest_root vs join_order."""
+    def categorize(ops):
+        cats = {"SEQ_SCAN": 0, "HASH_JOIN": 0, "FILTER": 0, "Other": 0}
+        for o in ops:
+            if "SCAN" in o["name"]:
+                cats["SEQ_SCAN"] += o["timing_ms"]
+            elif "HASH_JOIN" in o["name"]:
+                cats["HASH_JOIN"] += o["timing_ms"]
+            elif "CREATE_FILTER" in o["name"] or "PROBE_FILTER" in o["name"]:
+                cats["FILTER"] += o["timing_ms"]
+            else:
+                cats["Other"] += o["timing_ms"]
+        return cats
+
+    b_ops, b_wall, b_cpu = print_profile("BASELINE", baseline_data)
+    l_ops, l_wall, l_cpu = print_profile("Robust (largest_root)", lr_data)
+    j_ops, j_wall, j_cpu = print_profile("Robust (join_order)", jo_data)
+
+    b_cats = categorize(b_ops)
+    l_cats = categorize(l_ops)
+    j_cats = categorize(j_ops)
+
+    print(f"\n{'='*80}")
+    print(f" 3-WAY COMPARISON")
+    print(f"{'='*80}")
+    print(f"  {'':20s} {'Baseline':>10s} {'LR':>10s} {'JO':>10s} {'LR delta':>10s} {'JO delta':>10s}")
+    print(f"  {'-'*20} {'-'*10} {'-'*10} {'-'*10} {'-'*10} {'-'*10}")
+    print(f"  {'Wall time (ms)':<20s} {b_wall:10.1f} {l_wall:10.1f} {j_wall:10.1f} {l_wall-b_wall:+10.1f} {j_wall-b_wall:+10.1f}")
+    print(f"  {'Total CPU (ms)':<20s} {b_cpu:10.1f} {l_cpu:10.1f} {j_cpu:10.1f} {l_cpu-b_cpu:+10.1f} {j_cpu-b_cpu:+10.1f}")
+    for cat in ["SEQ_SCAN", "HASH_JOIN", "FILTER", "Other"]:
+        print(f"  {'  ' + cat:<20s} {b_cats[cat]:10.1f} {l_cats[cat]:10.1f} {j_cats[cat]:10.1f} {l_cats[cat]-b_cats[cat]:+10.1f} {j_cats[cat]-b_cats[cat]:+10.1f}")
+
+    # scan-level comparison (preserves duplicates: same table can be scanned multiple times)
+    b_scans = scan_map(b_ops)
+    l_scans = scan_map(l_ops)
+    j_scans = scan_map(j_ops)
+    all_keys = sorted(set(list(b_scans.keys()) + list(l_scans.keys()) + list(j_scans.keys())))
+    multi = multi_scan_tables(b_scans, l_scans, j_scans)
+
+    b_rows = sum(o["cardinality"] for o in b_scans.values())
+    l_rows = sum(o["cardinality"] for o in l_scans.values())
+    j_rows = sum(o["cardinality"] for o in j_scans.values())
+    b_scnd = sum(o["rows_scanned"] for o in b_scans.values())
+    l_scnd = sum(o["rows_scanned"] for o in l_scans.values())
+    j_scnd = sum(o["rows_scanned"] for o in j_scans.values())
+
+    print(f"  {'Rows emitted':<20s} {b_rows:10,} {l_rows:10,} {j_rows:10,} {l_rows-b_rows:+10,} {j_rows-b_rows:+10,}")
+    print(f"  {'Rows scanned':<20s} {b_scnd:10,} {l_scnd:10,} {j_scnd:10,} {l_scnd-b_scnd:+10,} {j_scnd-b_scnd:+10,}")
+
+    print(f"\n  --- Per-table scan comparison ---")
+    print(f"  {'Table':<28s} {'B cpu':>8s} {'LR cpu':>8s} {'JO cpu':>8s} {'B rows':>10s} {'LR rows':>10s} {'JO rows':>10s} {'B scanned':>12s} {'LR scanned':>12s} {'JO scanned':>12s}")
+    print(f"  {'-'*28} {'-'*8} {'-'*8} {'-'*8} {'-'*10} {'-'*10} {'-'*10} {'-'*12} {'-'*12} {'-'*12}")
+    for k in all_keys:
+        b = b_scans.get(k)
+        l = l_scans.get(k)
+        j = j_scans.get(k)
+        label = scan_label(k, multi)
+        print(f"  {label:<28s} {(b['timing_ms'] if b else 0):8.2f} {(l['timing_ms'] if l else 0):8.2f} {(j['timing_ms'] if j else 0):8.2f} {(b['cardinality'] if b else 0):10,} {(l['cardinality'] if l else 0):10,} {(j['cardinality'] if j else 0):10,} {(b['rows_scanned'] if b else 0):12,} {(l['rows_scanned'] if l else 0):12,} {(j['rows_scanned'] if j else 0):12,}")
+    print()
+
+
+def main():
+    if len(sys.argv) == 4:
+        with open(sys.argv[1]) as f:
+            baseline = json.load(f)
+        with open(sys.argv[2]) as f:
+            lr = json.load(f)
+        with open(sys.argv[3]) as f:
+            jo = json.load(f)
+        print_comparison_3way(baseline, lr, jo)
+    elif len(sys.argv) == 3:
+        with open(sys.argv[1]) as f:
+            baseline = json.load(f)
+        with open(sys.argv[2]) as f:
+            robust = json.load(f)
+        print_comparison(baseline, robust)
+    elif len(sys.argv) == 2:
+        with open(sys.argv[1]) as f:
+            data = json.load(f)
+        print_profile(sys.argv[1], data)
+    else:
+        print(__doc__.strip())
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/profile_query.sh b/scripts/profile_query.sh
new file mode 100755
index 0000000..6caa55f
--- /dev/null
+++ b/scripts/profile_query.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+# profile_query.sh - Run a JOB or TPCH query with and without Robust, show operator breakdown.
+#
+# Usage:
+#   ./profile_query.sh <query_name>                      # JOB (default), e.g. 1a
+#   ./profile_query.sh --workload tpch <query_name>      # TPCH, e.g. 03 or q03
+#   ./profile_query.sh --sql "SELECT ..."                # inline SQL
+#   ./profile_query.sh --robust-only 1a                     # skip baseline
+#   ./profile_query.sh --no-jfp robust 1a                   # disable join_filter_pushdown
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+cd "$PROJECT_ROOT"
+DUCKDB="$PROJECT_ROOT/build/release/duckdb"
+EXT="$PROJECT_ROOT/build/release/extension/robust/robust.duckdb_extension"
+PROFILE_PY="$SCRIPT_DIR/profile_breakdown.py"
+
+WORKLOAD="job"
+ROBUST_ONLY=false
+FORWARD_ONLY=false
+QUERY_SQL=""
+QUERY_NAME=""
+NO_JFP=""
+HEURISTIC=""
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --workload)
+            WORKLOAD="$2"
+            if [[ "$WORKLOAD" != "job" && "$WORKLOAD" != "tpch" ]]; then
+                echo "Error: --workload must be 'job' or 'tpch'"; exit 1
+            fi
+            shift 2 ;;
+        --sql)       QUERY_SQL="$2"; QUERY_NAME="inline"; shift 2 ;;
+        --robust-only)  ROBUST_ONLY=true; shift ;;
+        --forward-only) FORWARD_ONLY=true; shift ;;
+        --no-jfp)
+            NO_JFP="$2"
+            if [[ "$NO_JFP" != "robust" && "$NO_JFP" != "baseline" && "$NO_JFP" != "both" ]]; then
+                echo "Error: --no-jfp must be 'robust', 'baseline', or 'both'"; exit 1
+            fi
+            shift 2 ;;
+        --heuristic)
+            HEURISTIC="$2"
+            if [[ "$HEURISTIC" != "largest_root" && "$HEURISTIC" != "join_order" && "$HEURISTIC" != "all" ]]; then
+                echo "Error: --heuristic must be 'largest_root', 'join_order', or 'all'"; exit 1
+            fi
+            shift 2 ;;
+        -h|--help)
+            echo "Usage: $0 [options] <query_name>"
+            echo "  <query_name>       JOB (e.g. 1a, 2b) or TPCH (e.g. 3, q03) query name"
+            echo "  --workload <w>     'job' (default) or 'tpch'"
+            echo "  --sql \"<SQL>\"      run inline SQL instead of a stored query"
+            echo "  --robust-only         only profile with Robust (skip baseline)"
+            echo "  --forward-only     use forward-only pass mode for Robust"
+            echo "  --heuristic <h>    Robust heuristic: largest_root, join_order, or all (compare both)"
+            echo "  --no-jfp <target>  disable join_filter_pushdown (robust, baseline, or both)"
+            exit 0 ;;
+        *)
+            if [ -z "$QUERY_NAME" ]; then
+                QUERY_NAME="$1"
+            else
+                echo "Unknown option: $1"; exit 1
+            fi
+            shift ;;
+    esac
+done
+
+if [ -z "$QUERY_NAME" ]; then
+    echo "Error: provide a query name or --sql. Use -h for help."
+    exit 1
+fi
+
+# resolve workload-specific paths
+if [ "$WORKLOAD" = "tpch" ]; then
+    DB="$PROJECT_ROOT/tpchdata/tpch_sf1.duckdb"
+    QUERIES_DIR="$PROJECT_ROOT/tpchdata/queries"
+    # normalize "3" / "q3" / "q03" -> "q03"
+    if [ "$QUERY_NAME" != "inline" ]; then
+        raw="${QUERY_NAME#q}"; raw="${raw#0}"
+        if [[ "$raw" =~ ^[0-9]+$ ]]; then
+            QUERY_NAME=$(printf "q%02d" "$raw")
+        fi
+    fi
+else
+    DB="$PROJECT_ROOT/jobdata/imdb.duckdb"
+    QUERIES_DIR="$PROJECT_ROOT/jobdata/queries"
+fi
+
+# resolve SQL
+if [ -z "$QUERY_SQL" ]; then
+    QUERY_FILE="$QUERIES_DIR/${QUERY_NAME}.sql"
+    [ -f "$QUERY_FILE" ] || { echo "Query file not found: $QUERY_FILE"; exit 1; }
+    QUERY_SQL=$(cat "$QUERY_FILE")
+fi
+
+for f in "$DUCKDB" "$DB" "$EXT" "$PROFILE_PY"; do
+    [ -f "$f" ] || { echo "Missing: $f"; exit 1; }
+done
+
+TMP_BASE=$(mktemp /tmp/profile_base_XXXXXX.json)
+TMP_ROBUST=$(mktemp /tmp/profile_robust_XXXXXX.json)
+TMP_JO=$(mktemp /tmp/profile_jo_XXXXXX.json)
+TMP_DB=""
+trap 'rm -f $TMP_BASE $TMP_ROBUST $TMP_JO; [ -n "$TMP_DB" ] && rm -f "$TMP_DB"' EXIT
+
+# if the db is locked, copy it to a temp file
+ACTUAL_DB="$DB"
+if ! "$DUCKDB" "$DB" -unsigned -c "SELECT 1;" > /dev/null 2>&1; then
+    TMP_DB=$(mktemp /tmp/job_profile_XXXXXX.duckdb)
+    cp "$DB" "$TMP_DB"
+    ACTUAL_DB="$TMP_DB"
+    echo "(database locked, using temp copy)"
+fi
+
+JFP_DISABLE="SET disabled_optimizers = 'join_filter_pushdown';"
+JFP_ROBUST=""
+JFP_BASE=""
+if [[ "$NO_JFP" = "robust" || "$NO_JFP" = "both" ]]; then JFP_ROBUST="$JFP_DISABLE"; fi
+if [[ "$NO_JFP" = "baseline" || "$NO_JFP" = "both" ]]; then JFP_BASE="$JFP_DISABLE"; fi
+
+ROBUST_PASS_MODE=""
+if [ "$FORWARD_ONLY" = "true" ]; then
+    ROBUST_PASS_MODE="SET robust_pass_mode = 'forward_only';"
+fi
+
+ROBUST_HEURISTIC=""
+if [[ -n "$HEURISTIC" && "$HEURISTIC" != "all" ]]; then
+    ROBUST_HEURISTIC="SET robust_heuristic = '$HEURISTIC';"
+fi
+
+echo "Profiling query: $QUERY_NAME"
+[ -n "$NO_JFP" ] && echo "(join_filter_pushdown disabled for: $NO_JFP)"
+[ "$FORWARD_ONLY" = "true" ] && echo "(forward-only pass mode)"
+[ -n "$HEURISTIC" ] && echo "(heuristic: $HEURISTIC)"
+echo ""
+
+run_profile() {
+    local label="$1"
+    local outfile="$2"
+    local extra_settings="$3"
+    local with_ext="$4"
+    echo -n "Running $label... "
+    if [ "$with_ext" = "true" ]; then
+        "$DUCKDB" "$ACTUAL_DB" -unsigned -c "
+$JFP_ROBUST
+PRAGMA enable_profiling='json';
+PRAGMA profiling_output='$outfile';
+LOAD '$EXT';
+$extra_settings
+$QUERY_SQL
+" > /dev/null 2>/dev/null
+    else
+        "$DUCKDB" "$ACTUAL_DB" -unsigned -c "
+$JFP_BASE
+PRAGMA enable_profiling='json';
+PRAGMA profiling_output='$outfile';
+$extra_settings
+$QUERY_SQL
+" > /dev/null 2>/dev/null
+    fi
+    echo "done"
+}
+
+if [ "$HEURISTIC" = "all" ]; then
+    # 3-way comparison: baseline vs largest_root vs join_order
+    run_profile "baseline" "$TMP_BASE" "" "false"
+    run_profile "Robust (largest_root)" "$TMP_ROBUST" "$ROBUST_PASS_MODE SET robust_heuristic = 'largest_root';" "true"
+    run_profile "Robust (join_order)" "$TMP_JO" "$ROBUST_PASS_MODE SET robust_heuristic = 'join_order';" "true"
+    python3 "$PROFILE_PY" "$TMP_BASE" "$TMP_ROBUST" "$TMP_JO"
+elif [ "$ROBUST_ONLY" = "true" ]; then
+    run_profile "Robust" "$TMP_ROBUST" "$ROBUST_PASS_MODE $ROBUST_HEURISTIC" "true"
+    python3 "$PROFILE_PY" "$TMP_ROBUST"
+else
+    run_profile "baseline" "$TMP_BASE" "" "false"
+    run_profile "Robust" "$TMP_ROBUST" "$ROBUST_PASS_MODE $ROBUST_HEURISTIC" "true"
+    python3 "$PROFILE_PY" "$TMP_BASE" "$TMP_ROBUST"
+fi
diff --git a/scripts/rg_aggregate.py b/scripts/rg_aggregate.py
new file mode 100755
index 0000000..4e9cf15
--- /dev/null
+++ b/scripts/rg_aggregate.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""rg_aggregate.py — aggregate `[rg_instr] ...` stderr lines per table.
+
+Phase-1 instrumentation in the duckdb submodule emits one line per scan
+thread on `~CollectionScanState()`:
+
+  [rg_instr] table=<name> visited=N pruned_zonemap=N zero_emit=N zero_emit_pct=X.XX
+
+This script reads stdin (or a file via --input), sums the counters per
+table, and writes a CSV. Optional --query and --config columns are
+prepended so the wrapper script can pipe rows straight into a single
+results CSV.
+
+usage:
+    rg_aggregate.py [--input FILE] [--query Q] [--config C]
+                    [--out CSV] [--header]
+"""
+import argparse
+import csv
+import re
+import sys
+from collections import defaultdict
+
+LINE_RE = re.compile(
+    r"\[rg_instr\]\s+"
+    r"table=(?P<table>\S+)\s+"
+    r"visited=(?P<visited>\d+)\s+"
+    r"pruned_zonemap=(?P<pruned>\d+)\s+"
+    r"zero_emit=(?P<zero>\d+)"
+)
+
+
+def aggregate(stream):
+    totals = defaultdict(lambda: [0, 0, 0])  # visited, pruned, zero_emit
+    for line in stream:
+        m = LINE_RE.search(line)
+        if not m:
+            continue
+        t = totals[m["table"]]
+        t[0] += int(m["visited"])
+        t[1] += int(m["pruned"])
+        t[2] += int(m["zero"])
+    return totals
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--input", help="read from this file instead of stdin")
+    ap.add_argument("--query", default="", help="query label to prepend")
+    ap.add_argument("--config", default="", help="config label to prepend")
+    ap.add_argument("--out", help="write CSV to this path (append if exists)")
+    ap.add_argument("--header", action="store_true",
+                    help="emit CSV header row before data")
+    args = ap.parse_args()
+
+    src = open(args.input) if args.input else sys.stdin
+    try:
+        totals = aggregate(src)
+    finally:
+        if args.input:
+            src.close()
+
+    fieldnames = ["query", "config", "table",
+                  "visited", "pruned_zonemap", "zero_emit", "zero_emit_pct"]
+
+    if args.out:
+        # append mode; header only if file is empty / forced
+        import os
+        write_header = args.header or not os.path.exists(args.out) \
+            or os.path.getsize(args.out) == 0
+        f = open(args.out, "a", newline="")
+    else:
+        write_header = args.header
+        f = sys.stdout
+
+    try:
+        w = csv.DictWriter(f, fieldnames=fieldnames, lineterminator="\n")
+        if write_header:
+            w.writeheader()
+        for table in sorted(totals):
+            visited, pruned, zero = totals[table]
+            pct = (100.0 * zero / visited) if visited else 0.0
+            w.writerow({
+                "query": args.query,
+                "config": args.config,
+                "table": table,
+                "visited": visited,
+                "pruned_zonemap": pruned,
+                "zero_emit": zero,
+                "zero_emit_pct": f"{pct:.2f}",
+            })
+    finally:
+        if args.out:
+            f.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/setup_imdb_data.sh b/scripts/setup_imdb_data.sh
new file mode 100755
index 0000000..016f59e
--- /dev/null
+++ b/scripts/setup_imdb_data.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+#
+# setup_imdb_data.sh — one-time setup for the IMDB / Join Order Benchmark data.
+#
+# Materializes jobdata/imdb.duckdb by streaming the 21 IMDB tables from DuckDB's
+# public release artifacts (github.com/duckdb/duckdb-data, ~2.6G download). The
+# SQL is the same load.sql DuckDB's own benchmark_runner uses, taken from the
+# duckdb submodule at duckdb/benchmark/imdb/init/load.sql.
+#
+# Usage:
+#   ./scripts/setup_imdb_data.sh           # skips if jobdata/imdb.duckdb exists
+#   ./scripts/setup_imdb_data.sh --force   # rebuild even if it exists
+#
+# Environment:
+#   DUCKDB    path to duckdb CLI (default: ./build/release/duckdb)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+DB="$PROJECT_ROOT/jobdata/imdb.duckdb"
+LOAD_SQL="$PROJECT_ROOT/duckdb/benchmark/imdb/init/load.sql"
+DUCKDB="${DUCKDB:-$PROJECT_ROOT/build/release/duckdb}"
+
+FORCE=0
+for arg in "$@"; do
+    case "$arg" in
+        --force|-f) FORCE=1 ;;
+        --help|-h)
+            sed -n '3,15p' "$0" | sed 's/^# \{0,1\}//'
+            exit 0
+            ;;
+        *)
+            echo "unknown arg: $arg (use --help)" >&2
+            exit 2
+            ;;
+    esac
+done
+
+if [ ! -f "$DUCKDB" ]; then
+    echo "error: duckdb CLI not found at $DUCKDB" >&2
+    echo "  build it with 'GEN=ninja make release', or set DUCKDB=/path/to/duckdb" >&2
+    exit 1
+fi
+
+if [ ! -f "$LOAD_SQL" ]; then
+    echo "error: load.sql not found at $LOAD_SQL" >&2
+    echo "  did you 'git submodule update --init --recursive'?" >&2
+    exit 1
+fi
+
+if [ -f "$DB" ] && [ "$FORCE" -eq 0 ]; then
+    echo "$DB already exists; nothing to do (pass --force to rebuild)."
+    exit 0
+fi
+
+mkdir -p "$(dirname "$DB")"
+rm -f "$DB"
+
+echo "Building $DB"
+echo "  source : $LOAD_SQL"
+echo "  fetches: 21 parquet files from github.com/duckdb/duckdb-data (~2.6G)"
+echo
+
+{
+    echo ".bail on"
+    echo "INSTALL httpfs; LOAD httpfs;"
+    cat "$LOAD_SQL"
+} | "$DUCKDB" "$DB"
+
+title_rows="$("$DUCKDB" "$DB" -readonly -noheader -list -c "SELECT count(*) FROM title" 2>/dev/null | tr -dc '0-9' || true)"
+if [ -z "$title_rows" ] || [ "$title_rows" = "0" ]; then
+    echo >&2
+    echo "error: load failed — 'title' table is missing or empty in $DB" >&2
+    echo "  common cause: httpfs extension blocked / no network to github.com" >&2
+    echo "  re-run manually to see the underlying error:" >&2
+    echo "    $DUCKDB $DB" >&2
+    echo "    INSTALL httpfs; LOAD httpfs;" >&2
+    echo "    .read $LOAD_SQL" >&2
+    exit 1
+fi
+
+echo
+echo "Done. Verifying:"
+"$DUCKDB" "$DB" -readonly -c "SELECT 'title' AS table_name, count(*) AS rows FROM title UNION ALL SELECT 'cast_info', count(*) FROM cast_info ORDER BY table_name;"
diff --git a/scripts/test_job.sh b/scripts/test_job.sh
new file mode 100755
index 0000000..6115151
--- /dev/null
+++ b/scripts/test_job.sh
@@ -0,0 +1,461 @@
+#!/bin/bash
+# test_job.sh - Test all JOB queries with and without the Robust extension
+#
+# Usage: ./scripts/test_job.sh [options]
+#   --generate-baseline    Generate baseline results only (no comparison)
+#   --test-only           Run tests against existing baseline (skip baseline generation)
+#   --query <name>        Test a specific query (e.g., --query 1a)
+#   --verbose             Show detailed output for failures
+#   --timing              Show timing information
+#   --runs N              Run each query N times and take the minimum (default: 1)
+#   --limit N             Only run the first N queries (default: all)
+#   --no-jfp <target>     Disable DuckDB's join_filter_pushdown optimizer
+#                         target: robust, baseline, or both
+#   --heuristic <name>    Robust heuristic: join_order (default), largest_root
+
+set -e
+
+# Configuration
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+DUCKDB="$PROJECT_ROOT/build/release/duckdb"
+DB="$PROJECT_ROOT/jobdata/imdb.duckdb"
+EXT="$PROJECT_ROOT/build/release/extension/robust/robust.duckdb_extension"
+QUERIES_DIR="$PROJECT_ROOT/jobdata/queries"
+RESULTS_DIR="$PROJECT_ROOT/job_test_results"
+
+# Options
+GENERATE_BASELINE=false
+TEST_ONLY=false
+SPECIFIC_QUERY=""
+VERBOSE=false
+TIMING=false
+RUNS=1
+LIMIT=0
+NO_JFP=""  # "", "robust", "baseline", or "both"
+HEURISTIC=""  # "", "join_order", "largest_root"
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --generate-baseline)
+            GENERATE_BASELINE=true
+            shift
+            ;;
+        --test-only)
+            TEST_ONLY=true
+            shift
+            ;;
+        --query)
+            SPECIFIC_QUERY="$2"
+            shift 2
+            ;;
+        --verbose)
+            VERBOSE=true
+            shift
+            ;;
+        --timing)
+            TIMING=true
+            shift
+            ;;
+        --runs)
+            RUNS="$2"
+            shift 2
+            ;;
+        --limit)
+            LIMIT="$2"
+            shift 2
+            ;;
+        --no-jfp)
+            NO_JFP="$2"
+            if [[ "$NO_JFP" != "robust" && "$NO_JFP" != "baseline" && "$NO_JFP" != "both" ]]; then
+                echo "Error: --no-jfp must be 'robust', 'baseline', or 'both'"
+                exit 1
+            fi
+            shift 2
+            ;;
+        --heuristic)
+            HEURISTIC="$2"
+            if [[ "$HEURISTIC" != "largest_root" && "$HEURISTIC" != "join_order" ]]; then
+                echo "Error: --heuristic must be 'largest_root' or 'join_order'"
+                exit 1
+            fi
+            shift 2
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Check prerequisites
+check_prerequisites() {
+    if [ ! -f "$DUCKDB" ]; then
+        echo -e "${RED}Error: DuckDB binary not found at $DUCKDB${NC}"
+        echo "Run 'GEN=ninja make release' first"
+        exit 1
+    fi
+
+    if [ ! -f "$DB" ]; then
+        echo -e "${RED}Error: JOB database not found at $DB${NC}"
+        echo "Run: ./scripts/setup_imdb_data.sh"
+        exit 1
+    fi
+
+    if [ ! -f "$EXT" ]; then
+        echo -e "${RED}Error: Robust extension not found at $EXT${NC}"
+        echo "Run 'GEN=ninja make release' first"
+        exit 1
+    fi
+
+    if [ ! -d "$QUERIES_DIR" ]; then
+        echo -e "${RED}Error: Queries directory not found at $QUERIES_DIR${NC}"
+        exit 1
+    fi
+}
+
+# Build the disable-JFP prefix for a given mode ("true" = robust, "false" = baseline)
+jfp_prefix() {
+    local with_extension="$1"
+    if [[ "$NO_JFP" = "both" ]] || \
+       [[ "$NO_JFP" = "robust" && "$with_extension" = "true" ]] || \
+       [[ "$NO_JFP" = "baseline" && "$with_extension" = "false" ]]; then
+        echo "SET disabled_optimizers = 'join_filter_pushdown';"
+    fi
+}
+
+# Build the SET robust_heuristic prefix (only for Robust runs)
+heuristic_prefix() {
+    if [[ -n "$HEURISTIC" ]]; then
+        echo "SET robust_heuristic = '$HEURISTIC';"
+    fi
+}
+
+# Run a query and capture output
+run_query() {
+    local query="$1"
+    local with_extension="$2"
+    local output_file="$3"
+    local jfp=$(jfp_prefix "$with_extension")
+
+    if [ "$with_extension" = "true" ]; then
+        local heur=$(heuristic_prefix)
+        "$DUCKDB" "$DB" -unsigned -noheader -list -c "LOAD '$EXT'; $heur $jfp $query" 2>/dev/null > "$output_file" || true
+    else
+        "$DUCKDB" "$DB" -unsigned -noheader -list -c "$jfp $query" 2>/dev/null > "$output_file" || true
+    fi
+}
+
+# Run a query with timing using DuckDB's built-in profiling (runs $RUNS times, returns minimum)
+run_query_timed() {
+    local query="$1"
+    local with_extension="$2"
+    local output_file="$3"
+
+    local prof_dir=$(mktemp -d /tmp/prof_XXXXXXXX)
+    local prof_file="$prof_dir/profile.json"
+    local min_time=""
+
+    local jfp=$(jfp_prefix "$with_extension")
+
+    local heur=$(heuristic_prefix)
+
+    for ((r=1; r<=RUNS; r++)); do
+        if [ "$with_extension" = "true" ]; then
+            "$DUCKDB" "$DB" -unsigned -noheader -list -c "
+LOAD '$EXT';
+$heur
+$jfp
+PRAGMA enable_profiling='json';
+PRAGMA profiling_output='$prof_file';
+$query" 2>/dev/null > "$output_file" || true
+        else
+            "$DUCKDB" "$DB" -unsigned -noheader -list -c "
+$jfp
+PRAGMA enable_profiling='json';
+PRAGMA profiling_output='$prof_file';
+$query" 2>/dev/null > "$output_file" || true
+        fi
+
+        # extract query execution time from profiling JSON
+        local elapsed=$(python3 -c "
+import json, sys
+try:
+    with open('$prof_file') as f:
+        d = json.load(f)
+    print(d.get('latency', 0))
+except:
+    print(0)
+")
+        if [ -z "$min_time" ] || [ "$(python3 -c "print(1 if $elapsed < $min_time else 0)")" = "1" ]; then
+            min_time="$elapsed"
+        fi
+    done
+
+    rm -rf "$prof_dir"
+    echo "scale=3; $min_time / 1" | bc
+}
+
+# Generate baseline results for all queries
+generate_baseline() {
+    echo "=== Generating Baseline Results ==="
+    mkdir -p "$RESULTS_DIR/baseline"
+
+    local count=0
+    local total=$(ls -1 "$QUERIES_DIR"/*.sql 2>/dev/null | wc -l | tr -d ' ')
+
+    for query_file in "$QUERIES_DIR"/*.sql; do
+        local query_name=$(basename "$query_file" .sql)
+        ((count++))
+
+        echo -n "[$count/$total] Generating baseline for $query_name... "
+
+        local query=$(cat "$query_file")
+        run_query "$query" "false" "$RESULTS_DIR/baseline/$query_name.txt"
+
+        echo -e "${GREEN}done${NC}"
+    done
+
+    echo ""
+    echo "Baseline results saved to $RESULTS_DIR/baseline/"
+}
+
+# Test a single query
+test_query() {
+    local query_name="$1"
+    local query_file="$QUERIES_DIR/$query_name.sql"
+
+    if [ ! -f "$query_file" ]; then
+        echo -e "${RED}Query file not found: $query_file${NC}"
+        return 1
+    fi
+
+    local query=$(cat "$query_file")
+    local baseline_file="$RESULTS_DIR/baseline/$query_name.txt"
+    local robust_file="$RESULTS_DIR/robust/$query_name.txt"
+
+    mkdir -p "$RESULTS_DIR/robust"
+    mkdir -p "$RESULTS_DIR/baseline"
+
+    if [ "$TIMING" = "true" ]; then
+        local baseline_time=$(run_query_timed "$query" "false" "$baseline_file")
+        local robust_time=$(run_query_timed "$query" "true" "$robust_file")
+    else
+        run_query "$query" "false" "$baseline_file"
+        run_query "$query" "true" "$robust_file"
+    fi
+
+    # Compare results
+    if diff -q "$baseline_file" "$robust_file" > /dev/null 2>&1; then
+        if [ "$TIMING" = "true" ]; then
+            local speedup=$(echo "scale=2; $baseline_time / $robust_time" | bc 2>/dev/null || echo "N/A")
+            if [ "$RUNS" -gt 1 ]; then
+                echo -e "${GREEN}✅ PASS${NC} (baseline: ${baseline_time}s [min of $RUNS], robust: ${robust_time}s [min of $RUNS], speedup: ${speedup}x)"
+            else
+                echo -e "${GREEN}✅ PASS${NC} (baseline: ${baseline_time}s, robust: ${robust_time}s, speedup: ${speedup}x)"
+            fi
+
+            # accumulate log-speedup for geometric mean
+            local log_sp=$(python3 -c "import math; print(math.log($baseline_time / $robust_time))" 2>/dev/null || echo "0")
+            LOG_SUM_SPEEDUP=$(python3 -c "print($LOG_SUM_SPEEDUP + $log_sp)")
+            ((TIMED_QUERY_COUNT++))
+
+            # track faster/slower counts (use 5% threshold to avoid noise)
+            local is_faster=$(echo "$baseline_time > $robust_time * 1.05" | bc 2>/dev/null || echo "0")
+            local is_slower=$(echo "$robust_time > $baseline_time * 1.05" | bc 2>/dev/null || echo "0")
+
+            if [ "$is_faster" = "1" ]; then
+                ((ROBUST_FASTER++))
+                FASTER_QUERIES="$FASTER_QUERIES $query_name"
+            elif [ "$is_slower" = "1" ]; then
+                ((ROBUST_SLOWER++))
+                SLOWER_QUERIES="$SLOWER_QUERIES $query_name"
+            else
+                ((ROBUST_SAME++))
+            fi
+        else
+            echo -e "${GREEN}✅ PASS${NC}"
+        fi
+        return 0
+    else
+        echo -e "${RED}❌ FAIL${NC}"
+
+        if [ "$VERBOSE" = "true" ]; then
+            echo "  Expected (first 10 lines):"
+            head -10 "$baseline_file" | sed 's/^/    /'
+            echo "  Got (first 10 lines):"
+            head -10 "$robust_file" | sed 's/^/    /'
+            echo "  Diff:"
+            diff "$baseline_file" "$robust_file" | head -20 | sed 's/^/    /'
+        fi
+        return 1
+    fi
+}
+
+# Global timing counters (for tracking Robust performance)
+ROBUST_FASTER=0
+ROBUST_SLOWER=0
+ROBUST_SAME=0
+FASTER_QUERIES=""
+SLOWER_QUERIES=""
+LOG_SUM_SPEEDUP="0"
+TIMED_QUERY_COUNT=0
+
+# Test all queries
+test_all_queries() {
+    echo "=== Testing All JOB Queries ==="
+    echo ""
+
+    mkdir -p "$RESULTS_DIR/robust"
+
+    local passed=0
+    local failed=0
+    local failed_queries=""
+    local count=0
+    local total=$(ls -1 "$QUERIES_DIR"/*.sql 2>/dev/null | wc -l | tr -d ' ')
+
+    # Reset timing counters
+    ROBUST_FASTER=0
+    ROBUST_SLOWER=0
+    ROBUST_SAME=0
+    FASTER_QUERIES=""
+    SLOWER_QUERIES=""
+    LOG_SUM_SPEEDUP="0"
+    TIMED_QUERY_COUNT=0
+
+    # Sort queries naturally (1a, 1b, 1c, 2a, ... not 1a, 10a, 11a, ...)
+    for query_file in $(ls -1 "$QUERIES_DIR"/*.sql | sort -V); do
+        local query_name=$(basename "$query_file" .sql)
+        ((count++))
+
+        if [ "$LIMIT" -gt 0 ] && [ "$count" -gt "$LIMIT" ]; then
+            break
+        fi
+
+        echo -n "[$count/$total] Testing $query_name... "
+
+        if test_query "$query_name"; then
+            ((passed++))
+        else
+            ((failed++))
+            failed_queries="$failed_queries $query_name"
+        fi
+    done
+
+    # Summary
+    echo ""
+    echo "=========================================="
+    echo "               SUMMARY"
+    echo "=========================================="
+    echo -e "Passed: ${GREEN}$passed${NC} / $total"
+    echo -e "Failed: ${RED}$failed${NC} / $total"
+
+    if [ -n "$failed_queries" ]; then
+        echo ""
+        echo -e "${RED}Failed queries:${NC}"
+        for q in $failed_queries; do
+            echo "  - $q"
+        done
+        echo ""
+        echo "Run with --verbose to see details, or test individual queries:"
+        echo "  ./scripts/test_job.sh --query <name> --verbose"
+    fi
+
+    # Timing summary (only when --timing is enabled)
+    if [ "$TIMING" = "true" ]; then
+        echo ""
+        echo "=========================================="
+        echo "           TIMING SUMMARY"
+        echo "=========================================="
+        echo -e "Robust Faster: ${GREEN}$ROBUST_FASTER${NC} queries"
+        echo -e "Robust Slower: ${RED}$ROBUST_SLOWER${NC} queries"
+        echo -e "Robust Same:   $ROBUST_SAME queries"
+
+        if [ "$TIMED_QUERY_COUNT" -gt 0 ]; then
+            local geo_mean=$(python3 -c "import math; print(f'{math.exp($LOG_SUM_SPEEDUP / $TIMED_QUERY_COUNT):.3f}')")
+            echo ""
+            echo -e "Geometric Mean Speedup: ${YELLOW}${geo_mean}x${NC} (over $TIMED_QUERY_COUNT queries)"
+        fi
+
+        if [ -n "$FASTER_QUERIES" ]; then
+            echo ""
+            echo -e "${GREEN}Queries where Robust was faster:${NC}"
+            for q in $FASTER_QUERIES; do
+                echo "  - $q"
+            done
+        fi
+
+        if [ -n "$SLOWER_QUERIES" ]; then
+            echo ""
+            echo -e "${RED}Queries where Robust was slower:${NC}"
+            for q in $SLOWER_QUERIES; do
+                echo "  - $q"
+            done
+        fi
+    fi
+
+    # Save summary to file
+    {
+        echo "Test run: $(date)"
+        echo "Passed: $passed / $total"
+        echo "Failed: $failed / $total"
+        echo "Failed queries:$failed_queries"
+        if [ "$TIMING" = "true" ]; then
+            echo ""
+            echo "Timing Summary:"
+            echo "Robust Faster: $ROBUST_FASTER queries"
+            echo "Robust Slower: $ROBUST_SLOWER queries"
+            echo "Robust Same: $ROBUST_SAME queries"
+            echo "Faster queries:$FASTER_QUERIES"
+            echo "Slower queries:$SLOWER_QUERIES"
+            if [ "$TIMED_QUERY_COUNT" -gt 0 ]; then
+                local geo_mean=$(python3 -c "import math; print(f'{math.exp($LOG_SUM_SPEEDUP / $TIMED_QUERY_COUNT):.3f}')")
+                echo "Geometric Mean Speedup: ${geo_mean}x (over $TIMED_QUERY_COUNT queries)"
+            fi
+        fi
+    } > "$RESULTS_DIR/summary.txt"
+
+    if [ $failed -gt 0 ]; then
+        return 1
+    fi
+    return 0
+}
+
+# Main
+main() {
+    echo "JOB Query Test Suite for Robust Extension"
+    echo "========================================="
+    echo ""
+
+    check_prerequisites
+
+    if [ -n "$SPECIFIC_QUERY" ]; then
+        echo "Testing query: $SPECIFIC_QUERY"
+        echo ""
+        test_query "$SPECIFIC_QUERY"
+    elif [ "$GENERATE_BASELINE" = "true" ]; then
+        generate_baseline
+    elif [ "$TEST_ONLY" = "true" ]; then
+        if [ ! -d "$RESULTS_DIR/baseline" ]; then
+            echo -e "${RED}Error: No baseline results found. Run with --generate-baseline first${NC}"
+            exit 1
+        fi
+        test_all_queries
+    else
+        # Default: generate baseline if needed, then test
+        if [ ! -d "$RESULTS_DIR/baseline" ] || [ -z "$(ls -A "$RESULTS_DIR/baseline" 2>/dev/null)" ]; then
+            generate_baseline
+            echo ""
+        fi
+        test_all_queries
+    fi
+}
+
+main "$@"
diff --git a/scripts/vendor_duckdb_bench.sh b/scripts/vendor_duckdb_bench.sh
new file mode 100755
index 0000000..de40004
--- /dev/null
+++ b/scripts/vendor_duckdb_bench.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# vendor_duckdb_bench.sh - Materialize Robust-owned content into the DuckDB submodule.
+#
+# Run by `make release` / `make debug` automatically. Safe to re-run.
+#
+# What it does:
+#   1. Copies bench_suites/{imdb_robust,imdb_robust_fwd,imdb_robust_jo,tpch_baseline,tpch_robust}/
+#      into duckdb/benchmark/  (so benchmark_runner can find them)
+#   2. Applies patches/*.patch to the duckdb submodule (idempotent: skips if already applied)
+#   3. Creates a portable relative symlink  benchmark -> duckdb/benchmark  at the project root
+#      (so scripts/bench_*.sh can reference benchmark/<suite>/<query>.benchmark from PROJECT_ROOT)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+SUITE_SRC="$PROJECT_ROOT/bench_suites"
+DUCKDB_BENCH="$PROJECT_ROOT/duckdb/benchmark"
+PATCH_DIR="$PROJECT_ROOT/patches"
+
+# 1. copy vendored suites into duckdb/benchmark/
+if [ -d "$SUITE_SRC" ]; then
+    for suite_path in "$SUITE_SRC"/*/; do
+        suite_name="$(basename "$suite_path")"
+        # rsync would be nicer but we want zero deps; cp -R is enough.
+        rm -rf "$DUCKDB_BENCH/$suite_name"
+        cp -R "$suite_path" "$DUCKDB_BENCH/$suite_name"
+    done
+    echo "  bench suites copied: $(ls -1 "$SUITE_SRC" | tr '\n' ' ')"
+else
+    echo "  (no $SUITE_SRC dir; skipping suite copy)"
+fi
+
+# 2. apply patches idempotently
+apply_patch() {
+    local patch_path="$1"
+    local name; name="$(basename "$patch_path")"
+    if git -C "$PROJECT_ROOT/duckdb" apply -R --check "$patch_path" >/dev/null 2>&1; then
+        echo "  $name: already applied"
+    elif git -C "$PROJECT_ROOT/duckdb" apply --check "$patch_path" >/dev/null 2>&1; then
+        git -C "$PROJECT_ROOT/duckdb" apply "$patch_path"
+        echo "  $name: applied"
+    else
+        echo "  $name: ERROR - does not apply cleanly to current duckdb submodule" >&2
+        echo "    (probably means upstream DuckDB changed the patched region; the patch needs refreshing)" >&2
+        exit 1
+    fi
+}
+
+if [ -d "$PATCH_DIR" ]; then
+    for patch_path in "$PATCH_DIR"/*.patch; do
+        [ -e "$patch_path" ] || continue
+        apply_patch "$patch_path"
+    done
+fi
+
+# 3. ensure portable relative symlink benchmark -> duckdb/benchmark
+SYMLINK="$PROJECT_ROOT/benchmark"
+if [ -L "$SYMLINK" ]; then
+    current_target="$(readlink "$SYMLINK")"
+    if [ "$current_target" != "duckdb/benchmark" ]; then
+        rm "$SYMLINK"
+        ln -s duckdb/benchmark "$SYMLINK"
+        echo "  benchmark symlink: repointed to duckdb/benchmark (was $current_target)"
+    fi
+elif [ -e "$SYMLINK" ]; then
+    echo "  WARNING: $SYMLINK exists and is not a symlink; leaving untouched" >&2
+else
+    ln -s duckdb/benchmark "$SYMLINK"
+    echo "  benchmark symlink: created -> duckdb/benchmark"
+fi
diff --git a/src/bloom_filter.cpp b/src/bloom_filter.cpp
index 46c2295..0517d30 100644
--- a/src/bloom_filter.cpp
+++ b/src/bloom_filter.cpp
@@ -1,27 +1,13 @@
 #include "bloom_filter.hpp"
 
 #include "duckdb/common/types/selection_vector.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
-
-#include <random>
-#include <cmath>
-#include <iostream>
+#include "duckdb/common/vector_operations/vector_operations.hpp"
 
 namespace duckdb {
-namespace {
-static uint32_t CeilPowerOfTwo(uint32_t n) {
-	if (n <= 1) {
-		return 1;
-	}
-	n--;
-	n |= (n >> 1);
-	n |= (n >> 2);
-	n |= (n >> 4);
-	n |= (n >> 8);
-	n |= (n >> 16);
-	return n + 1;
-}
 
+namespace {
 static Vector HashColumns(DataChunk &chunk, const vector<idx_t> &cols) {
 	auto count = chunk.size();
 	Vector hashes(LogicalType::HASH);
@@ -38,31 +24,57 @@ static Vector HashColumns(DataChunk &chunk, const vector<idx_t> &cols) {
 }
 } // namespace
 
-void BloomFilter::Initialize(ClientContext &context_p, uint32_t est_num_rows) {
+void PTBloomFilter::Initialize(ClientContext &context_p, uint32_t est_num_rows) {
 	context = &context_p;
 	buffer_manager = &BufferManager::GetBufferManager(*context);
+	bf_.Initialize(context_p, static_cast<idx_t>(est_num_rows));
+	sized_for_rows_ = static_cast<idx_t>(est_num_rows);
+}
 
-	uint32_t min_bits = std::max<uint32_t>(MIN_NUM_BITS, est_num_rows * MIN_NUM_BITS_PER_KEY);
-	num_sectors = std::min(CeilPowerOfTwo(min_bits) >> LOG_SECTOR_SIZE, MAX_NUM_SECTORS);
-	num_sectors_log = static_cast<uint32_t>(std::log2(num_sectors));
+void PTBloomFilter::ReinitializeAndRehash(ClientContext &context_p, idx_t actual_rows, ColumnDataCollection &data,
+                                          const vector<idx_t> &cols) {
+	// re-allocate native BF with accurate count; the native Initialize reassigns AllocatedData,
+	// which releases the previous allocation via RAII
+	bf_.Initialize(context_p, actual_rows);
+	sized_for_rows_ = actual_rows;
+	has_data_ = data.Count() > 0;
 
-	buf_ = buffer_manager->GetBufferAllocator().Allocate(64 + num_sectors * sizeof(uint32_t));
-	// make sure blocks is a 64-byte aligned pointer, i.e., cache-line aligned
-	blocks = reinterpret_cast<uint32_t *>((64ULL + reinterpret_cast<uint64_t>(buf_.get())) & ~63ULL);
-	std::fill_n(blocks, num_sectors, 0);
+	if (data.Count() == 0) {
+		return;
+	}
+
+	DataChunk chunk;
+	data.InitializeScanChunk(chunk);
+	ColumnDataScanState scan_state;
+	data.InitializeScan(scan_state);
+	while (data.Scan(scan_state, chunk)) {
+		const idx_t count = chunk.size();
+		if (count == 0) {
+			continue;
+		}
+		Vector hashes = HashColumns(chunk, cols);
+		bf_.InsertHashes(hashes, count);
+	}
 }
 
-int BloomFilter::Lookup(DataChunk &chunk, vector<uint32_t> &results, const vector<idx_t> &bound_cols_applied) const {
-	int count = static_cast<int>(chunk.size());
+idx_t PTBloomFilter::LookupSel(DataChunk &chunk, SelectionVector &sel, const vector<idx_t> &bound_cols_applied,
+                               uint8_t *bit_vector_buf) const {
+	idx_t count = chunk.size();
+	if (count == 0) {
+		return 0;
+	}
 	Vector hashes = HashColumns(chunk, bound_cols_applied);
-	BloomFilterLookup(count, reinterpret_cast<uint64_t *>(hashes.GetData()), blocks, results.data());
-	return count;
+	return bf_.LookupHashes(hashes, sel, count);
 }
 
-void BloomFilter::Insert(DataChunk &chunk, const vector<idx_t> &bound_cols_built) {
-	int count = static_cast<int>(chunk.size());
+void PTBloomFilter::Insert(DataChunk &chunk, const vector<idx_t> &bound_cols_built) {
+	idx_t count = chunk.size();
+	if (count == 0) {
+		return;
+	}
+	has_data_ = true;
 	Vector hashes = HashColumns(chunk, bound_cols_built);
-	std::lock_guard<std::mutex> lock(insert_lock);
-	BloomFilterInsert(count, reinterpret_cast<uint64_t *>(hashes.GetData()), blocks);
+	bf_.InsertHashes(hashes, count);
 }
-} // namespace duckdb
\ No newline at end of file
+
+} // namespace duckdb
diff --git a/src/bloom_filter.hpp b/src/bloom_filter.hpp
new file mode 100644
index 0000000..ea17a4a
--- /dev/null
+++ b/src/bloom_filter.hpp
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/optimizer/predicate_transfer/bloom_filter/bloom_filter.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+
+#include "duckdb/planner/filter/bloom_filter.hpp"
+#include "duckdb/planner/column_binding.hpp"
+#include "duckdb/storage/buffer_manager.hpp"
+#include "duckdb/common/types/data_chunk.hpp"
+
+#include <cstdint>
+
+namespace duckdb {
+
+class ColumnDataCollection;
+
+// wrapper around DuckDB's native BloomFilter with DataChunk-level operations
+class PTBloomFilter {
+public:
+	PTBloomFilter() = default;
+	void Initialize(ClientContext &context_p, uint32_t est_num_rows);
+
+	ClientContext *context = nullptr;
+	BufferManager *buffer_manager = nullptr;
+
+	bool finalized_ = false;
+
+public:
+	idx_t LookupSel(DataChunk &chunk, SelectionVector &sel, const vector<idx_t> &bound_cols_applied,
+	                uint8_t *bit_vector_buf) const;
+	void Insert(DataChunk &chunk, const vector<idx_t> &bound_cols_built);
+
+	// reallocate the native BF for `actual_rows` and re-hash all rows from `data` on `cols`
+	void ReinitializeAndRehash(ClientContext &context_p, idx_t actual_rows, ColumnDataCollection &data,
+	                           const vector<idx_t> &cols);
+
+	idx_t SizedForRows() const {
+		return sized_for_rows_;
+	}
+
+	bool IsEmpty() const {
+		return !has_data_;
+	}
+
+	BloomFilter &GetNativeFilter() {
+		return bf_;
+	}
+
+private:
+	bool has_data_ = false;
+	idx_t sized_for_rows_ = 0;
+	BloomFilter bf_;
+};
+
+} // namespace duckdb
diff --git a/src/dag.cpp b/src/dag.cpp
deleted file mode 100644
index ef7cc5f..0000000
--- a/src/dag.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "dag.hpp"
-
-namespace duckdb {
-
-bool FilterPlan::operator==(const FilterPlan &other) const {
-	return build == other.build && apply == other.apply && return_types == other.return_types;
-}
-
-GraphEdge *GraphNode::Add(idx_t other, bool is_forward, bool is_in_edge) {
-	auto &stage = (is_forward ? forward_stage_edges : backward_stage_edges);
-	auto &edges = (is_in_edge ? stage.in : stage.out);
-	for (auto &edge : edges) {
-		if (edge->destination == other) {
-			return edge.get();
-		}
-	}
-	edges.emplace_back(make_uniq<GraphEdge>(other));
-	return edges.back().get();
-}
-
-GraphEdge *GraphNode::Add(idx_t other, const vector<ColumnBinding> &left_cols, const vector<ColumnBinding> &right_cols,
-						  const vector<LogicalType> &types, bool is_forward, bool is_in_edge) {
-	auto *edge = Add(other, is_forward, is_in_edge);
-	edge->left = left_cols;
-	edge->right = right_cols;
-	edge->return_types = types;
-	return edge;
-}
-
-GraphEdge *GraphNode::Add(idx_t other, const shared_ptr<FilterPlan> &filter_plan, bool is_forward, bool is_in_edge) {
-	auto *edge = Add(other, is_forward, is_in_edge);
-	edge->filter_plan.push_back(filter_plan);
-	return edge;
-}
-
-} // namespace duckdb
diff --git a/src/include/bloom_filter.hpp b/src/include/bloom_filter.hpp
deleted file mode 100644
index 78dca35..0000000
--- a/src/include/bloom_filter.hpp
+++ /dev/null
@@ -1,179 +0,0 @@
-//===----------------------------------------------------------------------===//
-//                         DuckDB
-//
-// duckdb/optimizer/predicate_transfer/bloom_filter/bloom_filter.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-#pragma once
-
-#include "duckdb/planner/column_binding.hpp"
-#include "duckdb/storage/buffer_manager.hpp"
-
-#include <cstdint>
-#include <mutex>
-
-#ifndef BF_RESTRICT
-#if defined(_MSC_VER)
-#define BF_RESTRICT __restrict
-#elif defined(__GNUC__) || defined(__clang__)
-#define BF_RESTRICT __restrict__
-#else
-// Fallback: just return the pointer as-is
-#define BF_RESTRICT
-#endif
-#endif
-
-namespace duckdb {
-
-static constexpr const uint32_t MAX_NUM_SECTORS = (1ULL << 26);
-static constexpr const uint32_t MIN_NUM_BITS_PER_KEY = 16;
-static constexpr const uint32_t MIN_NUM_BITS = 512;
-static constexpr const uint32_t LOG_SECTOR_SIZE = 5;
-static constexpr const int32_t SIMD_BATCH_SIZE = 16;
-
-class BloomFilter {
-public:
-	BloomFilter() = default;
-	void Initialize(ClientContext &context_p, uint32_t est_num_rows);
-
-	ClientContext *context;
-	BufferManager *buffer_manager;
-
-	bool finalized_;
-
-public:
-	int Lookup(DataChunk &chunk, vector<uint32_t> &results, const vector<idx_t> &bound_cols_applied) const;
-	void Insert(DataChunk &chunk, const vector<idx_t> &bound_cols_built);
-
-	uint32_t num_sectors;
-	uint32_t num_sectors_log;
-
-	std::mutex insert_lock;
-	uint32_t *blocks;
-
-private:
-	// key_lo |5:bit3|5:bit2|5:bit1|  13:block    |4:sector1 | bit layout (32:total)
-	// key_hi |5:bit4|5:bit3|5:bit2|5:bit1|9:block|3:sector2 | bit layout (32:total)
-	inline uint32_t GetMask1(uint32_t key_lo) const {
-		// 3 bits in key_lo
-		return (1u << ((key_lo >> 17) & 31)) | (1u << ((key_lo >> 22) & 31)) | (1u << ((key_lo >> 27) & 31));
-	}
-	inline uint32_t GetMask2(uint32_t key_hi) const {
-		// 4 bits in key_hi
-		return (1u << ((key_hi >> 12) & 31)) | (1u << ((key_hi >> 17) & 31)) | (1u << ((key_hi >> 22) & 31)) |
-		       (1u << ((key_hi >> 27) & 31));
-	}
-
-	inline uint32_t GetSector1(uint32_t key_lo, uint32_t key_hi) const {
-		// block: 13 bits in key_lo and 9 bits in key_hi
-		// sector 1: 4 bits in key_lo
-		return ((key_lo & ((1 << 17) - 1)) + ((key_hi << 14) & (((1 << 9) - 1) << 17))) & (num_sectors - 1);
-	}
-	inline uint32_t GetSector2(uint32_t key_hi, uint32_t block1) const {
-		// sector 2: 3 bits in key_hi
-		return block1 ^ (8 + (key_hi & 7));
-	}
-
-	inline void InsertOne(uint32_t key_lo, uint32_t key_hi, uint32_t *BF_RESTRICT bf) const {
-		uint32_t sector1 = GetSector1(key_lo, key_hi);
-		uint32_t mask1 = GetMask1(key_lo);
-		uint32_t sector2 = GetSector2(key_hi, sector1);
-		uint32_t mask2 = GetMask2(key_hi);
-		bf[sector1] |= mask1;
-		bf[sector2] |= mask2;
-	}
-	inline bool LookupOne(uint32_t key_lo, uint32_t key_hi, const uint32_t *BF_RESTRICT bf) const {
-		uint32_t sector1 = GetSector1(key_lo, key_hi);
-		uint32_t mask1 = GetMask1(key_lo);
-		uint32_t sector2 = GetSector2(key_hi, sector1);
-		uint32_t mask2 = GetMask2(key_hi);
-		return ((bf[sector1] & mask1) == mask1) & ((bf[sector2] & mask2) == mask2);
-	}
-
-private:
-	int BloomFilterLookup(int num, const uint64_t *BF_RESTRICT key64, const uint32_t *BF_RESTRICT bf,
-	                      uint32_t *BF_RESTRICT out) const {
-		const uint32_t *BF_RESTRICT key = reinterpret_cast<const uint32_t * BF_RESTRICT>(key64);
-		for (int i = 0; i + SIMD_BATCH_SIZE <= num; i += SIMD_BATCH_SIZE) {
-			uint32_t block1[SIMD_BATCH_SIZE], mask1[SIMD_BATCH_SIZE];
-			uint32_t block2[SIMD_BATCH_SIZE], mask2[SIMD_BATCH_SIZE];
-
-			for (int j = 0; j < SIMD_BATCH_SIZE; j++) {
-				int p = i + j;
-				uint32_t key_lo = key[p + p];
-				uint32_t key_hi = key[p + p + 1];
-				block1[j] = GetSector1(key_lo, key_hi);
-				mask1[j] = GetMask1(key_lo);
-				block2[j] = GetSector2(key_hi, block1[j]);
-				mask2[j] = GetMask2(key_hi);
-			}
-
-			for (int j = 0; j < SIMD_BATCH_SIZE; j++) {
-				out[i + j] = ((bf[block1[j]] & mask1[j]) == mask1[j]) & ((bf[block2[j]] & mask2[j]) == mask2[j]);
-			}
-		}
-
-		// unaligned tail
-		for (int i = num & ~(SIMD_BATCH_SIZE - 1); i < num; i++) {
-			out[i] = LookupOne(key[i + i], key[i + i + 1], bf);
-		}
-		return num;
-	}
-
-	void BloomFilterInsert(int num, const uint64_t *BF_RESTRICT key64, uint32_t *BF_RESTRICT bf) const {
-		const uint32_t *BF_RESTRICT key = reinterpret_cast<const uint32_t * BF_RESTRICT>(key64);
-		for (int i = 0; i + SIMD_BATCH_SIZE <= num; i += SIMD_BATCH_SIZE) {
-			uint32_t block1[SIMD_BATCH_SIZE], mask1[SIMD_BATCH_SIZE];
-			uint32_t block2[SIMD_BATCH_SIZE], mask2[SIMD_BATCH_SIZE];
-
-			for (int j = 0; j < SIMD_BATCH_SIZE; j++) {
-				int p = i + j;
-				uint32_t key_lo = key[p + p];
-				uint32_t key_hi = key[p + p + 1];
-				block1[j] = GetSector1(key_lo, key_hi);
-				mask1[j] = GetMask1(key_lo);
-				block2[j] = GetSector2(key_hi, block1[j]);
-				mask2[j] = GetMask2(key_hi);
-			}
-
-			for (int j = 0; j < SIMD_BATCH_SIZE; j++) {
-				bf[block1[j]] |= mask1[j];
-				bf[block2[j]] |= mask2[j];
-			}
-		}
-
-		// unaligned tail
-		for (int i = num & ~(SIMD_BATCH_SIZE - 1); i < num; i++) {
-			InsertOne(key[i + i], key[i + i + 1], bf);
-		}
-	}
-
-	AllocatedData buf_;
-};
-
-class BloomFilterUsage {
-public:
-	BloomFilterUsage(shared_ptr<BloomFilter> bloom_filter, const vector<idx_t> &applied, const vector<idx_t> &built)
-	    : bloom_filter(std::move(bloom_filter)), bound_cols_applied(applied), bound_cols_built(built) {
-	}
-
-	bool IsValid() const {
-		return bloom_filter->finalized_;
-	}
-
-public:
-	int Lookup(DataChunk &chunk, vector<uint32_t> &results) const {
-		return bloom_filter->Lookup(chunk, results, bound_cols_applied);
-	}
-	void Insert(DataChunk &chunk) const {
-		return bloom_filter->Insert(chunk, bound_cols_applied);
-	}
-
-private:
-	shared_ptr<BloomFilter> bloom_filter;
-	vector<idx_t> bound_cols_applied;
-	vector<idx_t> bound_cols_built;
-};
-
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/include/dag.hpp b/src/include/dag.hpp
deleted file mode 100644
index 5539fc5..0000000
--- a/src/include/dag.hpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===----------------------------------------------------------------------===//
-//                         DuckDB
-//
-// lib/dag.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "duckdb/planner/logical_operator.hpp"
-#include "bloom_filter.hpp"
-
-namespace duckdb {
-
-struct FilterPlan {
-	vector<ColumnBinding> build;
-	vector<ColumnBinding> apply;
-	vector<LogicalType> return_types;
-
-	vector<idx_t> bound_cols_build;
-	vector<idx_t> bound_cols_apply;
-
-	void Serialize(Serializer &serializer) const;
-	static unique_ptr<FilterPlan> Deserialize(Deserializer &deserializer);
-
-	bool operator==(const FilterPlan &other) const;
-};
-
-class GraphEdge {
-public:
-	explicit GraphEdge(idx_t destination) : destination(destination) {
-	}
-
-	idx_t destination;
-
-	// The left is the smaller table side, while the right is the bigger table side
-	vector<ColumnBinding> left;
-	vector<ColumnBinding> right;
-	vector<LogicalType> return_types;
-	vector<shared_ptr<FilterPlan>> filter_plan;
-};
-
-struct Edges {
-	vector<unique_ptr<GraphEdge>> in;
-	vector<unique_ptr<GraphEdge>> out;
-};
-
-class GraphNode {
-public:
-	GraphNode(idx_t id, int32_t priority) : id(id), cardinality_order(priority) {
-	}
-
-	idx_t id;
-	int32_t cardinality_order;
-
-	//! Predicate Transfer has two stages. The transfer graph is different because of the existence of LEFT JOIN, RIGHT
-	//! JOIN, etc.
-	Edges forward_stage_edges;
-	Edges backward_stage_edges;
-
-public:
-	GraphEdge *Add(idx_t other, bool is_forward, bool is_in_edge);
-	GraphEdge *Add(idx_t other, const vector<ColumnBinding> &left_cols, const vector<ColumnBinding> &right_cols,
-	               const vector<LogicalType> &types, bool is_forward, bool is_in_edge);
-	GraphEdge *Add(idx_t other, const shared_ptr<FilterPlan> &filter_plan, bool is_forward, bool is_in_edge);
-};
-
-using TransferGraph = unordered_map<idx_t, unique_ptr<GraphNode>>;
-} // namespace duckdb
diff --git a/src/operators/logical_create_bf.cpp b/src/operators/logical_create_bf.cpp
deleted file mode 100644
index 53e6696..0000000
--- a/src/operators/logical_create_bf.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-#include "dag.hpp"
-#include "logical_create_bf.hpp"
-// #include "physical_hello.hpp"
-#include "duckdb/main/database.hpp"
-#include "duckdb/main/config.hpp"
-#include "duckdb/planner/logical_operator.hpp"
-#include "physical_create_bf.hpp"
-#include "dag.hpp"
-
-#include <utility>
-
-namespace duckdb {
-
-LogicalCreateBF::LogicalCreateBF() : LogicalExtensionOperator() {
-	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
-	message = "CREATE_BF";
-}
-
-LogicalCreateBF::LogicalCreateBF(const BloomFilterOperation &bf_op)
-    : LogicalExtensionOperator(), bf_operation(bf_op) {
-	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
-	message = "CREATE_BF";
-}
-
-InsertionOrderPreservingMap<string> LogicalCreateBF::ParamsToString() const {
-	InsertionOrderPreservingMap<string> result;
-	result["Operator"] = "LogicalCreateBF";
-	result["Build Table"] = to_string(bf_operation.build_table_idx);
-	result["Probe Table"] = to_string(bf_operation.probe_table_idx);
-
-	string build_cols = "";
-	for (size_t i = 0; i < bf_operation.build_columns.size(); i++) {
-		if (i > 0) {
-			build_cols += ", ";
-		}
-		build_cols += "(" + to_string(bf_operation.build_columns[i].table_index) +
-					 "." + to_string(bf_operation.build_columns[i].column_index) + ")";
-	}
-	result["Build Columns"] = build_cols;
-	
-	if (estimated_cardinality != DConstants::INVALID_INDEX) {
-		result["Estimated Cardinality"] = std::to_string(estimated_cardinality);
-	}
-	
-	return result;
-}
-
-vector<ColumnBinding> LogicalCreateBF::GetColumnBindings() {
-	return children[0]->GetColumnBindings();
-}
-
-void LogicalCreateBF::ResolveTypes() {
-	if (!children.empty() && children[0]) {
-		types = children[0]->types;
-	}
-}
-
-// shared_ptr<FilterPlan> BloomFilterOperationToFilterPlan(const BloomFilterOperation &bf_op) {
-// 	auto filter_plan = make_shared<FilterPlan>();
-// 	filter_plan->build = bf_op.build_columns;
-// 	filter_plan->apply = bf_op.probe_columns;
-// 	// filter_plan->return_types will be populated later during ResolveTypes()
-// 	return filter_plan;
-// }
-
-PhysicalOperator &LogicalCreateBF::CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) {
-	if (!physical) {
-		// step 1: get child column bindings to understand chunk schema
-		vector<ColumnBinding> child_bindings = children[0]->GetColumnBindings();
-
-		// step 2: resolve/map the bf operation columns to chunk column indices
-		vector<idx_t> resolved_indices;
-		for (const ColumnBinding &column_binding: bf_operation.build_columns) {
-			// find the position of the bf column ColumnBinding in the chunk columns
-			for (idx_t i = 0; i < child_bindings.size(); i++) {
-				if (child_bindings[i].table_index == column_binding.table_index &&
-					child_bindings[i].column_index == column_binding.column_index) {
-					resolved_indices.push_back(i);
-					break;
-				}
-			}
-		}
-
-		// step 3: create physical operator with the resolved indices
-		PhysicalOperator &physical_op = generator.Make<PhysicalCreateBF>(
-			  make_shared_ptr<BloomFilterOperation>(bf_operation),
-			  types,
-			  estimated_cardinality,
-			  resolved_indices);
-		// auto filter_plan = BloomFilterOperationToFilterPlan(bf_operation);
-		// auto &physical_op = generator.Make<PhysicalCreateBF>(make_shared<BloomFilterOperation>(bf_operation), types, estimated_cardinality);
-		for (auto &child : children) {
-			auto &child_physical = generator.CreatePlan(*child);
-			physical_op.children.emplace_back(child_physical);
-		}
-		physical = static_cast<PhysicalCreateBF*>(&physical_op);
-		return physical_op;
-	}
-	return *physical;
-}
-
-// void RegisterLogicalCreateBFOperatorExtension(DatabaseInstance &db) {
-// 	auto &config = DBConfig::GetConfig(db);
-// 	config.operator_extensions.push_back(make_uniq<LogicalCreateBF>());
-// }
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/operators/logical_create_filter.cpp b/src/operators/logical_create_filter.cpp
new file mode 100644
index 0000000..f01ab33
--- /dev/null
+++ b/src/operators/logical_create_filter.cpp
@@ -0,0 +1,134 @@
+#include "duckdb/main/database.hpp"
+#include "duckdb/main/config.hpp"
+#include "duckdb/planner/logical_operator.hpp"
+#include "logical_create_filter.hpp"
+#include "logical_probe_filter.hpp"
+#include "physical_create_filter.hpp"
+#include "physical_probe_filter.hpp"
+#include <utility>
+
+namespace duckdb {
+
+LogicalCreateFilter::LogicalCreateFilter() : LogicalExtensionOperator() {
+	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
+	message = "CREATE_FILTER";
+}
+
+LogicalCreateFilter::LogicalCreateFilter(const FilterOperation &filter_op)
+    : LogicalExtensionOperator(), filter_operation(filter_op) {
+	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
+	message = "CREATE_FILTER";
+}
+
+InsertionOrderPreservingMap<string> LogicalCreateFilter::ParamsToString() const {
+	InsertionOrderPreservingMap<string> result;
+	result["Operator"] = "LogicalCreateFilter";
+	result["Build Table"] = to_string(filter_operation.build_table_idx);
+	// there can be multiple probe tables for a single create
+	string probe_tables;
+	vector<idx_t> seen_probe;
+	for (const auto &col : filter_operation.probe_columns) {
+		bool found = false;
+		for (auto idx : seen_probe) {
+			if (idx == col.table_index) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			if (!probe_tables.empty()) {
+				probe_tables += ", ";
+			}
+			probe_tables += to_string(col.table_index);
+			seen_probe.push_back(col.table_index);
+		}
+	}
+	result["Probe Tables"] = probe_tables;
+
+	string build_cols = "";
+	for (size_t i = 0; i < filter_operation.build_columns.size(); i++) {
+		if (i > 0) {
+			build_cols += ", ";
+		}
+		build_cols += "(" + to_string(filter_operation.build_columns[i].table_index) + "." +
+		              to_string(filter_operation.build_columns[i].column_index) + ")";
+	}
+	result["Build Columns"] = build_cols;
+
+	if (estimated_cardinality != DConstants::INVALID_INDEX) {
+		result["Estimated Cardinality"] = std::to_string(estimated_cardinality);
+	}
+
+	return result;
+}
+
+vector<ColumnBinding> LogicalCreateFilter::GetColumnBindings() {
+	return children[0]->GetColumnBindings();
+}
+
+void LogicalCreateFilter::ResolveTypes() {
+	if (!children.empty() && children[0]) {
+		types = children[0]->types;
+	}
+}
+
+PhysicalOperator &LogicalCreateFilter::CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) {
+	if (!physical) {
+		// step 1: get child column bindings to understand chunk schema
+		vector<ColumnBinding> child_bindings = children[0]->GetColumnBindings();
+
+		// step 2: resolve/map the filter operation columns to chunk column indices.
+		// resolved_indices stores the columns on which the bloom filters are
+		// built.
+		// TODO: optimize: Use a map for filter_operation.build_columns to speed up lookup
+		vector<idx_t> resolved_indices;
+		for (const ColumnBinding &column_binding : filter_operation.build_columns) {
+			// find the position of the filter column ColumnBinding in the chunk columns
+			for (idx_t i = 0; i < child_bindings.size(); i++) {
+				if (child_bindings[i].table_index == column_binding.table_index &&
+				    child_bindings[i].column_index == column_binding.column_index) {
+					resolved_indices.push_back(i);
+					break;
+				}
+			}
+		}
+
+		// step 3: create physical operator with the resolved indices
+		PhysicalOperator &physical_op = generator.Make<PhysicalCreateFilter>(
+		    make_shared_ptr<FilterOperation>(filter_operation), types, estimated_cardinality, resolved_indices);
+		// auto filter_plan = FilterOperationToFilterPlan(filter_operation);
+		// auto &physical_op = generator.Make<PhysicalCreateFilter>(make_shared<FilterOperation>(filter_operation),
+		// types, estimated_cardinality);
+		for (auto &child : children) {
+			auto &child_physical = generator.CreatePlan(*child);
+			physical_op.children.emplace_back(child_physical);
+		}
+		physical = static_cast<PhysicalCreateFilter *>(&physical_op);
+
+		// propagate dynamic filter pushdown targets
+		for (auto &target : pushdown_targets) {
+			PhysicalCreateFilter::DynamicFilterTarget phys_target;
+			phys_target.dynamic_filters = target.dynamic_filters;
+			phys_target.scan_column_index = target.scan_column_index;
+			phys_target.probe_column = target.probe_column;
+			phys_target.column_type = target.column_type;
+			phys_target.column_name = target.column_name;
+			physical->pushdown_targets.push_back(std::move(phys_target));
+		}
+		physical->is_forward_pass = is_forward_pass;
+
+		// link back to related PROBE_FILTER operators
+		// the links are used to create pipeline dependencies
+		for (const LogicalProbeFilter *probe_filter : related_probe_filter) {
+			if (probe_filter->physical) {
+				// TODO: keep either related_create_filter or related_create_filter_vec. Not both. Most likely we'll
+				// have to remove related_create_filter.
+				probe_filter->physical->related_create_filter = physical;
+				probe_filter->physical->related_create_filter_vec.push_back(physical);
+			}
+		}
+		return physical_op;
+	}
+	return *physical;
+}
+} // namespace duckdb
diff --git a/src/operators/logical_create_bf.hpp b/src/operators/logical_create_filter.hpp
similarity index 54%
rename from src/operators/logical_create_bf.hpp
rename to src/operators/logical_create_filter.hpp
index a7ed1f3..c24d877 100644
--- a/src/operators/logical_create_bf.hpp
+++ b/src/operators/logical_create_filter.hpp
@@ -1,7 +1,7 @@
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
-// duckdb/planner/operator/logical_create_bf.hpp
+// duckdb/planner/operator/logical_create_filter.hpp
 //
 //
 //===----------------------------------------------------------------------===//
@@ -9,31 +9,42 @@
 
 #include "duckdb/planner/logical_operator.hpp"
 #include "duckdb/planner/operator/logical_extension_operator.hpp"
+#include "duckdb/planner/table_filter.hpp"
 #include "../optimizer/graph_manager.hpp"
 
 namespace duckdb {
 class DatabaseInstance;
-class PhysicalCreateBF;
+class PhysicalCreateFilter;
+class LogicalProbeFilter;
 
-class LogicalCreateBF : public LogicalExtensionOperator {
+class LogicalCreateFilter : public LogicalExtensionOperator {
 public:
 	static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
-	static constexpr auto OPERATOR_TYPE_NAME = "logical_create_bf";
+	static constexpr auto OPERATOR_TYPE_NAME = "logical_create_filter";
 
 public:
-	explicit LogicalCreateBF();
-	explicit LogicalCreateBF(const BloomFilterOperation &bf_op);
+	explicit LogicalCreateFilter();
+	explicit LogicalCreateFilter(const FilterOperation &filter_op);
 
 	bool can_stop = false;
-	BloomFilterOperation bf_operation;
-	PhysicalCreateBF *physical = nullptr;
+	FilterOperation filter_operation;
+	PhysicalCreateFilter *physical = nullptr;
+
+	vector<LogicalProbeFilter *> related_probe_filter;
+	bool is_forward_pass = false;
+
+	struct DynamicFilterTarget {
+		shared_ptr<DynamicTableFilterSet> dynamic_filters;
+		idx_t scan_column_index;
+		ColumnBinding probe_column;
+		LogicalType column_type;
+		string column_name;
+	};
+	vector<DynamicFilterTarget> pushdown_targets;
 
-	vector<shared_ptr<DynamicTableFilterSet>> min_max_to_create;
-	vector<vector<ColumnBinding>> min_max_applied_cols;
 	string message;
 
 public:
-
 	string GetExtensionName() const override {
 		return "rpt";
 	}
@@ -46,6 +57,4 @@ class LogicalCreateBF : public LogicalExtensionOperator {
 	void ResolveTypes() override;
 };
 
-// void RegisterLogicalCreateBFOperatorExtension(DatabaseInstance &instance);
-
 } // namespace duckdb
diff --git a/src/operators/logical_probe_filter.cpp b/src/operators/logical_probe_filter.cpp
new file mode 100644
index 0000000..59172d9
--- /dev/null
+++ b/src/operators/logical_probe_filter.cpp
@@ -0,0 +1,109 @@
+#include "logical_probe_filter.hpp"
+#include "physical_probe_filter.hpp"
+#include "utils/debug_utils.hpp"
+
+namespace duckdb {
+
+LogicalProbeFilter::LogicalProbeFilter() : LogicalExtensionOperator() {
+	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
+}
+
+LogicalProbeFilter::LogicalProbeFilter(const FilterOperation &filter_op)
+    : LogicalExtensionOperator(), filter_operation(filter_op) {
+	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
+}
+
+InsertionOrderPreservingMap<string> LogicalProbeFilter::ParamsToString() const {
+	InsertionOrderPreservingMap<string> result;
+	result["Operator"] = "LogicalProbeFilter";
+
+	result["Build Table"] = to_string(filter_operation.build_table_idx);
+	result["Probe Table"] = to_string(filter_operation.probe_table_idx);
+
+	string probe_cols = "";
+	for (size_t i = 0; i < filter_operation.probe_columns.size(); i++) {
+		if (i > 0) {
+			probe_cols += ", ";
+		}
+		probe_cols += "(" + to_string(filter_operation.probe_columns[i].table_index) + "." +
+		              to_string(filter_operation.probe_columns[i].column_index) + ")";
+	}
+	result["Probe Columns"] = probe_cols;
+
+	if (estimated_cardinality != DConstants::INVALID_INDEX) {
+		result["Estimated Cardinality"] = std::to_string(estimated_cardinality);
+	}
+
+	return result;
+}
+
+vector<ColumnBinding> LogicalProbeFilter::GetColumnBindings() {
+	return children[0]->GetColumnBindings();
+}
+
+void LogicalProbeFilter::ResolveTypes() {
+	if (!children.empty() && children[0]) {
+		types = children[0]->types;
+	}
+}
+
+PhysicalOperator &LogicalProbeFilter::CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) {
+	if (!physical) {
+		// step 1: get child column bindings to understand chunk schema
+		vector<ColumnBinding> child_bindings = children[0]->GetColumnBindings();
+
+		// step 2: resolve/map the filter operation probe columns to chunk column indices
+		vector<idx_t> resolved_indices;
+
+#ifdef DEBUG
+		Printer::Print(StringUtil::Format("[RESOLVE] LogicalProbeFilter probe_table=%llu has %zu probe_columns",
+		                                  (unsigned long long)filter_operation.probe_table_idx,
+		                                  filter_operation.probe_columns.size()));
+		Printer::Print(StringUtil::Format("[RESOLVE] child_bindings.size()=%zu", child_bindings.size()));
+		for (idx_t j = 0; j < child_bindings.size(); j++) {
+			Printer::Print(StringUtil::Format("  child_bindings[%llu] = table_idx=%llu, col_idx=%llu",
+			                                  (unsigned long long)j, (unsigned long long)child_bindings[j].table_index,
+			                                  (unsigned long long)child_bindings[j].column_index));
+		}
+#endif
+
+		for (const ColumnBinding &column_binding : filter_operation.probe_columns) {
+			D_PRINTF("[RESOLVE] Looking for probe_column: table_idx=%llu, col_idx=%llu",
+			         (unsigned long long)column_binding.table_index, (unsigned long long)column_binding.column_index);
+			// find the position of the filter column ColumnBinding in the chunk columns
+			for (idx_t i = 0; i < child_bindings.size(); i++) {
+				if (child_bindings[i].table_index == column_binding.table_index &&
+				    child_bindings[i].column_index == column_binding.column_index) {
+					resolved_indices.push_back(i);
+					D_PRINTF("[RESOLVE] Matched at chunk position %llu", (unsigned long long)i);
+					break;
+				}
+			}
+		}
+
+		// step 3: create physical operator with the resolved indices
+		auto &plan = generator.CreatePlan(*children[0]);
+		PhysicalOperator &physical_op = generator.Make<PhysicalProbeFilter>(
+		    make_shared_ptr<FilterOperation>(filter_operation), plan.types, estimated_cardinality, resolved_indices);
+		physical = static_cast<PhysicalProbeFilter *>(&physical_op);
+		physical->is_passthrough = is_passthrough;
+
+		// set up reference to related PhysicalCreateFilter if available
+		if (related_create_filter) {
+			D_PRINTF("[LOGICAL USE] probe table - table_%llu Related_create_filter exists",
+			         (unsigned long long)filter_operation.probe_table_idx);
+		}
+		if (related_create_filter && related_create_filter->physical) {
+			D_PRINTF("[LOGICAL USE] probe table - table_%llu Related_create_filter physical exists",
+			         (unsigned long long)filter_operation.probe_table_idx);
+			physical->related_create_filter = related_create_filter->physical;
+			physical->related_create_filter_vec.push_back(related_create_filter->physical);
+		}
+
+		physical_op.children.emplace_back(plan);
+		return physical_op;
+	}
+	return *physical;
+}
+
+} // namespace duckdb
diff --git a/src/operators/logical_use_bf.hpp b/src/operators/logical_probe_filter.hpp
similarity index 61%
rename from src/operators/logical_use_bf.hpp
rename to src/operators/logical_probe_filter.hpp
index 8fe0868..5a826a1 100644
--- a/src/operators/logical_use_bf.hpp
+++ b/src/operators/logical_probe_filter.hpp
@@ -1,7 +1,7 @@
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
-// operator/logical_use_bf.hpp
+// operator/logical_probe_filter.hpp
 //
 //
 //===----------------------------------------------------------------------===//
@@ -9,29 +9,29 @@
 
 #include "duckdb/planner/logical_operator.hpp"
 #include "duckdb/planner/operator/logical_extension_operator.hpp"
-#include "logical_create_bf.hpp"
+#include "logical_create_filter.hpp"
 #include "../optimizer/graph_manager.hpp"
 
 namespace duckdb {
 class DatabaseInstance;
-class PhysicalUseBF;
+class PhysicalProbeFilter;
 
-class LogicalUseBF final : public LogicalExtensionOperator {
+class LogicalProbeFilter final : public LogicalExtensionOperator {
 public:
 	static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
-	static constexpr auto OPERATOR_TYPE_NAME = "logical_use_bf";
+	static constexpr auto OPERATOR_TYPE_NAME = "logical_probe_filter";
 
 public:
-	explicit LogicalUseBF();
-	explicit LogicalUseBF(const BloomFilterOperation &bf_op);
+	explicit LogicalProbeFilter();
+	explicit LogicalProbeFilter(const FilterOperation &filter_op);
 
-	BloomFilterOperation bf_operation;
-	LogicalCreateBF *related_create_bf = nullptr;
+	FilterOperation filter_operation;
+	LogicalCreateFilter *related_create_filter = nullptr;
+	bool is_passthrough = false;
 
-	PhysicalUseBF *physical = nullptr;
+	PhysicalProbeFilter *physical = nullptr;
 
 public:
-
 	string GetExtensionName() const override {
 		return "rpt";
 	}
@@ -44,6 +44,6 @@ class LogicalUseBF final : public LogicalExtensionOperator {
 	void ResolveTypes() override;
 };
 
-// void RegisterLogicalUseBFOperatorExtension(DatabaseInstance &instance);
+// void RegisterLogicalProbeFilterOperatorExtension(DatabaseInstance &instance);
 
 } // namespace duckdb
diff --git a/src/operators/logical_use_bf.cpp b/src/operators/logical_use_bf.cpp
deleted file mode 100644
index 07e781d..0000000
--- a/src/operators/logical_use_bf.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "logical_use_bf.hpp"
-#include "physical_use_bf.hpp"
-#include "dag.hpp"
-
-namespace duckdb {
-
-LogicalUseBF::LogicalUseBF() : LogicalExtensionOperator() {
-	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
-}
-
-LogicalUseBF::LogicalUseBF(const BloomFilterOperation &bf_op)
-    : LogicalExtensionOperator(), bf_operation(bf_op) {
-	this->type = LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR;
-}
-
-InsertionOrderPreservingMap<string> LogicalUseBF::ParamsToString() const {
-	InsertionOrderPreservingMap<string> result;
-	result["Operator"] = "LogicalUseBF";
-	
-	result["Build Table"] = to_string(bf_operation.build_table_idx);
-	result["Probe Table"] = to_string(bf_operation.probe_table_idx);
-
-	string probe_cols = "";
-	for (size_t i = 0; i < bf_operation.probe_columns.size(); i++) {
-		if (i > 0) {
-			probe_cols += ", ";
-		}
-		probe_cols += "(" + to_string(bf_operation.probe_columns[i].table_index) +
-					 "." + to_string(bf_operation.probe_columns[i].column_index) + ")";
-	}
-	result["Probe Columns"] = probe_cols;
-	
-	if (estimated_cardinality != DConstants::INVALID_INDEX) {
-		result["Estimated Cardinality"] = std::to_string(estimated_cardinality);
-	}
-	
-	return result;
-}
-
-vector<ColumnBinding> LogicalUseBF::GetColumnBindings() {
-	return children[0]->GetColumnBindings();
-}
-
-void LogicalUseBF::ResolveTypes() {
-	Printer::Print("Resolving types for LogicalUseBF");
-	// if (!children.empty() && children[0]) {
-		// Printer::Print("Resolving types for LogicalUseBF: children[0]");
-		types = children[0]->types;
-	// }
-}
-
-shared_ptr<FilterPlan> BloomFilterOperationToFilterPlan(const BloomFilterOperation &bf_op) {
-	auto filter_plan = make_shared_ptr<FilterPlan>();
-	filter_plan->build = bf_op.build_columns;
-	filter_plan->apply = bf_op.probe_columns;
-	// filter_plan->return_types will be populated later during ResolveTypes()
-	return filter_plan;
-}
-
-PhysicalOperator &LogicalUseBF::CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) {
-	printf("LogicalUseBF::CreatePlan called - Build table: %llu, Probe table: %llu\n",
-		   bf_operation.build_table_idx, bf_operation.probe_table_idx);
-	if (!physical) {
-		auto &plan = generator.CreatePlan(*children[0]);
-		// TODO: Replace filter_plan with bf_operation everywhere.
-		// this is a temp fix
-		auto filter_plan = BloomFilterOperationToFilterPlan(bf_operation);
-		auto &use_bf = generator.Make<PhysicalUseBF>(filter_plan, plan.types, estimated_cardinality);
-		physical = static_cast<PhysicalUseBF*>(&use_bf);
-
-		// Set up reference to related PhysicalCreateBF if available
-		if (related_create_bf && related_create_bf->physical) {
-			physical->related_create_bf = related_create_bf->physical;
-		}
-
-		use_bf.children.emplace_back(plan);
-		printf("  Created PhysicalUseBF successfully\n");
-		return use_bf;
-	}
-	printf("  Reusing existing physical operator\n");
-	return *physical;
-}
-
-// void RegisterLogicalUseBFOperatorExtension(DatabaseInstance &db) {
-// 	auto &config = DBConfig::GetConfig(db);
-// 	config.operator_extensions.push_back(make_uniq<LogicalUseBF>());
-// }
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/operators/physical_create_bf.cpp b/src/operators/physical_create_bf.cpp
deleted file mode 100644
index 3bec554..0000000
--- a/src/operators/physical_create_bf.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "physical_create_bf.hpp"
-#include "dag.hpp"
-#include "bloom_filter.hpp"
-#include "duckdb/execution/expression_executor.hpp"
-#include "duckdb/parallel/pipeline.hpp"
-#include "duckdb/parallel/base_pipeline_event.hpp"
-#include <iostream>
-
-namespace duckdb {
-
-PhysicalCreateBF::PhysicalCreateBF(const shared_ptr<BloomFilterOperation> bf_operation, vector<LogicalType> types,
-                                   idx_t estimated_cardinality, vector<idx_t> bound_column_indices)
-    : PhysicalOperator(PhysicalOperatorType::EXTENSION, std::move(types), estimated_cardinality),
-      bf_operation(bf_operation), is_probing_side(false), bound_column_indices(std::move(bound_column_indices)) {
-}
-
-string PhysicalCreateBF::GetName() const {
-    return "CREATE_BF";
-}
-
-string PhysicalCreateBF::ToString(ExplainFormat format) const {
-    string result = "CREATE_BF";
-    result += " [" + std::to_string(bf_operation->build_columns.size()) + " filters]";
-    return result;
-}
-
-unique_ptr<GlobalSinkState> PhysicalCreateBF::GetGlobalSinkState(ClientContext &context) const {
-    auto state = make_uniq<PhysicalCreateBFGlobalSinkState>();
-    
-    // initialize bloom filters for each filter plan
-    state->bloom_filters.reserve(bf_operation->build_columns.size());
-    for (size_t i = 0; i < bf_operation->build_columns.size(); i++) {
-        auto bf = make_shared_ptr<BloomFilter>();
-        state->bloom_filters.push_back(bf);
-    }
-    
-    return std::move(state);
-}
-
-unique_ptr<LocalSinkState> PhysicalCreateBF::GetLocalSinkState(ExecutionContext &context) const {
-    return make_uniq<PhysicalCreateBFLocalSinkState>();
-}
-
-SinkResultType PhysicalCreateBF::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
-    auto &lstate = input.local_state.Cast<PhysicalCreateBFLocalSinkState>();
-    auto &gstate = input.global_state.Cast<PhysicalCreateBFGlobalSinkState>();
-
-	Printer::Print("PhysicalCreateBF::Sink() called with chunk size: " + std::to_string(chunk.size()));
-
-    if (chunk.size() > 0) {
-        lock_guard<mutex> bf_guard(gstate.bf_lock);
-
-        // process each bloom filter operation
-        for (size_t i = 0; i < bf_operation->build_columns.size() && i < gstate.bloom_filters.size(); i++) {
-            auto &bf = gstate.bloom_filters[i];
-
-            if (bf) {
-            	idx_t chunked_column_index = bound_column_indices[i];
-                bf->Insert(chunk, {chunked_column_index});
-            	printf("  inserted %llu rows into bloom filter %zu (chunk column %llu)\n",
-						 chunk.size(), i, chunked_column_index);
-            }
-        }
-
-    }
-
-    return SinkResultType::NEED_MORE_INPUT;
-}
-
-SinkCombineResultType PhysicalCreateBF::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
-    // no local state combining needed
-    return SinkCombineResultType::FINISHED;
-}
-
-SinkFinalizeType PhysicalCreateBF::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                            OperatorSinkFinalizeInput &input) const {
-    auto &gstate = input.global_state.Cast<PhysicalCreateBFGlobalSinkState>();
-    
-    lock_guard<mutex> bf_guard(gstate.bf_lock);
-    
-    // initialize and finalize bloom filters
-    for (auto &bf : gstate.bloom_filters) {
-        if (bf) {
-            // Initialize with a reasonable size based on estimated cardinality
-            bf->Initialize(context, estimated_cardinality);
-            bf->finalized_ = true;
-        }
-    }
-    
-    // return READY with no output - we only build the filter, don't output data
-    return SinkFinalizeType::READY;
-}
-
-vector<shared_ptr<BloomFilter>> PhysicalCreateBF::GetBloomFilters() const {
-    // access the sink state to get bloom filters
-    if (sink_state) {
-        auto &gstate = sink_state->Cast<PhysicalCreateBFGlobalSinkState>();
-        lock_guard<mutex> bf_guard(gstate.bf_lock);
-        return gstate.bloom_filters;
-    }
-    return {};
-}
-
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/operators/physical_create_bf.hpp b/src/operators/physical_create_bf.hpp
deleted file mode 100644
index c9e2ef8..0000000
--- a/src/operators/physical_create_bf.hpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#pragma once
-
-#include "duckdb/execution/physical_operator.hpp"
-#include "dag.hpp"
-#include "bloom_filter.hpp"
-#include "../optimizer/graph_manager.hpp"
-namespace duckdb {
-
-class PhysicalCreateBFLocalSinkState : public LocalSinkState {
-public:
-	PhysicalCreateBFLocalSinkState() = default;
-};
-
-class PhysicalCreateBFGlobalSinkState : public GlobalSinkState {
-public:
-	PhysicalCreateBFGlobalSinkState() = default;
-
-	vector<shared_ptr<BloomFilter>> bloom_filters;
-	mutex bf_lock;
-};
-
-class PhysicalCreateBF : public PhysicalOperator {
-public:
-	static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXTENSION;
-
-public:
-	PhysicalCreateBF(const shared_ptr<BloomFilterOperation> bf_operation, vector<LogicalType> types,
-	                 idx_t estimated_cardinality, vector<idx_t> bound_column_indices);
-
-	// Required virtual methods
-	virtual ~PhysicalCreateBF() = default;
-
-	string GetName() const override;
-	string ToString(ExplainFormat format = ExplainFormat::DEFAULT) const override;
-
-	// sink interface - PhysicalOperator can act as sink
-	unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
-	unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
-	SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
-	SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const override;
-	SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-	                          OperatorSinkFinalizeInput &input) const override;
-
-	bool IsSink() const override {
-		return true;
-	}
-public:
-	// vector<shared_ptr<FilterPlan>> filter_plans;
-	shared_ptr<BloomFilterOperation> bf_operation;
-	bool is_probing_side;
-
-	// maps the column indices to resolved chunk column positions
-	vector<idx_t> bound_column_indices;
-
-	// access to created bloom filters for PhysicalUseBF operators
-	vector<shared_ptr<BloomFilter>> GetBloomFilters() const;
-};
-
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/operators/physical_create_filter.cpp b/src/operators/physical_create_filter.cpp
new file mode 100644
index 0000000..458fd91
--- /dev/null
+++ b/src/operators/physical_create_filter.cpp
@@ -0,0 +1,729 @@
+#include "physical_create_filter.hpp"
+#include "bloom_filter.hpp"
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/parallel/pipeline.hpp"
+#include "utils/debug_utils.hpp"
+#include "robust_profiling.hpp"
+#include "probe_empty_registry.hpp"
+#include <duckdb/parallel/meta_pipeline.hpp>
+#include "duckdb/planner/filter/bloom_filter.hpp"
+#include "duckdb/planner/filter/constant_filter.hpp"
+#include "duckdb/planner/filter/selectivity_optional_filter.hpp"
+#include "duckdb/planner/filter/in_filter.hpp"
+#include "duckdb/planner/filter/optional_filter.hpp"
+#include "duckdb/optimizer/filter_combiner.hpp"
+#include "duckdb/main/config.hpp"
+
+namespace duckdb {
+
+PhysicalCreateFilter::PhysicalCreateFilter(PhysicalPlan &physical_plan,
+                                           const shared_ptr<FilterOperation> &filter_operation,
+                                           vector<LogicalType> types, idx_t estimated_cardinality,
+                                           vector<idx_t> bound_column_indices)
+    : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, std::move(types), estimated_cardinality),
+      filter_operation(filter_operation), is_probing_side(false),
+      bound_column_indices(std::move(bound_column_indices)) {
+	// create bloom filter for each build column, keyed by ColumnBinding
+	for (size_t i = 0; i < filter_operation->build_columns.size(); i++) {
+		const auto &col = filter_operation->build_columns[i];
+		bloom_filter_map[col] = make_shared_ptr<PTBloomFilter>();
+	}
+}
+
+string PhysicalCreateFilter::GetName() const {
+	return "CREATE_FILTER";
+}
+
+string PhysicalCreateFilter::ToString(ExplainFormat format) const {
+	string result = "CREATE_FILTER";
+	result += " [" + std::to_string(filter_operation->build_columns.size()) + " filters]";
+	return result;
+}
+
+InsertionOrderPreservingMap<string> PhysicalCreateFilter::ParamsToString() const {
+	InsertionOrderPreservingMap<string> result;
+	result["Operator"] = "PhysicalCreateFilter";
+	result["Build Table"] = to_string(filter_operation->build_table_idx);
+	// there can be multiple probe tables for a single create
+	string probe_tables;
+	vector<idx_t> seen_probe;
+	for (const auto &col : filter_operation->probe_columns) {
+		bool found = false;
+		for (auto idx : seen_probe) {
+			if (idx == col.table_index) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			if (!probe_tables.empty()) {
+				probe_tables += ", ";
+			}
+			probe_tables += to_string(col.table_index);
+			seen_probe.push_back(col.table_index);
+		}
+	}
+	result["Probe Tables"] = probe_tables;
+
+	string build_cols = "";
+	for (size_t i = 0; i < filter_operation->build_columns.size(); i++) {
+		if (i > 0) {
+			build_cols += ", ";
+		}
+		build_cols += "(" + to_string(filter_operation->build_columns[i].table_index) + "." +
+		              to_string(filter_operation->build_columns[i].column_index) + ")";
+	}
+	result["Build Columns"] = build_cols;
+
+	if (estimated_cardinality != DConstants::INVALID_INDEX) {
+		result["Estimated Cardinality"] = std::to_string(estimated_cardinality);
+	}
+
+	return result;
+}
+
+//===--------------------------------------------------------------------===//
+// Min-Max helpers
+//===--------------------------------------------------------------------===//
+
+template <typename T>
+static void TypedUpdateMinMax(Vector &vec, idx_t count, ColumnMinMax &mm) {
+	UnifiedVectorFormat vdata;
+	vec.ToUnifiedFormat(count, vdata);
+	auto *data = UnifiedVectorFormat::GetData<T>(vdata);
+
+	T local_min {}, local_max {};
+	bool has_val = false;
+
+	for (idx_t row = 0; row < count; row++) {
+		auto idx = vdata.sel->get_index(row);
+		if (!vdata.validity.RowIsValid(idx)) {
+			continue;
+		}
+		const auto &val = data[idx];
+		if (!has_val) {
+			local_min = val;
+			local_max = val;
+			has_val = true;
+		} else {
+			if (val < local_min) {
+				local_min = val;
+			}
+			if (val > local_max) {
+				local_max = val;
+			}
+		}
+	}
+
+	if (!has_val) {
+		return;
+	}
+
+	Value vmin = Value::CreateValue(local_min);
+	Value vmax = Value::CreateValue(local_max);
+	if (!mm.has_value) {
+		mm.min_val = vmin;
+		mm.max_val = vmax;
+		mm.has_value = true;
+	} else {
+		if (vmin < mm.min_val) {
+			mm.min_val = vmin;
+		}
+		if (vmax > mm.max_val) {
+			mm.max_val = vmax;
+		}
+	}
+}
+
+static void UpdateMinMax(Vector &vec, idx_t count, ColumnMinMax &mm) {
+	auto &type = vec.GetType();
+	switch (type.id()) {
+	case LogicalTypeId::TINYINT:
+		TypedUpdateMinMax<int8_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::SMALLINT:
+		TypedUpdateMinMax<int16_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::INTEGER:
+		TypedUpdateMinMax<int32_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::BIGINT:
+		TypedUpdateMinMax<int64_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::UTINYINT:
+		TypedUpdateMinMax<uint8_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::USMALLINT:
+		TypedUpdateMinMax<uint16_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::UINTEGER:
+		TypedUpdateMinMax<uint32_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::UBIGINT:
+		TypedUpdateMinMax<uint64_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::FLOAT:
+		TypedUpdateMinMax<float>(vec, count, mm);
+		break;
+	case LogicalTypeId::DOUBLE:
+		TypedUpdateMinMax<double>(vec, count, mm);
+		break;
+	case LogicalTypeId::DATE:
+		TypedUpdateMinMax<date_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::TIMESTAMP:
+		TypedUpdateMinMax<timestamp_t>(vec, count, mm);
+		break;
+	case LogicalTypeId::VARCHAR:
+		TypedUpdateMinMax<string_t>(vec, count, mm);
+		break;
+	default:
+		break;
+	}
+}
+
+//===--------------------------------------------------------------------===//
+// Sink
+//===--------------------------------------------------------------------===//
+
+CreateFilterGlobalSinkState::CreateFilterGlobalSinkState(ClientContext &context, const PhysicalCreateFilter &op)
+    : op(op) {
+	total_data = make_uniq<ColumnDataCollection>(context, op.types);
+	// initialize bloom filters upfront so Sink can insert directly
+	for (auto &entry : op.bloom_filter_map) {
+		if (entry.second) {
+			entry.second->Initialize(context, op.estimated_cardinality);
+		}
+	}
+	// resolve shared probe-empty flag once (single-threaded); forward pass only
+	if (op.is_forward_pass) {
+		auto reg = GetProbeEmptyRegistry(context);
+		if (reg) {
+			probe_empty_flag = reg->GetOrCreate(op.filter_operation->probe_table_idx);
+		}
+		Value v;
+		if (context.TryGetCurrentSetting("robust_dynamic_or_filter_threshold", v)) {
+			distinct_threshold = v.GetValue<uint64_t>();
+		}
+		column_distinct.resize(op.bound_column_indices.size());
+	}
+}
+
+CreateFilterLocalSinkState::CreateFilterLocalSinkState(ClientContext &context, const PhysicalCreateFilter &op)
+    : client_context(context) {
+	local_data = make_uniq<ColumnDataCollection>(client_context, op.types);
+	// initialize min-max and distinct tracking for each build column
+	if (op.is_forward_pass) {
+		local_min_max.resize(op.bound_column_indices.size());
+		local_distinct.resize(op.bound_column_indices.size());
+	}
+}
+
+SinkResultType PhysicalCreateFilter::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
+	if (!profiling_checked) {
+		profiling_checked = true;
+		auto prof = GetRobustProfilingState(context.client);
+		if (prof) {
+			profiling_stats =
+			    prof->RegisterCreateFilter(filter_operation->build_table_idx, filter_operation->probe_columns,
+			                               filter_operation->sequence_number, is_forward_pass);
+		}
+	}
+
+	// short-circuit: if probe side is already known empty, don't ingest into BF.
+	// single relaxed load, lock-free.
+	auto &gstate = input.global_state.Cast<CreateFilterGlobalSinkState>();
+	if (gstate.probe_empty_flag && gstate.probe_empty_flag->load(std::memory_order_relaxed)) {
+		return SinkResultType::FINISHED;
+	}
+
+	CreateFilterLocalSinkState &local_state = input.local_state.Cast<CreateFilterLocalSinkState>();
+	if (profiling_stats) {
+		ScopedTimer timer(profiling_stats->sink_time_us);
+		profiling_stats->rows_materialized.fetch_add(chunk.size(), std::memory_order_relaxed);
+		local_state.local_data->Append(chunk);
+	} else {
+		local_state.local_data->Append(chunk);
+	}
+
+	// insert into bloom filters
+	for (size_t i = 0; i < filter_operation->build_columns.size(); i++) {
+		const auto &col = filter_operation->build_columns[i];
+		auto it = bloom_filter_map.find(col);
+		if (it != bloom_filter_map.end() && it->second) {
+			it->second->Insert(chunk, {bound_column_indices[i]});
+		}
+	}
+
+	// compute min-max using typed pointer access
+	if (is_forward_pass && !local_state.local_min_max.empty() && chunk.size() > 0) {
+		for (idx_t i = 0; i < bound_column_indices.size() && i < local_state.local_min_max.size(); i++) {
+			idx_t col_idx = bound_column_indices[i];
+			if (col_idx >= chunk.ColumnCount()) {
+				continue;
+			}
+			auto &vec = chunk.data[col_idx];
+			UpdateMinMax(vec, chunk.size(), local_state.local_min_max[i]);
+		}
+	}
+
+	// track distinct values up to threshold+1 (overflow stops further insertion)
+	if (is_forward_pass && !local_state.local_distinct.empty() && chunk.size() > 0) {
+		const idx_t threshold = gstate.distinct_threshold;
+		for (idx_t i = 0; i < bound_column_indices.size() && i < local_state.local_distinct.size(); i++) {
+			auto &cd = local_state.local_distinct[i];
+			if (cd.over_threshold) {
+				continue;
+			}
+			idx_t col_idx = bound_column_indices[i];
+			if (col_idx >= chunk.ColumnCount()) {
+				continue;
+			}
+			auto &vec = chunk.data[col_idx];
+			for (idx_t row = 0; row < chunk.size(); row++) {
+				Value val = vec.GetValue(row);
+				if (val.IsNull()) {
+					continue;
+				}
+				cd.values.insert(std::move(val));
+				if (cd.values.size() > threshold) {
+					cd.over_threshold = true;
+					cd.values.clear();
+					break;
+				}
+			}
+		}
+	}
+
+	return SinkResultType::NEED_MORE_INPUT;
+}
+
+SinkCombineResultType PhysicalCreateFilter::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+	CreateFilterGlobalSinkState &gstate = input.global_state.Cast<CreateFilterGlobalSinkState>();
+	CreateFilterLocalSinkState &local_state = input.local_state.Cast<CreateFilterLocalSinkState>();
+	lock_guard<mutex> lock(gstate.glock);
+	gstate.local_data_collections.emplace_back(std::move(local_state.local_data));
+
+	// merge local min-max into global
+	if (!local_state.local_min_max.empty()) {
+		if (gstate.column_min_max.empty()) {
+			gstate.column_min_max.resize(local_state.local_min_max.size());
+		}
+		for (idx_t i = 0; i < local_state.local_min_max.size(); i++) {
+			auto &local_mm = local_state.local_min_max[i];
+			if (!local_mm.has_value) {
+				continue;
+			}
+			auto &global_mm = gstate.column_min_max[i];
+			if (!global_mm.has_value) {
+				global_mm = local_mm;
+			} else {
+				if (local_mm.min_val < global_mm.min_val) {
+					global_mm.min_val = local_mm.min_val;
+				}
+				if (local_mm.max_val > global_mm.max_val) {
+					global_mm.max_val = local_mm.max_val;
+				}
+			}
+		}
+	}
+
+	// merge local distinct values into global, propagating over_threshold
+	if (!local_state.local_distinct.empty()) {
+		const idx_t threshold = gstate.distinct_threshold;
+		for (idx_t i = 0; i < local_state.local_distinct.size() && i < gstate.column_distinct.size(); i++) {
+			auto &local_cd = local_state.local_distinct[i];
+			auto &global_cd = gstate.column_distinct[i];
+			if (global_cd.over_threshold) {
+				continue;
+			}
+			if (local_cd.over_threshold) {
+				global_cd.over_threshold = true;
+				global_cd.values.clear();
+				continue;
+			}
+			for (auto &val : local_cd.values) {
+				global_cd.values.insert(val);
+				if (global_cd.values.size() > threshold) {
+					global_cd.over_threshold = true;
+					global_cd.values.clear();
+					break;
+				}
+			}
+		}
+	}
+
+	return SinkCombineResultType::FINISHED;
+}
+
+//===--------------------------------------------------------------------===//
+// Finalize
+//===--------------------------------------------------------------------===//
+
+// pushes dynamic filters (BF + min-max) to table scans after BF is fully built
+static void PushDynamicFilters(const PhysicalCreateFilter &op, const CreateFilterGlobalSinkState &gsink,
+                               ClientContext &context) {
+	if (!op.is_forward_pass || op.pushdown_targets.empty()) {
+		return;
+	}
+
+	// if build side produced 0 rows, no probe-side rows can match — push always-false filter
+	if (gsink.total_data->Count() == 0) {
+		for (auto &target : op.pushdown_targets) {
+			auto always_false =
+			    make_uniq<ConstantFilter>(ExpressionType::COMPARE_GREATERTHAN, Value::MaximumValue(target.column_type));
+			target.dynamic_filters->PushFilter(op, target.scan_column_index, std::move(always_false));
+			D_PRINTF("[PUSHDOWN] pushed always-false for col %s (empty build side)", target.column_name.c_str());
+		}
+		return;
+	}
+
+	string filter_type = "all";
+	Value filter_type_val;
+	if (context.TryGetCurrentSetting("robust_filter_type", filter_type_val)) {
+		filter_type = filter_type_val.GetValue<string>();
+	}
+
+	bool push_bf = (filter_type == "all" || filter_type == "bf_only");
+	bool push_minmax = (filter_type == "all" || filter_type == "minmax_only");
+	bool consider_in = (filter_type == "all");
+
+	for (auto &target : op.pushdown_targets) {
+		for (size_t i = 0; i < op.filter_operation->build_columns.size(); i++) {
+			if (i >= op.filter_operation->probe_columns.size()) {
+				break;
+			}
+			const auto &probe_col = op.filter_operation->probe_columns[i];
+			if (probe_col.table_index != target.probe_column.table_index ||
+			    probe_col.column_index != target.probe_column.column_index) {
+				continue;
+			}
+
+			const auto &build_col = op.filter_operation->build_columns[i];
+
+			// push IN-filter (zonemap-only) or equality constant alongside BF; equality
+			// is per-row and supersedes BF/min-max
+			bool pushed_equal = false;
+			if (consider_in && i < gsink.column_distinct.size()) {
+				auto &cd = gsink.column_distinct[i];
+				if (!cd.over_threshold && cd.values.size() == 1) {
+					auto eq = make_uniq<ConstantFilter>(ExpressionType::COMPARE_EQUAL, *cd.values.begin());
+					target.dynamic_filters->PushFilter(op, target.scan_column_index, std::move(eq));
+					pushed_equal = true;
+					D_PRINTF("[PUSHDOWN] pushed equality constant for col %s", target.column_name.c_str());
+				} else if (!cd.over_threshold && cd.values.size() > 1) {
+					vector<Value> in_list(cd.values.begin(), cd.values.end());
+					if (!FilterCombiner::ContainsNull(in_list) && !FilterCombiner::IsDenseRange(in_list)) {
+						auto in_f = make_uniq<InFilter>(std::move(in_list));
+						auto opt = make_uniq<OptionalFilter>(std::move(in_f));
+						target.dynamic_filters->PushFilter(op, target.scan_column_index, std::move(opt));
+						D_PRINTF("[PUSHDOWN] pushed IN-filter (%llu values) for col %s",
+						         (unsigned long long)cd.values.size(), target.column_name.c_str());
+					}
+				}
+			}
+
+			// keep BF alongside IN-filter for per-row pruning (IN is zonemap-only);
+			// equality filter already does per-row, skip BF there
+			if (push_bf && !pushed_equal) {
+				auto bf_it = op.bloom_filter_map.find(build_col);
+				if (bf_it != op.bloom_filter_map.end() && bf_it->second && !bf_it->second->IsEmpty()) {
+					auto bf_filter = make_uniq<BFTableFilter>(bf_it->second->GetNativeFilter(), false,
+					                                          target.column_name, target.column_type);
+					auto wrapped = make_uniq<SelectivityOptionalFilter>(std::move(bf_filter), 1, 1000000);
+					target.dynamic_filters->PushFilter(op, target.scan_column_index, std::move(wrapped));
+					D_PRINTF("[PUSHDOWN] pushed BF for col %s to scan col %llu", target.column_name.c_str(),
+					         (unsigned long long)target.scan_column_index);
+				}
+			}
+
+			// equality filter already expresses min/max; skip min/max push in that case
+			if (push_minmax && !pushed_equal && i < gsink.column_min_max.size() && gsink.column_min_max[i].has_value) {
+				auto &mm = gsink.column_min_max[i];
+				auto min_filter = make_uniq<ConstantFilter>(ExpressionType::COMPARE_GREATERTHANOREQUALTO, mm.min_val);
+				target.dynamic_filters->PushFilter(op, target.scan_column_index, std::move(min_filter));
+
+				auto max_filter = make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHANOREQUALTO, mm.max_val);
+				target.dynamic_filters->PushFilter(op, target.scan_column_index, std::move(max_filter));
+
+				D_PRINTF("[PUSHDOWN] pushed min-max for col %s [%s, %s]", target.column_name.c_str(),
+				         mm.min_val.ToString().c_str(), mm.max_val.ToString().c_str());
+			}
+
+			break;
+		}
+	}
+}
+
+SinkFinalizeType PhysicalCreateFilter::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
+                                                OperatorSinkFinalizeInput &input) const {
+	// lazy init profiling if Sink was never called (e.g., empty input)
+	if (!profiling_checked) {
+		profiling_checked = true;
+		auto prof = GetRobustProfilingState(context);
+		if (prof) {
+			profiling_stats =
+			    prof->RegisterCreateFilter(filter_operation->build_table_idx, filter_operation->probe_columns,
+			                               filter_operation->sequence_number, is_forward_pass);
+		}
+	}
+
+	auto &gsink = input.global_state.Cast<CreateFilterGlobalSinkState>();
+
+	// time the finalize phase
+	unique_ptr<ScopedTimer> fin_timer;
+	if (profiling_stats) {
+		fin_timer = make_uniq<ScopedTimer>(profiling_stats->finalize_time_us);
+	}
+
+	// 1. merge local data collections - needed for downstream Source
+	for (auto &local_data : gsink.local_data_collections) {
+		gsink.total_data->Combine(*local_data);
+	}
+	gsink.local_data_collections.clear();
+
+	string build_table = filter_operation ? "table_" + std::to_string(filter_operation->build_table_idx) : "unknown";
+	D_PRINTF("[FINALIZE] CREATE_FILTER (build=%s): total_data contains %llu rows, %zu bloom filters",
+	         build_table.c_str(), (unsigned long long)gsink.total_data->Count(), bloom_filter_map.size());
+
+	// 2. resize any undersized BFs and rehash from materialized data.
+	// rule: resize iff allocated_bits / actual_rows < 8  (i.e., <8 bits/key -> FPR > ~2%).
+	// shrink-on-overestimate is intentionally skipped.
+	// TODO - evaluate memory savings and performance tradeoff with shrink-on-overestimate
+	const idx_t actual_rows = gsink.total_data->Count();
+	if (actual_rows > 0) {
+		for (size_t i = 0; i < filter_operation->build_columns.size(); i++) {
+			const auto &col = filter_operation->build_columns[i];
+			auto it = bloom_filter_map.find(col);
+			if (it == bloom_filter_map.end() || !it->second) {
+				continue;
+			}
+			auto &bf = *it->second;
+			const idx_t min_bits = std::max<idx_t>(512, bf.SizedForRows() * 12);
+			const idx_t allocated_bits = NextPowerOfTwo(min_bits);
+			if (actual_rows * 8 > allocated_bits) {
+				D_PRINTF("[RESIZE] CREATE_FILTER (build=%s) col=(%llu.%llu) sized_for=%llu actual=%llu "
+				         "allocated_bits=%llu -> rehashing",
+				         build_table.c_str(), (unsigned long long)col.table_index, (unsigned long long)col.column_index,
+				         (unsigned long long)bf.SizedForRows(), (unsigned long long)actual_rows,
+				         (unsigned long long)allocated_bits);
+				bf.ReinitializeAndRehash(context, actual_rows, *gsink.total_data, {bound_column_indices[i]});
+			}
+		}
+	}
+
+	// 3. mark bloom filters as finalized
+	for (auto &entry : bloom_filter_map) {
+		if (entry.second) {
+			entry.second->finalized_ = true;
+		}
+	}
+
+	// 4. if this forward CREATE_FILTER produced an empty BF, signal sibling CREATE_FILTERs
+	// targeting the same probe that the probe side will be empty (relaxed store, lock-free).
+	if (gsink.probe_empty_flag && actual_rows == 0) {
+		gsink.probe_empty_flag->store(true, std::memory_order_relaxed);
+	}
+
+	// 5. push dynamic filters to table scans
+	PushDynamicFilters(*this, gsink, context);
+
+	return SinkFinalizeType::READY;
+}
+
+unique_ptr<GlobalSinkState> PhysicalCreateFilter::GetGlobalSinkState(ClientContext &context) const {
+	return make_uniq<CreateFilterGlobalSinkState>(context, *this);
+}
+
+unique_ptr<LocalSinkState> PhysicalCreateFilter::GetLocalSinkState(ExecutionContext &context) const {
+	return make_uniq<CreateFilterLocalSinkState>(context.client, *this);
+}
+
+shared_ptr<PTBloomFilter> PhysicalCreateFilter::GetBloomFilter(const ColumnBinding &col) const {
+	auto it = bloom_filter_map.find(col);
+	if (it != bloom_filter_map.end()) {
+		return it->second;
+	}
+	return nullptr;
+}
+
+//===--------------------------------------------------------------------===//
+// Source
+//===--------------------------------------------------------------------===//
+
+CreateFilterGlobalSourceState::CreateFilterGlobalSourceState(ClientContext &context, const PhysicalCreateFilter &op)
+    : context(context) {
+	D_ASSERT(op.sink_state);
+	auto &gstate = op.sink_state->Cast<CreateFilterGlobalSinkState>();
+	gstate.total_data->InitializeScan(scan_state);
+	partition_id = 0;
+}
+
+idx_t CreateFilterGlobalSourceState::MaxThreads() {
+	return TaskScheduler::GetScheduler(context).NumberOfThreads();
+}
+
+unique_ptr<GlobalSourceState> PhysicalCreateFilter::GetGlobalSourceState(ClientContext &context) const {
+	auto state = make_uniq<CreateFilterGlobalSourceState>(context, *this);
+
+	D_ASSERT(sink_state);
+	auto &gsink = sink_state->Cast<CreateFilterGlobalSinkState>();
+
+	auto chunk_count = gsink.total_data->ChunkCount();
+	auto row_count = gsink.total_data->Count();
+
+#ifdef DEBUG
+	string build_table = filter_operation ? "table_" + std::to_string(filter_operation->build_table_idx) : "unknown";
+	Printer::Print(
+	    StringUtil::Format("[SOURCE] CREATE_FILTER (build=%s) GetGlobalSourceState: chunk_count=%llu, row_count=%llu",
+	                       build_table.c_str(), (unsigned long long)chunk_count, (unsigned long long)row_count));
+#endif
+
+	const idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
+	auto chunks_per_thread = MaxValue<idx_t>((chunk_count + num_threads - 1) / num_threads, 1);
+	idx_t chunk_idx = 0;
+	for (idx_t thread_idx = 0; thread_idx < num_threads; thread_idx++) {
+		if (chunk_idx == chunk_count) {
+			break;
+		}
+		auto chunk_idx_from = chunk_idx;
+		auto chunk_idx_to = MinValue<idx_t>(chunk_idx_from + chunks_per_thread, chunk_count);
+		state->chunks_todo.emplace_back(chunk_idx_from, chunk_idx_to);
+#ifdef DEBUG
+		Printer::Print(StringUtil::Format("[SOURCE] CREATE_FILTER (build=%s) Partition %llu: chunks [%llu, %llu)",
+		                                  build_table.c_str(), (unsigned long long)thread_idx,
+		                                  (unsigned long long)chunk_idx_from, (unsigned long long)chunk_idx_to));
+#endif
+		chunk_idx = chunk_idx_to;
+	}
+	return unique_ptr_cast<CreateFilterGlobalSourceState, GlobalSourceState>(std::move(state));
+}
+
+unique_ptr<LocalSourceState> PhysicalCreateFilter::GetLocalSourceState(ExecutionContext &context,
+                                                                       GlobalSourceState &gstate) const {
+	return make_uniq<CreateFilterLocalSourceState>();
+}
+
+SourceResultType PhysicalCreateFilter::GetDataInternal(ExecutionContext &context, DataChunk &chunk,
+                                                       OperatorSourceInput &input) const {
+	auto &gstate = sink_state->Cast<CreateFilterGlobalSinkState>();
+	auto &lstate = input.local_state.Cast<CreateFilterLocalSourceState>();
+	auto &state = input.global_state.Cast<CreateFilterGlobalSourceState>();
+
+#ifdef DEBUG
+	string build_table = filter_operation ? "table_" + std::to_string(filter_operation->build_table_idx) : "unknown";
+#endif
+
+	if (lstate.initial) {
+		lstate.local_partition_id = state.partition_id.fetch_add(1);
+		lstate.initial = false;
+
+#ifdef DEBUG
+		Printer::Print(StringUtil::Format(
+		    "[SOURCE] CREATE_FILTER (build=%s) GetData initial: partition_id=%llu, chunks_todo.size()=%zu",
+		    build_table.c_str(), (unsigned long long)lstate.local_partition_id, state.chunks_todo.size()));
+#endif
+
+		if (lstate.local_partition_id >= state.chunks_todo.size()) {
+			D_PRINTF("[SOURCE] CREATE_FILTER No more partitions, returning FINISHED");
+			return SourceResultType::FINISHED;
+		}
+		lstate.chunk_from = state.chunks_todo[lstate.local_partition_id].first;
+		lstate.chunk_to = state.chunks_todo[lstate.local_partition_id].second;
+
+		// parallel source
+		lstate.local_current_chunk_id = lstate.chunk_from;
+
+#ifdef DEBUG
+		Printer::Print(StringUtil::Format("[SOURCE] CREATE_FILTER (build=%s) Assigned range: [%llu, %llu)",
+		                                  build_table.c_str(), (unsigned long long)lstate.chunk_from,
+		                                  (unsigned long long)lstate.chunk_to));
+#endif
+	}
+
+	// sequential source
+	// auto chunk_count = gstate.total_data->ChunkCount();
+	//
+	// if (lstate.local_current_chunk_id >= chunk_count) {
+	// 	return SourceResultType::FINISHED;
+	// }
+	//
+	// if (lstate.local_current_chunk_id == 0) {
+	// 	lstate.local_current_chunk_id = lstate.chunk_from;
+	// }
+
+	// parallel source
+	{
+		// auto chunk_count = gstate.total_data->ChunkCount();
+
+		if (lstate.local_current_chunk_id >= lstate.chunk_to) {
+			return SourceResultType::FINISHED;
+		}
+	}
+	if (profiling_stats) {
+		ScopedTimer timer(profiling_stats->source_time_us);
+		gstate.total_data->FetchChunk(lstate.local_current_chunk_id++, chunk);
+	} else {
+		gstate.total_data->FetchChunk(lstate.local_current_chunk_id++, chunk);
+	}
+	return SourceResultType::HAVE_MORE_OUTPUT;
+}
+
+void PhysicalCreateFilter::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) {
+	op_state.reset();
+	sink_state.reset();
+
+#ifdef DEBUG
+	string build_table = filter_operation ? "table_" + std::to_string(filter_operation->build_table_idx) : "unknown";
+#endif
+
+	auto &state = meta_pipeline.GetState();
+
+	// make this operator source of the pipeline
+	state.SetPipelineSource(current, *this);
+
+	if (this_pipeline == nullptr) {
+		D_PRINTF("[PIPELINE] CREATE_FILTER (build=%s) creating NEW child pipeline for build-side", build_table.c_str());
+		auto &child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, *this);
+		this_pipeline = child_meta_pipeline.GetBasePipeline();
+		// CreateChildMetaPipeline() automatically registers the child pipeline as a dependency
+		child_meta_pipeline.Build(children[0].get());
+		D_PRINTF("[PIPELINE] CREATE_FILTER (build=%s) child pipeline created", build_table.c_str());
+	} else {
+		D_PRINTF("[PIPELINE] CREATE_FILTER (build=%s) adding existing child pipeline as dependency",
+		         build_table.c_str());
+		current.AddDependency(this_pipeline);
+	}
+}
+
+void PhysicalCreateFilter::BuildPipelinesFromRelated(Pipeline &current, MetaPipeline &meta_pipeline) {
+	op_state.reset();
+
+	D_ASSERT(children.size() == 1);
+
+#ifdef DEBUG
+	string build_table = filter_operation ? "table_" + std::to_string(filter_operation->build_table_idx) : "unknown";
+	char ptr_str[32];
+	snprintf(ptr_str, sizeof(ptr_str), "%p", (void *)this);
+	Printer::Print(StringUtil::Format(
+	    "[PIPELINE] CREATE_FILTER (build=%s, this=%s) BuildPipelinesFromRelated - PROBE_FILTER needs this filter",
+	    build_table.c_str(), ptr_str));
+#endif
+
+	if (this_pipeline == nullptr) {
+		D_PRINTF("[PIPELINE] CREATE_FILTER creating NEW child pipeline from BuildPipelinesFromRelated");
+		auto &child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, *this);
+		this_pipeline = child_meta_pipeline.GetBasePipeline();
+		child_meta_pipeline.Build(children[0].get());
+		D_PRINT("[PIPELINE] CREATE_FILTER child pipeline created and dependency added automatically");
+	} else {
+		D_PRINT("[PIPELINE] CREATE_FILTER adding existing pipeline as dependency");
+		current.AddDependency(this_pipeline);
+	}
+
+#ifdef DEBUG
+	this_pipeline->Print();
+#endif
+}
+
+} // namespace duckdb
diff --git a/src/operators/physical_create_filter.hpp b/src/operators/physical_create_filter.hpp
new file mode 100644
index 0000000..4b0d6ca
--- /dev/null
+++ b/src/operators/physical_create_filter.hpp
@@ -0,0 +1,171 @@
+#pragma once
+
+#include "duckdb/execution/physical_operator.hpp"
+#include "bloom_filter.hpp"
+#include "../optimizer/graph_manager.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
+#include <duckdb/common/types/column/column_data_scan_states.hpp>
+#include "duckdb/planner/table_filter.hpp"
+#include "duckdb/common/types/value_map.hpp"
+
+namespace duckdb {
+
+struct CreateFilterStats;
+
+class PhysicalCreateFilter : public PhysicalOperator {
+public:
+	static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXTENSION;
+
+public:
+	PhysicalCreateFilter(PhysicalPlan &physical_plan, const shared_ptr<FilterOperation> &filter_operation,
+	                     vector<LogicalType> types, idx_t estimated_cardinality, vector<idx_t> bound_column_indices);
+
+	// Required virtual methods
+	~PhysicalCreateFilter() override = default;
+
+	string GetName() const override;
+	string ToString(ExplainFormat format = ExplainFormat::DEFAULT) const override;
+
+	// populate info in query plan
+	InsertionOrderPreservingMap<string> ParamsToString() const override;
+
+	// sink interface - PhysicalOperator can act as sink
+	unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
+	unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
+	SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
+	SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const override;
+	SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
+	                          OperatorSinkFinalizeInput &input) const override;
+
+	bool IsSink() const override {
+		return true;
+	}
+
+	bool ParallelSink() const override {
+		return true;
+	}
+	// source interface
+	unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
+	unique_ptr<LocalSourceState> GetLocalSourceState(ExecutionContext &context,
+	                                                 GlobalSourceState &gstate) const override;
+	SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk,
+	                                 OperatorSourceInput &input) const override;
+
+	bool IsSource() const override {
+		return true;
+	}
+
+	bool ParallelSource() const override {
+		return true;
+	}
+
+	void BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) override;
+	void BuildPipelinesFromRelated(Pipeline &current, MetaPipeline &meta_pipeline);
+
+public:
+	// vector<shared_ptr<FilterPlan>> filter_plans;
+	shared_ptr<FilterOperation> filter_operation;
+	bool is_probing_side;
+
+	// maps the column indices to resolved chunk column positions
+	vector<idx_t> bound_column_indices;
+
+	// column-keyed bloom filter map: ColumnBinding -> PTBloomFilter
+	unordered_map<ColumnBinding, shared_ptr<PTBloomFilter>, ColumnBindingHash, ColumnBindingEqual> bloom_filter_map;
+
+	// pipeline reference
+	shared_ptr<Pipeline> this_pipeline;
+
+	// lookup bloom filter by the column it was built on
+	shared_ptr<PTBloomFilter> GetBloomFilter(const ColumnBinding &col) const;
+
+	// dynamic filter pushdown to table scans (forward pass only)
+	struct DynamicFilterTarget {
+		shared_ptr<DynamicTableFilterSet> dynamic_filters;
+		idx_t scan_column_index;
+		ColumnBinding probe_column;
+		LogicalType column_type;
+		string column_name;
+	};
+	vector<DynamicFilterTarget> pushdown_targets;
+	bool is_forward_pass = false;
+
+	// profiling
+	mutable shared_ptr<CreateFilterStats> profiling_stats;
+	mutable bool profiling_checked = false;
+};
+
+struct ColumnMinMax {
+	Value min_val, max_val;
+	bool has_value = false;
+};
+
+struct ColumnDistinct {
+	value_set_t values;
+	bool over_threshold = false;
+};
+
+class CreateFilterLocalSinkState : public LocalSinkState {
+public:
+	CreateFilterLocalSinkState(ClientContext &context, const PhysicalCreateFilter &op);
+
+	ClientContext &client_context;
+	unique_ptr<ColumnDataCollection> local_data;
+	vector<ColumnMinMax> local_min_max;
+	vector<ColumnDistinct> local_distinct;
+};
+
+class CreateFilterGlobalSinkState : public GlobalSinkState {
+public:
+	CreateFilterGlobalSinkState(ClientContext &context, const PhysicalCreateFilter &op);
+
+	const PhysicalCreateFilter &op;
+	mutex glock;
+
+	// store data for sink phase
+	unique_ptr<ColumnDataCollection> total_data;
+	vector<unique_ptr<ColumnDataCollection>> local_data_collections;
+
+	// min-max tracking for dynamic filter pushdown
+	vector<ColumnMinMax> column_min_max;
+
+	// distinct-value tracking for IN-filter pushdown
+	vector<ColumnDistinct> column_distinct;
+	idx_t distinct_threshold = 50;
+
+	// shared empty-probe flag (forward pass). set by any sibling CREATE_FILTER / PROBE_FILTER
+	// targeting the same probe table when it detects the probe will be empty.
+	// read lock-free (relaxed) in Sink to short-circuit BF build.
+	shared_ptr<std::atomic<bool>> probe_empty_flag;
+};
+
+class CreateFilterLocalSourceState : public LocalSourceState {
+public:
+	CreateFilterLocalSourceState() {
+		local_current_chunk_id = 0;
+		initial = true;
+	}
+
+public:
+	idx_t local_current_chunk_id;
+	idx_t local_partition_id;
+	idx_t chunk_from;
+	idx_t chunk_to;
+	bool initial;
+};
+
+class CreateFilterGlobalSourceState : public GlobalSourceState {
+public:
+	CreateFilterGlobalSourceState(ClientContext &context, const PhysicalCreateFilter &op);
+
+	idx_t MaxThreads() override;
+
+	ClientContext &context;
+	ColumnDataScanState scan_state;
+	vector<pair<idx_t, idx_t>> chunks_todo;
+	std::atomic<idx_t> partition_id;
+	vector<shared_ptr<PTBloomFilter>> bloom_filters;
+	mutex bf_lock;
+};
+
+} // namespace duckdb
diff --git a/src/operators/physical_probe_filter.cpp b/src/operators/physical_probe_filter.cpp
new file mode 100644
index 0000000..dd239ac
--- /dev/null
+++ b/src/operators/physical_probe_filter.cpp
@@ -0,0 +1,243 @@
+#include "physical_probe_filter.hpp"
+#include "physical_create_filter.hpp"
+#include "bloom_filter.hpp"
+#include "duckdb/common/types/selection_vector.hpp"
+#include "duckdb/common/vector_operations/vector_operations.hpp"
+#include "duckdb/parallel/meta_pipeline.hpp"
+#include "utils/debug_utils.hpp"
+#include "robust_profiling.hpp"
+#include "probe_empty_registry.hpp"
+
+namespace duckdb {
+
+PhysicalProbeFilter::PhysicalProbeFilter(PhysicalPlan &physical_plan, shared_ptr<FilterOperation> filter_operation,
+                                         vector<LogicalType> types, idx_t estimated_cardinality,
+                                         vector<idx_t> bound_column_indices)
+    : CachingPhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, std::move(types), estimated_cardinality),
+      filter_operation(std::move(filter_operation)), bound_column_indices(std::move(bound_column_indices)) {
+}
+
+string PhysicalProbeFilter::GetName() const {
+	return "PROBE_FILTER";
+}
+
+string PhysicalProbeFilter::ToString(ExplainFormat format) const {
+	string result = "PROBE_FILTER";
+	if (is_passthrough) {
+		result += " (passthrough, pushed to scan)";
+	} else if (filter_operation) {
+		result += " [" + std::to_string(filter_operation->probe_columns.size()) + " probe columns]";
+	}
+	return result;
+}
+
+InsertionOrderPreservingMap<string> PhysicalProbeFilter::ParamsToString() const {
+	InsertionOrderPreservingMap<string> result;
+	result["Operator"] = is_passthrough ? "PhysicalProbeFilter (passthrough)" : "PhysicalProbeFilter";
+
+	result["Build Table"] = to_string(filter_operation->build_table_idx);
+	result["Probe Table"] = to_string(filter_operation->probe_table_idx);
+
+	string probe_cols = "";
+	for (size_t i = 0; i < filter_operation->probe_columns.size(); i++) {
+		if (i > 0) {
+			probe_cols += ", ";
+		}
+		probe_cols += "(" + to_string(filter_operation->probe_columns[i].table_index) + "." +
+		              to_string(filter_operation->probe_columns[i].column_index) + ")";
+	}
+	result["Probe Columns"] = probe_cols;
+
+	if (estimated_cardinality != DConstants::INVALID_INDEX) {
+		result["Estimated Cardinality"] = std::to_string(estimated_cardinality);
+	}
+
+	return result;
+}
+
+unique_ptr<OperatorState> PhysicalProbeFilter::GetOperatorState(ExecutionContext &context) const {
+	return make_uniq<PhysicalProbeFilterState>();
+}
+
+OperatorResultType PhysicalProbeFilter::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
+                                                        GlobalOperatorState &gstate, OperatorState &state_p) const {
+	// passthrough mode: filters pushed to scan, just forward data
+	if (is_passthrough) {
+		chunk.Reference(input);
+		return OperatorResultType::NEED_MORE_INPUT;
+	}
+
+	if (!profiling_checked) {
+		profiling_checked = true;
+		auto prof = GetRobustProfilingState(context.client);
+		if (prof) {
+			profiling_stats =
+			    prof->RegisterProbeFilter(filter_operation->build_table_idx, filter_operation->probe_table_idx,
+			                              filter_operation->sequence_number, filter_operation->is_forward_pass);
+		}
+	}
+
+	string table_name = filter_operation ? "table_" + std::to_string(filter_operation->probe_table_idx) : "unknown";
+
+	auto &state = state_p.Cast<PhysicalProbeFilterState>();
+
+	// lazy initialization of bloom filters on first call
+	if (!state.bloom_filters_initialized) {
+		D_PRINTF("[EXEC_INTERNAL] PROBE_FILTER (probe=%s) Initializing bloom filters, bound_column_indices.size()=%zu",
+		         table_name.c_str(), bound_column_indices.size());
+		for (size_t i = 0; i < bound_column_indices.size(); i++) {
+			D_PRINTF("  bound_column_indices[%zu] = %llu", i, (unsigned long long)bound_column_indices[i]);
+		}
+
+		if (!related_create_filter_vec.empty() && filter_operation) {
+			// lookup bloom filters by build column binding
+			for (const auto &build_col : filter_operation->build_columns) {
+				for (auto *create_filter : related_create_filter_vec) {
+					auto bf = create_filter->GetBloomFilter(build_col);
+					if (bf) {
+						string build_table =
+						    create_filter->filter_operation
+						        ? "table_" + std::to_string(create_filter->filter_operation->build_table_idx)
+						        : "unknown";
+						D_PRINTF("[EXEC_INTERNAL] PROBE_FILTER found bloom filter for col(%llu,%llu) from "
+						         "CREATE_FILTER (build=%s)",
+						         (unsigned long long)build_col.table_index, (unsigned long long)build_col.column_index,
+						         build_table.c_str());
+						state.bloom_filters.push_back(bf);
+						break; // found the filter for this column
+					}
+				}
+			}
+		}
+		D_PRINTF("[EXEC_INTERNAL] PROBE_FILTER total bloom_filters.size() = %zu", state.bloom_filters.size());
+		state.bloom_filters_initialized = true;
+	}
+
+	idx_t row_num = input.size();
+
+	// if no bloom filters or no input, just pass through
+	if (state.bloom_filters.empty() || row_num == 0) {
+		D_PRINTF("[EXEC_INTERNAL] PROBE_FILTER (probe=%s) No bloom filter input/empty, row_num = %llu",
+		         table_name.c_str(), (unsigned long long)row_num);
+		if (profiling_stats) {
+			profiling_stats->rows_in.fetch_add(row_num, std::memory_order_relaxed);
+			profiling_stats->rows_out.fetch_add(row_num, std::memory_order_relaxed);
+		}
+		chunk.Reference(input);
+		return OperatorResultType::NEED_MORE_INPUT;
+	}
+
+	idx_t original_row_num = row_num;
+
+	// apply bloom filters
+	idx_t result_count = row_num;
+	auto &sel = state.sel;
+
+	unique_ptr<ScopedTimer> probe_timer;
+	if (profiling_stats) {
+		probe_timer = make_uniq<ScopedTimer>(profiling_stats->probe_time_us);
+	}
+
+	for (int i = 0; i < state.bloom_filters.size(); i++) {
+		auto bf = state.bloom_filters[i];
+		if (!bf || !bf->finalized_) {
+			D_PRINT("skipped - bloom filter not ready");
+			continue;
+		}
+
+		// check if bloom filter is empty (no data inserted)
+		if (bf->IsEmpty()) {
+			string build_table =
+			    filter_operation ? "table_" + std::to_string(filter_operation->build_table_idx) : "unknown";
+			D_PRINTF("Bloom filter empty for %s", build_table.c_str());
+			// signal any CREATE_FILTER siblings targeting this probe that it will be empty
+			auto reg = GetProbeEmptyRegistry(context.client);
+			if (reg) {
+				auto flag = reg->GetOrCreate(filter_operation->probe_table_idx);
+				flag->store(true, std::memory_order_relaxed);
+			}
+			// empty filter means no matches possible
+			probe_timer.reset();
+			if (profiling_stats) {
+				profiling_stats->rows_in.fetch_add(original_row_num, std::memory_order_relaxed);
+			}
+			chunk.SetCardinality(0);
+			return OperatorResultType::NEED_MORE_INPUT;
+		}
+
+		// string probe_table = filter_operation ? "table_" + std::to_string(filter_operation->probe_table_idx) :
+		// "unknown"; for (int i = 0; i < bound_column_indices.size(); i++) { 	printf("bound columns for %s - %llu\n",
+		// probe_table.c_str(), bound_column_indices[i]);
+		// }
+
+		// lookup directly into selection vector
+		result_count = bf->LookupSel(input, sel, {bound_column_indices[i]}, state.bit_vector.data());
+
+		// early exit if no rows passed
+		if (result_count == 0) {
+			probe_timer.reset();
+			if (profiling_stats) {
+				profiling_stats->rows_in.fetch_add(original_row_num, std::memory_order_relaxed);
+			}
+			chunk.SetCardinality(0);
+			return OperatorResultType::NEED_MORE_INPUT;
+		}
+
+		// apply filter if we filtered rows
+		if (result_count < row_num) {
+			input.Slice(sel, result_count);
+			row_num = result_count;
+		}
+	}
+
+	// stop probe timer before output work
+	probe_timer.reset();
+
+	// optimization: if all rows passed, just reference input (zero-copy)
+	if (result_count == row_num) {
+		chunk.Reference(input);
+	} else {
+		chunk.Slice(input, sel, result_count);
+	}
+
+	if (profiling_stats) {
+		profiling_stats->rows_in.fetch_add(original_row_num, std::memory_order_relaxed);
+		profiling_stats->rows_out.fetch_add(result_count, std::memory_order_relaxed);
+	}
+
+	return OperatorResultType::NEED_MORE_INPUT;
+}
+
+void PhysicalProbeFilter::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) {
+	op_state.reset();
+
+#ifdef DEBUG
+	char ptr_str[32];
+	snprintf(ptr_str, sizeof(ptr_str), "%p", (void *)this);
+	string probe_table = filter_operation ? "table_" + std::to_string(filter_operation->probe_table_idx) : "unknown";
+	Printer::Print(StringUtil::Format("[PIPELINE] PROBE_FILTER (probe=%s, this=%s) BuildPipelines called",
+	                                  probe_table.c_str(), ptr_str));
+#endif
+
+	auto &state = meta_pipeline.GetState();
+	state.AddPipelineOperator(current, *this);
+
+#ifdef DEBUG
+	Printer::Print(
+	    StringUtil::Format("[PIPELINE] PROBE_FILTER (probe=%s, this=%s) added to current pipeline as operator",
+	                       probe_table.c_str(), ptr_str));
+	Printer::Print(StringUtil::Format("[PIPELINE] PROBE_FILTER (probe=%s) has %zu related CREATE_FILTER operators",
+	                                  probe_table.c_str(), related_create_filter_vec.size()));
+#endif
+
+	// add dependencies on all related CREATE_FILTER operators
+	for (size_t i = 0; i < related_create_filter_vec.size(); i++) {
+		auto *create_filter = related_create_filter_vec[i];
+		create_filter->BuildPipelinesFromRelated(current, meta_pipeline);
+	}
+
+	// continue building child pipelines
+	children[0].get().BuildPipelines(current, meta_pipeline);
+}
+
+} // namespace duckdb
diff --git a/src/operators/physical_probe_filter.hpp b/src/operators/physical_probe_filter.hpp
new file mode 100644
index 0000000..ec8d655
--- /dev/null
+++ b/src/operators/physical_probe_filter.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "duckdb.hpp"
+#include "duckdb/execution/physical_operator.hpp"
+#include "../optimizer/graph_manager.hpp"
+#include "bloom_filter.hpp"
+
+namespace duckdb {
+
+struct ProbeFilterStats;
+class PhysicalCreateFilter;
+
+class PhysicalProbeFilterState : public CachingOperatorState {
+public:
+	PhysicalProbeFilterState()
+	    : bloom_filters_initialized(false), sel(STANDARD_VECTOR_SIZE), bit_vector((STANDARD_VECTOR_SIZE + 7) / 8) {
+	}
+
+	vector<shared_ptr<PTBloomFilter>> bloom_filters;
+	bool bloom_filters_initialized;
+
+	// reusable buffers to avoid per-chunk heap allocations
+	SelectionVector sel;
+	vector<uint8_t> bit_vector;
+};
+
+class PhysicalProbeFilter : public CachingPhysicalOperator {
+public:
+	static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXTENSION;
+
+public:
+	PhysicalProbeFilter(PhysicalPlan &physical_plan, shared_ptr<FilterOperation> filter_operation,
+	                    vector<LogicalType> types, idx_t estimated_cardinality, vector<idx_t> bound_column_indices);
+
+	// required virtual methods
+	~PhysicalProbeFilter() override = default;
+
+	string GetName() const override;
+	string ToString(ExplainFormat format = ExplainFormat::DEFAULT) const override;
+
+	// populate info in query plan
+	InsertionOrderPreservingMap<string> ParamsToString() const override;
+
+	// state management
+	unique_ptr<OperatorState> GetOperatorState(ExecutionContext &context) const override;
+
+	bool ParallelOperator() const override {
+		return true;
+	}
+
+	void BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) override;
+
+protected:
+	// operator interface - using ExecuteInternal for CachingPhysicalOperator
+	OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
+	                                   GlobalOperatorState &gstate, OperatorState &state) const override;
+
+public:
+	shared_ptr<FilterOperation> filter_operation;
+	bool is_passthrough = false;
+
+	// maps the column indices to resolved chunk column positions
+	vector<idx_t> bound_column_indices;
+
+	// references to related CREATE_FILTER operators
+	vector<PhysicalCreateFilter *> related_create_filter_vec;
+	mutable PhysicalCreateFilter *related_create_filter = nullptr;
+
+	// profiling
+	mutable shared_ptr<ProbeFilterStats> profiling_stats;
+	mutable bool profiling_checked = false;
+};
+
+} // namespace duckdb
diff --git a/src/operators/physical_use_bf.cpp b/src/operators/physical_use_bf.cpp
deleted file mode 100644
index 4e1ef5e..0000000
--- a/src/operators/physical_use_bf.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-#include "physical_use_bf.hpp"
-#include "physical_create_bf.hpp"
-#include "dag.hpp"
-#include "bloom_filter.hpp"
-#include "duckdb/execution/expression_executor.hpp"
-#include "duckdb/common/types/selection_vector.hpp"
-
-namespace duckdb {
-
-PhysicalUseBF::PhysicalUseBF(shared_ptr<FilterPlan> filter_plan, vector<LogicalType> types, idx_t estimated_cardinality)
-    : PhysicalOperator(PhysicalOperatorType::EXTENSION, std::move(types), estimated_cardinality),
-      filter_plan(std::move(filter_plan)), is_probing_side(false) {
-}
-
-string PhysicalUseBF::GetName() const {
-    return "USE_BF";
-}
-
-string PhysicalUseBF::ToString(ExplainFormat format) const {
-    string result = "USE_BF";
-    if (filter_plan) {
-        result += " [" + std::to_string(filter_plan->apply.size()) + " expressions]";
-    }
-    return result;
-}
-
-unique_ptr<OperatorState> PhysicalUseBF::GetOperatorState(ExecutionContext &context) const {
-    return make_uniq<PhysicalUseBFState>();
-}
-
-unique_ptr<GlobalOperatorState> PhysicalUseBF::GetGlobalOperatorState(ClientContext &context) const {
-    return make_uniq<PhysicalUseBFGlobalState>();
-}
-
-void PhysicalUseBF::InitializeBloomFilters(PhysicalUseBFState &bf_state, ExecutionContext &context) const {
-    if (related_create_bf && !bf_state.bloom_filters_initialized) {
-        // get bloom filters from the related PhysicalCreateBF
-        bf_state.bloom_filters = related_create_bf->GetBloomFilters();
-        bf_state.bloom_filters_initialized = true;
-    }
-}
-
-bool PhysicalUseBF::FilterDataChunk(DataChunk &chunk, const vector<shared_ptr<BloomFilter>> &bloom_filters,
-                                    ExecutionContext &context) const {
-    if (!filter_plan || filter_plan->apply.empty() || bloom_filters.empty()) {
-    	// No filtering needed
-    	return true;
-    }
-    
-    // simplified version: use bound column indices directly from input chunk
-    // note: this assumes the chunk already contains the correct columns
-    
-    // use bloom filters to filter rows using bound columns
-    bool any_rows_remain = false;
-    SelectionVector sel(chunk.size());
-    idx_t result_count = 0;
-    
-    for (idx_t i = 0; i < chunk.size(); i++) {
-        bool passes_filter = true;
-        
-        // check against all available bloom filters
-        for (auto &bf : bloom_filters) {
-            if (bf && bf->finalized_) {
-                vector<uint32_t> results(1);
-                
-                // create single-row chunk for lookup using input chunk columns
-                DataChunk single_row_chunk;
-                single_row_chunk.Initialize(Allocator::DefaultAllocator(), chunk.GetTypes());
-                
-                // copy row data
-                for (idx_t col = 0; col < chunk.ColumnCount(); col++) {
-                    single_row_chunk.data[col].Slice(chunk.data[col], i, i + 1);
-                }
-                single_row_chunk.SetCardinality(1);
-                
-                // determine which columns to use for lookup
-                vector<idx_t> lookup_cols;
-                if (!filter_plan->bound_cols_apply.empty()) {
-                    lookup_cols = filter_plan->bound_cols_apply;
-                } else {
-                    // use all columns if no specific binding
-                    for (idx_t k = 0; k < single_row_chunk.ColumnCount(); k++) {
-                        lookup_cols.push_back(k);
-                    }
-                }
-                
-                // Perform bloom filter lookup
-                bf->Lookup(single_row_chunk, results, lookup_cols);
-                
-                if (results[0] == 0) {
-                    passes_filter = false;
-                    break; // Row doesn't pass bloom filter
-                }
-            }
-        }
-        
-        if (passes_filter) {
-            sel.set_index(result_count++, i);
-            any_rows_remain = true;
-        }
-    }
-    
-    // apply selection to the chunk if we filtered any rows
-    if (result_count < chunk.size()) {
-        chunk.Slice(sel, result_count);
-    }
-    
-    return any_rows_remain;
-}
-
-OperatorResultType PhysicalUseBF::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
-                                         GlobalOperatorState &gstate, OperatorState &state) const {
-
-	Printer::Print("PhysicalUseBF::Execute() called");
-    auto &bf_state = state.Cast<PhysicalUseBFState>();
-    auto &bf_gstate = gstate.Cast<PhysicalUseBFGlobalState>();
-    
-    // initialize bloom filters if not done yet
-    if (!bf_state.bloom_filters_initialized) {
-        InitializeBloomFilters(bf_state, context);
-    }
-    
-    // get data from child operator
-    auto child_result = OperatorResultType::NEED_MORE_INPUT;
-    if (!children.empty()) {
-        child_result = children[0].get().Execute(context, input, chunk, gstate, state);
-    } else {
-        chunk.Reference(input);
-        child_result = OperatorResultType::HAVE_MORE_OUTPUT;
-    }
-    
-    // apply bloom filter if we have data and bloom filters are available
-    if (chunk.size() > 0 && !bf_state.bloom_filters.empty()) {
-        bool has_data = FilterDataChunk(chunk, bf_state.bloom_filters, context);
-        
-        // if all rows were filtered out, continue with empty chunk
-        if (!has_data && chunk.size() > 0) {
-            chunk.Reset();
-        }
-    }
-    
-    return child_result;
-}
-
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/operators/physical_use_bf.hpp b/src/operators/physical_use_bf.hpp
deleted file mode 100644
index 0bbfbcf..0000000
--- a/src/operators/physical_use_bf.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#pragma once
-
-#include "duckdb.hpp"
-#include "duckdb/execution/physical_operator.hpp"
-#include "dag.hpp"
-#include "bloom_filter.hpp"
-
-namespace duckdb {
-class PhysicalCreateBF;
-}
-
-namespace duckdb {
-
-class PhysicalUseBFState : public OperatorState {
-public:
-	PhysicalUseBFState() : bloom_filters_initialized(false) {}
-	
-	vector<shared_ptr<BloomFilter>> bloom_filters;
-	bool bloom_filters_initialized;
-};
-
-class PhysicalUseBFGlobalState : public GlobalOperatorState {
-public:
-	PhysicalUseBFGlobalState() {}
-};
-
-class PhysicalUseBF : public PhysicalOperator {
-public:
-	static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXTENSION;
-
-public:
-	PhysicalUseBF(shared_ptr<FilterPlan> filter_plan, vector<LogicalType> types, idx_t estimated_cardinality);
-
-	// Required virtual methods
-	virtual ~PhysicalUseBF() = default;
-
-	string GetName() const override;
-	string ToString(ExplainFormat format = ExplainFormat::DEFAULT) const override;
-	
-	// State management
-	unique_ptr<OperatorState> GetOperatorState(ExecutionContext &context) const override;
-	unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
-	
-	// Operator interface
-	OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
-	                          GlobalOperatorState &gstate, OperatorState &state) const override;
-
-public:
-	shared_ptr<FilterPlan> filter_plan;
-	bool is_probing_side;
-	
-	// Reference to the corresponding PhysicalCreateBF operator
-	mutable PhysicalCreateBF *related_create_bf = nullptr;
-	
-private:
-	void InitializeBloomFilters(PhysicalUseBFState &bf_state, ExecutionContext &context) const;
-	bool FilterDataChunk(DataChunk &chunk, const vector<shared_ptr<BloomFilter>> &bloom_filters, 
-	                     ExecutionContext &context) const;
-};
-
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/operators/probe_empty_registry.hpp b/src/operators/probe_empty_registry.hpp
new file mode 100644
index 0000000..4b2038d
--- /dev/null
+++ b/src/operators/probe_empty_registry.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "duckdb/main/client_context_state.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/common/unordered_map.hpp"
+#include <atomic>
+#include <mutex>
+
+namespace duckdb {
+
+// per-query registry mapping probe_table_idx -> shared empty-flag.
+// used to short-circuit CREATE_FILTER operators whose probe side is known empty
+// (e.g. a sibling CREATE_FILTER targeting the same probe finalized with an empty
+// BF, or a PROBE_FILTER detected an empty build BF).
+class ProbeEmptyRegistry : public ClientContextState {
+public:
+	mutex reg_lock;
+	unordered_map<idx_t, shared_ptr<std::atomic<bool>>> flags;
+
+	shared_ptr<std::atomic<bool>> GetOrCreate(idx_t probe_table_idx) {
+		lock_guard<mutex> g(reg_lock);
+		auto it = flags.find(probe_table_idx);
+		if (it != flags.end()) {
+			return it->second;
+		}
+		auto f = make_shared_ptr<std::atomic<bool>>(false);
+		flags[probe_table_idx] = f;
+		return f;
+	}
+
+	void QueryEnd(ClientContext &ctx) override {
+		{
+			lock_guard<mutex> g(reg_lock);
+			flags.clear();
+		}
+		ctx.registered_state->Remove("robust_probe_empty");
+	}
+};
+
+inline shared_ptr<ProbeEmptyRegistry> GetProbeEmptyRegistry(ClientContext &context) {
+	return context.registered_state->GetOrCreate<ProbeEmptyRegistry>("robust_probe_empty");
+}
+
+} // namespace duckdb
diff --git a/src/optimizer/graph_manager.cpp b/src/optimizer/graph_manager.cpp
index e69de29..8b13789 100644
--- a/src/optimizer/graph_manager.cpp
+++ b/src/optimizer/graph_manager.cpp
@@ -0,0 +1 @@
+
diff --git a/src/optimizer/graph_manager.hpp b/src/optimizer/graph_manager.hpp
index 8aa54ab..d0f4b5e 100644
--- a/src/optimizer/graph_manager.hpp
+++ b/src/optimizer/graph_manager.hpp
@@ -6,42 +6,54 @@
 #include "duckdb/common/assert.hpp"
 #include "duckdb/common/types.hpp"
 
-
 namespace duckdb {
+
+// hash function for ColumnBinding to use as map key
+struct ColumnBindingHash {
+	size_t operator()(const ColumnBinding &binding) const {
+		return std::hash<idx_t>()(binding.table_index) ^ (std::hash<idx_t>()(binding.column_index) << 16);
+	}
+};
+
+// equality for ColumnBinding
+struct ColumnBindingEqual {
+	bool operator()(const ColumnBinding &a, const ColumnBinding &b) const {
+		return a.table_index == b.table_index && a.column_index == b.column_index;
+	}
+};
 typedef idx_t table_id;
-	class JoinEdge {
-	public:
-		idx_t table_a;
-		idx_t table_b;
-		vector<ColumnBinding> join_columns_a;  // multi-column join support
-		vector<ColumnBinding> join_columns_b;
-		idx_t weight;
-		JoinType join_type;
-		// reference<LogicalOperator> table1_op;
-		// reference<LogicalOperator> table2_op;
+class JoinEdge {
+public:
+	idx_t table_a;
+	idx_t table_b;
+	vector<ColumnBinding> join_columns_a; // multi-column join support
+	vector<ColumnBinding> join_columns_b;
+	idx_t weight;
+	JoinType join_type;
+	// reference<LogicalOperator> table1_op;
+	// reference<LogicalOperator> table2_op;
 
-	public:
-		JoinEdge(table_id table_a, table_id table_b,
-			 vector<ColumnBinding> cols_a, vector<ColumnBinding> cols_b,
-			 idx_t weight, JoinType join_type)
-		: table_a(table_a), table_b(table_b),
-		  join_columns_a(std::move(cols_a)), join_columns_b(std::move(cols_b)),
-		  weight(weight), join_type(join_type) {
-			D_ASSERT(!join_columns_a.empty());
-			D_ASSERT(join_columns_a.size() == join_columns_b.size());
-		}
-	};
+public:
+	JoinEdge(table_id table_a, table_id table_b, vector<ColumnBinding> cols_a, vector<ColumnBinding> cols_b,
+	         idx_t weight, JoinType join_type)
+	    : table_a(table_a), table_b(table_b), join_columns_a(std::move(cols_a)), join_columns_b(std::move(cols_b)),
+	      weight(weight), join_type(join_type) {
+		D_ASSERT(!join_columns_a.empty());
+		D_ASSERT(join_columns_a.size() == join_columns_b.size());
+	}
+};
 
-	class BloomFilterOperation {
-	public:
-		bool is_create; // true = CREATE_BF, false = USE_BF
-		idx_t build_table_idx;
-		idx_t probe_table_idx;
-		vector<ColumnBinding> join_columns;
-		vector<ColumnBinding> build_columns;
-		vector<ColumnBinding> probe_columns;
-		JoinType join_type;
-		idx_t sequence_number; // for preserving generation order
-	};
+class FilterOperation {
+public:
+	bool is_create; // true = CREATE_FILTER, false = PROBE_FILTER
+	idx_t build_table_idx;
+	idx_t probe_table_idx;
+	vector<ColumnBinding> join_columns;
+	vector<ColumnBinding> build_columns;
+	vector<ColumnBinding> probe_columns;
+	JoinType join_type;
+	idx_t sequence_number; // for preserving generation order
+	bool is_forward_pass = false;
+};
 
-} // namespace duckdb
\ No newline at end of file
+} // namespace duckdb
diff --git a/src/optimizer/robust_optimizer.cpp b/src/optimizer/robust_optimizer.cpp
new file mode 100644
index 0000000..a80da57
--- /dev/null
+++ b/src/optimizer/robust_optimizer.cpp
@@ -0,0 +1,1734 @@
+#include "robust_optimizer.hpp"
+#include "duckdb/planner/operator/logical_comparison_join.hpp"
+#include "duckdb/planner/operator/logical_get.hpp"
+#include "duckdb/planner/operator/logical_aggregate.hpp"
+#include "duckdb/planner/expression/bound_columnref_expression.hpp"
+#include "duckdb/common/types.hpp"
+#include "table_manager.hpp"
+#include "graph_manager.hpp"
+#include "duckdb/common/unordered_set.hpp"
+#include "duckdb/common/set.hpp"
+#include <algorithm>
+#include "duckdb/common/vector.hpp"
+#include "duckdb/common/unordered_map.hpp"
+#include "../operators/logical_create_filter.hpp"
+#include "../operators/logical_probe_filter.hpp"
+#include "utils/debug_utils.hpp"
+#include "robust_profiling.hpp"
+#include "../utils/dag_printer.hpp"
+#include <chrono>
+
+namespace duckdb {
+// class LogicalCreateFilter;
+// class LogicalProbeFilter;
+
+vector<JoinEdge> RobustOptimizerContextState::ExtractOperators(LogicalOperator &plan) {
+	vector<LogicalOperator *> join_ops;
+	vector<TableInfo> table_infos;
+
+	// pass 1: collect the base tables and join operators
+	ExtractOperatorsRecursive(plan, join_ops);
+
+	// debug: print summary of registered nodes
+	D_PRINT("\n=== REGISTERED NODES SUMMARY ===");
+	for (const auto &entry : table_mgr.table_lookup) {
+		D_PRINTF("  table_idx=%llu (type=%d, cardinality=%llu)", (unsigned long long)entry.first,
+		         (int)entry.second.table_op->type, (unsigned long long)entry.second.estimated_cardinality);
+	}
+	D_PRINTF("Total registered nodes: %zu", table_mgr.table_lookup.size());
+	D_PRINTF("Total join operators found: %zu\n", join_ops.size());
+
+	// pass 2: create JoinEdges with table information
+	return CreateJoinEdges(join_ops);
+}
+
+void RobustOptimizerContextState::ExtractOperatorsRecursive(LogicalOperator &plan,
+                                                            vector<LogicalOperator *> &join_ops) {
+	LogicalOperator *op = &plan;
+
+	// step 1: collect all join operators
+	if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
+	    op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
+		LogicalComparisonJoin &join = op->Cast<LogicalComparisonJoin>();
+		switch (join.join_type) {
+		case JoinType::INNER:
+		case JoinType::LEFT:
+		case JoinType::RIGHT:
+		case JoinType::SEMI:
+		case JoinType::RIGHT_SEMI: {
+			if (std::any_of(join.conditions.begin(), join.conditions.end(), [](const JoinCondition &jc) {
+				    return jc.comparison == ExpressionType::COMPARE_EQUAL &&
+				           jc.left->type == ExpressionType::BOUND_COLUMN_REF &&
+				           jc.right->type == ExpressionType::BOUND_COLUMN_REF;
+			    })) {
+				// JoinEdge edge(join);
+				join_ops.push_back(op);
+				break;
+			}
+		}
+		default:
+			break;
+		}
+	}
+
+	switch (op->type) {
+	case LogicalOperatorType::LOGICAL_FILTER: {
+		LogicalOperator *child = op->children[0].get();
+		if (child->type == LogicalOperatorType::LOGICAL_GET) {
+			table_mgr.AddTableOperator(child);
+			return;
+		}
+
+		ExtractOperatorsRecursive(*child, join_ops);
+		return;
+	}
+	case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: {
+		auto &agg = op->Cast<LogicalAggregate>();
+		if (agg.groups.empty() && agg.grouping_sets.size() <= 1) {
+			table_mgr.AddTableOperator(op);
+			ExtractOperatorsRecursive(*op->children[0], join_ops);
+		} else {
+			auto old_refs = agg.GetColumnBindings();
+			for (size_t i = 0; i < agg.groups.size(); i++) {
+				if (agg.groups[i]->type == ExpressionType::BOUND_COLUMN_REF) {
+					auto &col_ref = agg.groups[i]->Cast<BoundColumnRefExpression>();
+					rename_col_bindings.insert({old_refs[i], col_ref.binding});
+				}
+			}
+			ExtractOperatorsRecursive(*op->children[0], join_ops);
+		}
+		return;
+	}
+	case LogicalOperatorType::LOGICAL_PROJECTION: {
+		auto old_refs = op->GetColumnBindings();
+		for (size_t i = 0; i < op->expressions.size(); i++) {
+			if (op->expressions[i]->type == ExpressionType::BOUND_COLUMN_REF) {
+				auto &col_ref = op->expressions[i]->Cast<BoundColumnRefExpression>();
+				rename_col_bindings.insert({old_refs[i], col_ref.binding});
+			}
+		}
+		ExtractOperatorsRecursive(*op->children[0], join_ops);
+		return;
+	}
+	case LogicalOperatorType::LOGICAL_UNION:
+	case LogicalOperatorType::LOGICAL_EXCEPT:
+	case LogicalOperatorType::LOGICAL_INTERSECT: {
+		table_mgr.AddTableOperator(op);
+		ExtractOperatorsRecursive(*op->children[0], join_ops);
+		ExtractOperatorsRecursive(*op->children[1], join_ops);
+		return;
+	}
+	case LogicalOperatorType::LOGICAL_WINDOW: {
+		table_mgr.AddTableOperator(op);
+		ExtractOperatorsRecursive(*op->children[0], join_ops);
+		return;
+	}
+	case LogicalOperatorType::LOGICAL_DUMMY_SCAN:
+	case LogicalOperatorType::LOGICAL_EXPRESSION_GET:
+	case LogicalOperatorType::LOGICAL_DELIM_GET:
+	case LogicalOperatorType::LOGICAL_GET:
+	case LogicalOperatorType::LOGICAL_EMPTY_RESULT:
+	case LogicalOperatorType::LOGICAL_CHUNK_GET:
+		D_PRINTF("[NODE_REG] Registering base table scan, type=%d", (int)op->type);
+		table_mgr.AddTableOperator(op);
+		return;
+	default:
+		for (auto &child : op->children) {
+			ExtractOperatorsRecursive(*child, join_ops);
+		}
+	}
+}
+
+ColumnBinding RobustOptimizerContextState::ResolveColumnBinding(const ColumnBinding &binding) const {
+	ColumnBinding current = binding;
+	set<pair<idx_t, idx_t>> visited;
+
+	// follow the rename chain until we find a base table binding
+	while (true) {
+		auto key = make_pair(current.table_index, current.column_index);
+		if (visited.count(key)) {
+			D_PRINTF("WARNING: Cycle detected in rename_col_bindings for binding (%llu.%llu)",
+			         (unsigned long long)current.table_index, (unsigned long long)current.column_index);
+			break;
+		}
+		visited.insert(key);
+
+		// check if this binding exists in the rename map
+		auto it = rename_col_bindings.find(current);
+		if (it != rename_col_bindings.end()) {
+			current = it->second;
+		} else {
+			// no more renames, this is the base binding
+			break;
+		}
+	}
+
+	return current;
+}
+
+vector<JoinEdge> RobustOptimizerContextState::CreateJoinEdges(vector<LogicalOperator *> &join_ops) {
+	vector<JoinEdge> edges;
+	for (auto &op : join_ops) {
+		auto &join = op->Cast<LogicalComparisonJoin>();
+
+		vector<ColumnBinding> left_columns, right_columns;
+		vector<ColumnBinding> resolved_left_columns, resolved_right_columns;
+
+		for (const JoinCondition &cond : join.conditions) {
+			if (cond.comparison == ExpressionType::COMPARE_EQUAL &&
+			    cond.left->type == ExpressionType::BOUND_COLUMN_REF &&
+			    cond.right->type == ExpressionType::BOUND_COLUMN_REF) {
+				// store original bindings
+				ColumnBinding left_binding = cond.left->Cast<BoundColumnRefExpression>().binding;
+				ColumnBinding right_binding = cond.right->Cast<BoundColumnRefExpression>().binding;
+
+				left_columns.push_back(left_binding);
+				right_columns.push_back(right_binding);
+
+				// resolve bindings through rename chain
+				resolved_left_columns.push_back(ResolveColumnBinding(left_binding));
+				resolved_right_columns.push_back(ResolveColumnBinding(right_binding));
+			}
+		}
+
+		if (!left_columns.empty() && !right_columns.empty()) {
+			// get table indices from first resolved column
+			idx_t left_table_idx = resolved_left_columns[0].table_index;
+			idx_t right_table_idx = resolved_right_columns[0].table_index;
+
+			// verify these table indices exist in our table manager
+			if (table_mgr.table_lookup.find(left_table_idx) != table_mgr.table_lookup.end() &&
+			    table_mgr.table_lookup.find(right_table_idx) != table_mgr.table_lookup.end()) {
+				// use resolved column bindings in the JoinEdge so they match child bindings in CreatePlan
+				JoinEdge edge(left_table_idx, right_table_idx, resolved_left_columns, resolved_right_columns,
+				              resolved_left_columns.size(), join.join_type);
+				edges.push_back(edge);
+			} else {
+				D_PRINTF("WARNING: Resolved table indices (%llu, %llu) not found in table_lookup",
+				         (unsigned long long)left_table_idx, (unsigned long long)right_table_idx);
+			}
+		}
+	}
+
+	return edges;
+}
+
+vector<JoinEdge> RobustOptimizerContextState::LargestRoot(vector<JoinEdge> &edges) {
+	// step 1: find largest table by cardinality
+	idx_t largest_table_idx = 0;
+	idx_t max_cardinality = 0;
+	for (auto &table_info : table_mgr.table_ops) {
+		if (table_info.estimated_cardinality > max_cardinality) {
+			max_cardinality = table_info.estimated_cardinality;
+			largest_table_idx = table_info.table_idx;
+		}
+	}
+
+	// step 2: build MST (maximum) using Prim's algorithm starting from largest table
+	unordered_set<idx_t> mst_nodes;
+	vector<JoinEdge> mst_edges;
+
+	mst_nodes.insert(largest_table_idx);
+
+	while (mst_nodes.size() < table_mgr.table_ops.size() && !edges.empty()) {
+		const JoinEdge *best_edge = nullptr;
+		idx_t max_weight = 0;
+		max_cardinality = 0;
+		for (JoinEdge &edge : edges) {
+			bool left_in_mst = mst_nodes.count(edge.table_a) > 0;
+			bool right_in_mst = mst_nodes.count(edge.table_b) > 0;
+
+			if (left_in_mst != right_in_mst) {
+				const idx_t weight = edge.weight;
+
+				// safely lookup cardinalities with bounds checking
+				auto left_it = table_mgr.table_lookup.find(edge.table_a);
+				auto right_it = table_mgr.table_lookup.find(edge.table_b);
+
+				if (left_it == table_mgr.table_lookup.end() || right_it == table_mgr.table_lookup.end()) {
+					// printf("WARNING: Table lookup failed for edge %llu <-> %llu\n", edge.table_a, edge.table_b);
+					continue;
+				}
+
+				idx_t left_cardinality = left_it->second.estimated_cardinality;
+				idx_t right_cardinality = right_it->second.estimated_cardinality;
+				const idx_t cardinality = std::min(left_cardinality, right_cardinality);
+
+				if (weight > max_weight || (weight == max_weight && cardinality > max_cardinality)) {
+					max_weight = weight;
+					max_cardinality = cardinality;
+					best_edge = &edge;
+				}
+			}
+		}
+
+		if (!best_edge) {
+			D_PRINT("Warning - Disconnected components found. MST incomplete.");
+			// TODO: Add Assertion
+			break;
+		}
+
+		mst_edges.push_back(*best_edge);
+		mst_nodes.insert(best_edge->table_a);
+		mst_nodes.insert(best_edge->table_b);
+	}
+
+	return mst_edges;
+}
+
+TreeNode *RobustOptimizerContextState::BuildRootedTree(vector<JoinEdge> &mst_edges) const {
+	if (mst_edges.empty()) {
+		return nullptr;
+	}
+
+	if (table_mgr.table_ops.empty()) {
+		D_PRINT("ERROR: BuildRootedTree called with empty table_ops");
+		return nullptr;
+	}
+
+	// step 1: find largest table (root)
+	idx_t root_table_idx = 0;
+	idx_t max_cardinality = 0;
+	bool found_root = false;
+
+	for (const auto &table_info : table_mgr.table_ops) {
+		if (table_info.estimated_cardinality > max_cardinality) {
+			max_cardinality = table_info.estimated_cardinality;
+			root_table_idx = table_info.table_idx;
+			found_root = true;
+		}
+	}
+
+	if (!found_root) {
+		D_PRINT("ERROR: No valid root table found");
+		return nullptr;
+	}
+
+	// step 2: create nodes for all tables
+	unordered_map<idx_t, TreeNode *> table_to_node;
+	for (const auto &table_info : table_mgr.table_ops) {
+		auto *node = new TreeNode(table_info.table_idx, table_info.table_op);
+		table_to_node[table_info.table_idx] = node;
+	}
+
+	// verify root node was created
+	if (table_to_node.find(root_table_idx) == table_to_node.end() || !table_to_node[root_table_idx]) {
+		D_PRINTF("ERROR: Failed to create root node for table %llu", (unsigned long long)root_table_idx);
+		// cleanup allocated nodes
+		for (auto &pair : table_to_node) {
+			delete pair.second;
+		}
+		return nullptr;
+	}
+
+	// step 3: build adjacency list from MST edges (undirected)
+	unordered_map<idx_t, vector<pair<idx_t, JoinEdge *>>> adjacency;
+	for (auto &edge : mst_edges) {
+		adjacency[edge.table_a].push_back({edge.table_b, &edge});
+		adjacency[edge.table_b].push_back({edge.table_a, &edge});
+	}
+
+	// step 4: BFS from root to assign parent-child relationships and levels
+	vector<idx_t> queue;
+	unordered_set<idx_t> visited;
+
+	queue.push_back(root_table_idx);
+	visited.insert(root_table_idx);
+	table_to_node[root_table_idx]->level = 0;
+
+	size_t front = 0;
+	while (front < queue.size()) {
+		idx_t current = queue[front++];
+
+		// check if current node exists
+		if (table_to_node.find(current) == table_to_node.end() || !table_to_node[current]) {
+			D_PRINTF("ERROR: Node for table %llu not found in table_to_node", (unsigned long long)current);
+			continue;
+		}
+
+		TreeNode *current_node = table_to_node[current];
+
+		// process all neighbors
+		for (pair<idx_t, JoinEdge *> &adj_entry : adjacency[current]) {
+			idx_t &neighbor_idx = adj_entry.first;
+			JoinEdge *&edge = adj_entry.second;
+			if (visited.count(neighbor_idx) == 0) {
+				// verify neighbor node exists
+				if (table_to_node.find(neighbor_idx) == table_to_node.end() || !table_to_node[neighbor_idx]) {
+					D_PRINTF("ERROR: Child node for table %llu not found", (unsigned long long)neighbor_idx);
+					continue;
+				}
+
+				// neighbor is a child of current
+				TreeNode *child_node = table_to_node[neighbor_idx];
+				child_node->parent = current_node;
+				child_node->level = current_node->level + 1;
+				child_node->edge_to_parent = edge;
+
+				current_node->children.push_back(child_node);
+
+				queue.push_back(neighbor_idx);
+				visited.insert(neighbor_idx);
+			}
+		}
+	}
+
+	return table_to_node[root_table_idx];
+}
+
+void RobustOptimizerContextState::DebugPrintGraph(const vector<JoinEdge> &edges) const {
+	(void)edges;
+#ifdef DEBUG
+	// Debug: Print all tables
+	Printer::Print("=== TABLE INFORMATION ===");
+	for (const auto &table_info : table_mgr.table_ops) {
+		Printer::PrintF("Table %llu: cardinality=%llu", (unsigned long long)table_info.table_idx,
+		                (unsigned long long)table_info.estimated_cardinality);
+	}
+
+	// Find largest table
+	idx_t largest_table_idx = 0;
+	idx_t max_cardinality = 0;
+	for (auto &table_info : table_mgr.table_ops) {
+		if (table_info.estimated_cardinality > max_cardinality) {
+			max_cardinality = table_info.estimated_cardinality;
+			largest_table_idx = table_info.table_idx;
+		}
+	}
+	Printer::PrintF("Largest table: %llu (cardinality=%llu)\n", (unsigned long long)largest_table_idx,
+	                (unsigned long long)max_cardinality);
+
+	// Debug: Print all join edges
+	Printer::Print("=== ALL JOIN EDGES ===");
+	for (size_t i = 0; i < edges.size(); i++) {
+		const auto &edge = edges[i];
+		Printer::PrintF("Edge %zu: %llu <-> %llu (weight=%llu, type=%d)", i, (unsigned long long)edge.table_a,
+		                (unsigned long long)edge.table_b, (unsigned long long)edge.weight, (int)edge.join_type);
+
+		// Print column bindings
+		string cols_a = "  Columns A: ";
+		for (const auto &col : edge.join_columns_a) {
+			cols_a += "(" + std::to_string(col.table_index) + "." + std::to_string(col.column_index) + ") ";
+		}
+		Printer::Print(cols_a);
+
+		string cols_b = "  Columns B: ";
+		for (const auto &col : edge.join_columns_b) {
+			cols_b += "(" + std::to_string(col.table_index) + "." + std::to_string(col.column_index) + ") ";
+		}
+		Printer::Print(cols_b);
+	}
+	Printer::Print("");
+#endif
+}
+
+void RobustOptimizerContextState::DebugPrintMST(const vector<JoinEdge> &mst_edges,
+                                                const vector<FilterOperation> &filter_operations) {
+	(void)mst_edges;
+	(void)filter_operations;
+#ifdef DEBUG
+	Printer::Print("=== MST EDGES ===");
+	for (size_t i = 0; i < mst_edges.size(); i++) {
+		const auto &edge = mst_edges[i];
+		Printer::PrintF("MST Edge %zu: %llu <-> %llu (weight=%llu)", i, (unsigned long long)edge.table_a,
+		                (unsigned long long)edge.table_b, (unsigned long long)edge.weight);
+	}
+	Printer::Print("");
+
+	Printer::Print("=== BLOOM FILTER OPERATIONS ===");
+	for (size_t i = 0; i < filter_operations.size(); i++) {
+		const auto &filter_op = filter_operations[i];
+
+		if (filter_op.is_create) {
+			// CREATE operation
+			Printer::PrintF("Filter Op %zu: CREATE_FILTER on table %llu", i,
+			                (unsigned long long)filter_op.build_table_idx);
+			string cols = "  Build columns: ";
+			for (const auto &col : filter_op.build_columns) {
+				cols += "(" + std::to_string(col.table_index) + "." + std::to_string(col.column_index) + ") ";
+			}
+			Printer::Print(cols);
+		} else {
+			// USE operation
+			Printer::PrintF("Filter Op %zu: PROBE_FILTER on table %llu (using BF from table %llu)", i,
+			                (unsigned long long)filter_op.probe_table_idx,
+			                (unsigned long long)filter_op.build_table_idx);
+			string build_cols = "  Build columns: ";
+			for (const auto &col : filter_op.build_columns) {
+				build_cols += "(" + std::to_string(col.table_index) + "." + std::to_string(col.column_index) + ") ";
+			}
+			Printer::Print(build_cols);
+
+			string probe_cols = "  Probe columns: ";
+			for (const auto &col : filter_op.probe_columns) {
+				probe_cols += "(" + std::to_string(col.table_index) + "." + std::to_string(col.column_index) + ") ";
+			}
+			Printer::Print(probe_cols);
+		}
+	}
+	Printer::Print("");
+#endif
+}
+
+void RobustOptimizerContextState::PrintDAG(TreeNode *root) {
+	Value val;
+	if (!context.TryGetCurrentSetting("robust_display_dag", val) || !val.GetValue<bool>()) {
+		return;
+	}
+	PrintTransferDAG(root, table_mgr);
+}
+
+// helper: collect all base table indices in a subtree
+static void CollectBaseTableIndices(LogicalOperator *op, TableManager &table_mgr, unordered_set<idx_t> &indices) {
+	if (!op) {
+		return;
+	}
+	auto *info = table_mgr.GetTableInfo(op);
+	if (info) {
+		indices.insert(info->table_idx);
+		return;
+	}
+	for (auto &child : op->children) {
+		CollectBaseTableIndices(child.get(), table_mgr, indices);
+	}
+}
+
+// union-find helpers for column equivalence classes
+static ColKey UFFind(map<ColKey, ColKey> &parent, ColKey x) {
+	if (parent.find(x) == parent.end()) {
+		parent[x] = x;
+	}
+	while (parent[x] != x) {
+		parent[x] = parent[parent[x]];
+		x = parent[x];
+	}
+	return x;
+}
+
+static void UFUnion(map<ColKey, ColKey> &parent, ColKey a, ColKey b) {
+	a = UFFind(parent, a);
+	b = UFFind(parent, b);
+	if (a != b) {
+		parent[a] = b;
+	}
+}
+
+// recursive DFS for building physical DAG
+// uses build-first traversal so DFS index matches execution order
+// (first-executed = lowest index, last-executed = highest index)
+static void PhysicalDAGDFS(LogicalOperator *op, TableManager &table_mgr, RobustOptimizerContextState &state,
+                           vector<PhysicalDAGNode *> &all_nodes, map<idx_t, PhysicalDAGNode *> &node_map,
+                           map<idx_t, int> &dfs_index, map<ColKey, ColKey> &uf_parent) {
+	if (!op) {
+		return;
+	}
+
+	// base case: registered base table
+	auto *info = table_mgr.GetTableInfo(op);
+	if (info) {
+		bool is_leaf =
+		    (op->type == LogicalOperatorType::LOGICAL_GET || op->type == LogicalOperatorType::LOGICAL_DUMMY_SCAN ||
+		     op->type == LogicalOperatorType::LOGICAL_EXPRESSION_GET ||
+		     op->type == LogicalOperatorType::LOGICAL_DELIM_GET ||
+		     op->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT ||
+		     op->type == LogicalOperatorType::LOGICAL_CHUNK_GET);
+		if (is_leaf) {
+			auto *node = new PhysicalDAGNode(info->table_idx, info->table_op);
+			all_nodes.push_back(node);
+			node_map[info->table_idx] = node;
+			dfs_index[info->table_idx] = (int)all_nodes.size() - 1;
+			return;
+		}
+	}
+
+	// join node
+	if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
+	    op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
+		auto &join = op->Cast<LogicalComparisonJoin>();
+
+		if (join.join_type == JoinType::MARK) {
+			PhysicalDAGDFS(op->children[0].get(), table_mgr, state, all_nodes, node_map, dfs_index, uf_parent);
+			return;
+		}
+
+		// build-first: visit right child (build) first, then left child (probe)
+		// this gives execution order: first-executed tables get lowest DFS index
+		PhysicalDAGDFS(op->children[1].get(), table_mgr, state, all_nodes, node_map, dfs_index, uf_parent);
+		PhysicalDAGDFS(op->children[0].get(), table_mgr, state, all_nodes, node_map, dfs_index, uf_parent);
+
+		// process each join condition
+		for (const JoinCondition &cond : join.conditions) {
+			if (cond.comparison != ExpressionType::COMPARE_EQUAL) {
+				continue;
+			}
+			if (cond.left->type != ExpressionType::BOUND_COLUMN_REF ||
+			    cond.right->type != ExpressionType::BOUND_COLUMN_REF) {
+				continue;
+			}
+
+			ColumnBinding left_resolved =
+			    state.ResolveColumnBinding(cond.left->Cast<BoundColumnRefExpression>().binding);
+			ColumnBinding right_resolved =
+			    state.ResolveColumnBinding(cond.right->Cast<BoundColumnRefExpression>().binding);
+
+			// add to equivalence classes
+			ColKey left_key = {left_resolved.table_index, left_resolved.column_index};
+			ColKey right_key = {right_resolved.table_index, right_resolved.column_index};
+			UFUnion(uf_parent, left_key, right_key);
+
+			idx_t table_a = left_resolved.table_index;
+			idx_t table_b = right_resolved.table_index;
+			if (!node_map.count(table_a) || !node_map.count(table_b)) {
+				continue;
+			}
+			if (table_a == table_b) {
+				continue;
+			}
+
+			int idx_a = dfs_index.count(table_a) ? dfs_index[table_a] : -1;
+			int idx_b = dfs_index.count(table_b) ? dfs_index[table_b] : -1;
+
+			// higher DFS index = later execution = parent (closer to root/top)
+			idx_t parent_idx, child_idx;
+			ColumnBinding parent_col, child_col;
+			if (idx_a > idx_b) {
+				parent_idx = table_a;
+				parent_col = left_resolved;
+				child_idx = table_b;
+				child_col = right_resolved;
+			} else {
+				parent_idx = table_b;
+				parent_col = right_resolved;
+				child_idx = table_a;
+				child_col = left_resolved;
+			}
+
+			// equiv resolution on child side: find shallowest equivalent (highest DFS, != parent)
+			ColKey child_root = UFFind(uf_parent, {child_col.table_index, child_col.column_index});
+			idx_t best_child = child_idx;
+			int best_child_dfs = dfs_index.count(child_idx) ? dfs_index[child_idx] : -1;
+			ColumnBinding best_child_col = child_col;
+
+			vector<ColKey> all_keys;
+			for (auto &entry : uf_parent) {
+				all_keys.push_back(entry.first);
+			}
+			for (auto &key : all_keys) {
+				if (UFFind(uf_parent, key) != child_root) {
+					continue;
+				}
+				idx_t candidate = key.first;
+				if (candidate == parent_idx) {
+					continue;
+				}
+				if (!node_map.count(candidate)) {
+					continue;
+				}
+				int candidate_dfs = dfs_index.count(candidate) ? dfs_index[candidate] : -1;
+				if (candidate_dfs > best_child_dfs) {
+					best_child = candidate;
+					best_child_dfs = candidate_dfs;
+					best_child_col = ColumnBinding(key.first, key.second);
+				}
+			}
+
+			auto *parent_node = node_map[parent_idx];
+			auto *child_node = node_map[best_child];
+
+			// check if already linked
+			bool already_linked = false;
+			for (auto *p : child_node->parents) {
+				if (p == parent_node) {
+					already_linked = true;
+					break;
+				}
+			}
+
+			if (!already_linked) {
+				child_node->parents.push_back(parent_node);
+				parent_node->children.push_back(child_node);
+
+				PhysicalDAGEdge edge;
+				edge.parent_table = parent_idx;
+				edge.child_table = best_child;
+				edge.parent_cols.push_back(parent_col);
+				edge.child_cols.push_back(best_child_col);
+				child_node->edges_to_parents.push_back(edge);
+			} else {
+				// multi-column join: append columns to existing edge
+				for (auto &edge : child_node->edges_to_parents) {
+					if (edge.parent_table == parent_idx) {
+						edge.parent_cols.push_back(parent_col);
+						edge.child_cols.push_back(best_child_col);
+						break;
+					}
+				}
+			}
+		}
+		return;
+	}
+
+	// other operators: recurse into children
+	for (auto &child : op->children) {
+		PhysicalDAGDFS(child.get(), table_mgr, state, all_nodes, node_map, dfs_index, uf_parent);
+	}
+}
+
+vector<PhysicalDAGNode *> RobustOptimizerContextState::BuildPhysicalPlanDAG(LogicalOperator *op,
+                                                                            map<ColKey, ColKey> &uf_parent) {
+	vector<PhysicalDAGNode *> all_nodes;
+	map<idx_t, PhysicalDAGNode *> node_map;
+	map<idx_t, int> dfs_index;
+
+	PhysicalDAGDFS(op, table_mgr, *this, all_nodes, node_map, dfs_index, uf_parent);
+
+	// compute levels: roots (no parents) get 0, others get max(parent levels) + 1
+	for (auto *node : all_nodes) {
+		node->level = node->parents.empty() ? 0 : -1;
+	}
+	bool changed = true;
+	while (changed) {
+		changed = false;
+		for (auto *node : all_nodes) {
+			if (node->parents.empty()) {
+				continue;
+			}
+			int max_parent = -1;
+			bool all_set = true;
+			for (auto *p : node->parents) {
+				if (p->level < 0) {
+					all_set = false;
+					break;
+				}
+				max_parent = std::max(max_parent, p->level);
+			}
+			if (all_set && max_parent >= 0) {
+				int new_level = max_parent + 1;
+				if (new_level != node->level) {
+					node->level = new_level;
+					changed = true;
+				}
+			}
+		}
+	}
+
+	return all_nodes;
+}
+
+void RobustOptimizerContextState::FlipRootsToLeaves(vector<PhysicalDAGNode *> &all_nodes) {
+	// step 1: find all roots
+	vector<PhysicalDAGNode *> roots;
+	for (auto *node : all_nodes) {
+		if (node->parents.empty()) {
+			roots.push_back(node);
+		}
+	}
+	if (roots.size() <= 1) {
+		return;
+	}
+
+	// step 2: find anchor root (largest cardinality)
+	PhysicalDAGNode *anchor = roots[0];
+	for (auto *r : roots) {
+		if (r->table_op->estimated_cardinality > anchor->table_op->estimated_cardinality) {
+			anchor = r;
+		}
+	}
+
+	// step 3: repeatedly flip non-anchor roots until only anchor remains
+	// flipping a root can expose new roots (e.g. flipping name reveals aka_name)
+	bool flipped = true;
+	while (flipped) {
+		flipped = false;
+		for (auto *node : all_nodes) {
+			if (!node->parents.empty() || node == anchor) {
+				continue;
+			}
+			// non-anchor root: reverse all edges to its children
+			auto children_copy = node->children;
+			for (auto *child : children_copy) {
+				PhysicalDAGEdge edge;
+				bool found = false;
+				for (idx_t i = 0; i < child->parents.size(); i++) {
+					if (child->parents[i] == node) {
+						edge = child->edges_to_parents[i];
+						child->parents.erase(child->parents.begin() + static_cast<std::ptrdiff_t>(i));
+						child->edges_to_parents.erase(child->edges_to_parents.begin() + static_cast<std::ptrdiff_t>(i));
+						found = true;
+						break;
+					}
+				}
+				if (!found) {
+					continue;
+				}
+				for (idx_t i = 0; i < node->children.size(); i++) {
+					if (node->children[i] == child) {
+						node->children.erase(node->children.begin() + static_cast<std::ptrdiff_t>(i));
+						break;
+					}
+				}
+				child->children.push_back(node);
+				node->parents.push_back(child);
+
+				PhysicalDAGEdge reversed;
+				reversed.parent_table = edge.child_table;
+				reversed.child_table = edge.parent_table;
+				reversed.parent_cols = edge.child_cols;
+				reversed.child_cols = edge.parent_cols;
+				node->edges_to_parents.push_back(reversed);
+			}
+			flipped = true;
+		}
+	}
+
+	// step 4: recompute levels
+	for (auto *node : all_nodes) {
+		node->level = node->parents.empty() ? 0 : -1;
+	}
+	bool changed = true;
+	while (changed) {
+		changed = false;
+		for (auto *node : all_nodes) {
+			if (node->parents.empty()) {
+				continue;
+			}
+			int max_parent = -1;
+			bool all_set = true;
+			for (auto *p : node->parents) {
+				if (p->level < 0) {
+					all_set = false;
+					break;
+				}
+				max_parent = std::max(max_parent, p->level);
+			}
+			if (all_set && max_parent >= 0) {
+				int new_level = max_parent + 1;
+				if (new_level != node->level) {
+					node->level = new_level;
+					changed = true;
+				}
+			}
+		}
+	}
+}
+
+void RobustOptimizerContextState::PrintPhysicalPlanDAG(LogicalOperator *op) {
+	Value val;
+	if (!context.TryGetCurrentSetting("robust_display_physical_dag", val) || !val.GetValue<bool>()) {
+		return;
+	}
+
+	map<ColKey, ColKey> uf_parent;
+	auto all_nodes = BuildPhysicalPlanDAG(op, uf_parent);
+	if (all_nodes.empty()) {
+		return;
+	}
+	PrintPhysicalDAG(all_nodes, table_mgr);
+}
+
+std::pair<unordered_map<LogicalOperator *, vector<FilterOperation>>,
+          unordered_map<LogicalOperator *, vector<FilterOperation>>>
+RobustOptimizerContextState::GenerateStageModifications(const vector<JoinEdge> &mst_edges) {
+	// step 1: build rooted tree from MST.
+	// BuildRootedTree stores non-const JoinEdge* internally for adjacency-list layout; does not mutate elements.
+	TreeNode *root =
+	    BuildRootedTree(const_cast<vector<JoinEdge> &>(mst_edges)); // NOLINT(cppcoreguidelines-pro-type-const-cast)
+
+	// check if tree building failed
+	if (!root) {
+		D_PRINT("ERROR: BuildRootedTree returned nullptr, returning empty modifications");
+		return {{}, {}};
+	}
+
+	// display DAG if setting is enabled
+	PrintDAG(root);
+
+	// step 2: collect all nodes organized by level
+	unordered_map<int, vector<TreeNode *>> nodes_by_level;
+	int max_level = 0;
+
+	// BFS to collect nodes by level
+	vector<TreeNode *> queue;
+	queue.push_back(root);
+	size_t front = 0;
+
+	while (front < queue.size()) {
+		TreeNode *node = queue[front++];
+		if (!node) {
+			D_PRINT("ERROR: Null node encountered during BFS");
+			continue;
+		}
+
+		nodes_by_level[node->level].push_back(node);
+		max_level = std::max(max_level, node->level);
+
+		for (TreeNode *child : node->children) {
+			if (child) {
+				queue.push_back(child);
+			} else {
+				D_PRINT("ERROR: Null child node encountered");
+			}
+		}
+	}
+
+	unordered_map<LogicalOperator *, vector<FilterOperation>> forward_filter_ops;
+	unordered_map<LogicalOperator *, vector<FilterOperation>> backward_filter_ops;
+
+	// sequence counter to preserve operation order
+	idx_t sequence = 0;
+
+	// sort nodes at each level by cardinality ascending so PROBE_FILTERs are generated smallest-first
+	for (int level = 1; level <= max_level; level++) {
+		std::sort(nodes_by_level[level].begin(), nodes_by_level[level].end(), [](const TreeNode *a, const TreeNode *b) {
+			return a->table_op->estimated_cardinality < b->table_op->estimated_cardinality;
+		});
+	}
+
+	// step 3: forward pass - bottom-up (leaves to root)
+	// process levels from highest (leaves) down to 1
+	for (int level = max_level; level >= 1; level--) {
+		for (TreeNode *child_node : nodes_by_level[level]) {
+			if (!child_node) {
+				D_PRINTF("ERROR: Null child_node at level %d", level);
+				continue;
+			}
+
+			TreeNode *parent_node = child_node->parent;
+			if (!parent_node) {
+				D_PRINTF("ERROR: Null parent_node for table %llu at level %d",
+				         (unsigned long long)child_node->table_idx, level);
+				continue;
+			}
+
+			JoinEdge *edge = child_node->edge_to_parent;
+			if (!edge) {
+				D_PRINTF("ERROR: Null edge_to_parent for table %llu", (unsigned long long)child_node->table_idx);
+				continue;
+			}
+
+			// determine which columns belong to child and which to parent
+			vector<ColumnBinding> child_columns, parent_columns;
+
+			if (edge->table_a == child_node->table_idx) {
+				child_columns = edge->join_columns_a;
+				parent_columns = edge->join_columns_b;
+			} else {
+				child_columns = edge->join_columns_b;
+				parent_columns = edge->join_columns_a;
+			}
+
+			// CREATE_FILTER on child
+			FilterOperation create_op;
+			create_op.build_table_idx = child_node->table_idx;
+			create_op.probe_table_idx = parent_node->table_idx;
+			create_op.build_columns = child_columns;
+			create_op.probe_columns = parent_columns;
+			create_op.is_create = true;
+			create_op.is_forward_pass = true;
+			create_op.sequence_number = sequence++;
+			forward_filter_ops[child_node->table_op].push_back(create_op);
+
+			// PROBE_FILTER on parent
+			FilterOperation use_op;
+			use_op.build_table_idx = child_node->table_idx;
+			use_op.probe_table_idx = parent_node->table_idx;
+			use_op.build_columns = child_columns;
+			use_op.probe_columns = parent_columns;
+			use_op.is_create = false;
+			use_op.is_forward_pass = true;
+			use_op.sequence_number = sequence++;
+			forward_filter_ops[parent_node->table_op].push_back(use_op);
+		}
+	}
+
+	// step 4: backward pass - top-down (root to leaves)
+	// process levels from 1 to max_level
+	for (int level = 1; level <= max_level; level++) {
+		for (TreeNode *child_node : nodes_by_level[level]) {
+			if (!child_node) {
+				D_PRINTF("ERROR: Null child_node at level %d", level);
+				continue;
+			}
+
+			TreeNode *parent_node = child_node->parent;
+			if (!parent_node) {
+				D_PRINTF("ERROR: Null parent_node for table %llu at level %d",
+				         (unsigned long long)child_node->table_idx, level);
+				continue;
+			}
+
+			JoinEdge *edge = child_node->edge_to_parent;
+			if (!edge) {
+				D_PRINTF("ERROR: Null edge_to_parent for table %llu", (unsigned long long)child_node->table_idx);
+				continue;
+			}
+
+			// determine which columns belong to parent and which to child
+			vector<ColumnBinding> parent_columns, child_columns;
+
+			if (edge->table_a == parent_node->table_idx) {
+				parent_columns = edge->join_columns_a;
+				child_columns = edge->join_columns_b;
+			} else {
+				parent_columns = edge->join_columns_b;
+				child_columns = edge->join_columns_a;
+			}
+
+			// CREATE_FILTER on parent
+			FilterOperation create_op;
+			create_op.build_table_idx = parent_node->table_idx;
+			create_op.probe_table_idx = child_node->table_idx;
+			create_op.build_columns = parent_columns;
+			create_op.probe_columns = child_columns;
+			create_op.is_create = true;
+			create_op.sequence_number = sequence++;
+			backward_filter_ops[parent_node->table_op].push_back(create_op);
+
+			// PROBE_FILTER on child
+			FilterOperation use_op;
+			use_op.build_table_idx = parent_node->table_idx;
+			use_op.probe_table_idx = child_node->table_idx;
+			use_op.build_columns = parent_columns;
+			use_op.probe_columns = child_columns;
+			use_op.is_create = false;
+			use_op.sequence_number = sequence++;
+			backward_filter_ops[child_node->table_op].push_back(use_op);
+		}
+	}
+
+	return {std::move(forward_filter_ops), std::move(backward_filter_ops)};
+}
+
+std::pair<unordered_map<LogicalOperator *, vector<FilterOperation>>,
+          unordered_map<LogicalOperator *, vector<FilterOperation>>>
+RobustOptimizerContextState::GenerateStageModificationsFromDAG(vector<PhysicalDAGNode *> &all_nodes,
+                                                               map<ColKey, ColKey> &uf_parent) {
+	unordered_map<LogicalOperator *, vector<FilterOperation>> forward_filter_ops;
+	unordered_map<LogicalOperator *, vector<FilterOperation>> backward_filter_ops;
+
+	if (all_nodes.empty()) {
+		return {std::move(forward_filter_ops), std::move(backward_filter_ops)};
+	}
+
+	// group nodes by level
+	unordered_map<int, vector<PhysicalDAGNode *>> nodes_by_level;
+	int max_level = 0;
+	for (auto *node : all_nodes) {
+		nodes_by_level[node->level].push_back(node);
+		max_level = std::max(max_level, node->level);
+	}
+
+	idx_t sequence = 0;
+
+	// Printer::Print(StringUtil::Format("[DAG-GEN] %zu nodes, max_level=%d", all_nodes.size(), max_level));
+	// for (auto *node : all_nodes) {
+	// 	Printer::Print(StringUtil::Format("[DAG-GEN]   table_%llu (level=%d, parents=%zu, children=%zu, card=%llu)",
+	// 	                                  (unsigned long long)node->table_idx, node->level, node->parents.size(),
+	// 	                                  node->children.size(),
+	// 	                                  (unsigned long long)node->table_op->estimated_cardinality));
+	// 	for (idx_t ei = 0; ei < node->edges_to_parents.size(); ei++) {
+	// 		auto &e = node->edges_to_parents[ei];
+	// 		for (idx_t ci = 0; ci < e.parent_cols.size(); ci++) {
+	// 			Printer::Print(StringUtil::Format(
+	// 			    "[DAG-GEN]     edge[%llu] parent=%llu col(%llu.%llu) <- child=%llu col(%llu.%llu)",
+	// 			    (unsigned long long)ei, (unsigned long long)e.parent_table,
+	// 			    (unsigned long long)e.parent_cols[ci].table_index,
+	// 			    (unsigned long long)e.parent_cols[ci].column_index, (unsigned long long)e.child_table,
+	// 			    (unsigned long long)e.child_cols[ci].table_index,
+	// 			    (unsigned long long)e.child_cols[ci].column_index));
+	// 		}
+	// 	}
+	// }
+
+	// forward pass: bottom-up (leaves to roots), levels max_level down to 1
+	for (int level = max_level; level >= 1; level--) {
+		for (auto *child_node : nodes_by_level[level]) {
+			for (idx_t ei = 0; ei < child_node->edges_to_parents.size(); ei++) {
+				auto &edge = child_node->edges_to_parents[ei];
+				auto *parent_node = child_node->parents[ei];
+
+				// CREATE_FILTER on child (build=child, probe=parent)
+				FilterOperation create_op;
+				create_op.build_table_idx = child_node->table_idx;
+				create_op.probe_table_idx = parent_node->table_idx;
+				create_op.build_columns = edge.child_cols;
+				create_op.probe_columns = edge.parent_cols;
+				create_op.is_create = true;
+				create_op.is_forward_pass = true;
+				create_op.sequence_number = sequence++;
+				forward_filter_ops[child_node->table_op].push_back(create_op);
+
+				// PROBE_FILTER on parent
+				FilterOperation use_op;
+				use_op.build_table_idx = child_node->table_idx;
+				use_op.probe_table_idx = parent_node->table_idx;
+				use_op.build_columns = edge.child_cols;
+				use_op.probe_columns = edge.parent_cols;
+				use_op.is_create = false;
+				use_op.is_forward_pass = true;
+				use_op.sequence_number = sequence++;
+				forward_filter_ops[parent_node->table_op].push_back(use_op);
+			}
+		}
+	}
+
+	// backward pass: top-down (roots to leaves) with broadcast optimization.
+	// for each equivalence class, build the BF once at the highest ancestor (root/bridge)
+	// and broadcast it to all descendants sharing that class.
+
+	// tracks which table created the BF for each equivalence class in the backward pass.
+	// key: equiv class root (from union-find)
+	// value: (build_table_op, index into backward_filter_ops[build_table_op], build_table_idx, build_columns)
+	struct EquivBFSource {
+		LogicalOperator *build_table_op;
+		idx_t create_op_index; // index into backward_filter_ops[build_table_op]
+		idx_t build_table_idx;
+		vector<ColumnBinding> build_columns;
+	};
+	map<ColKey, EquivBFSource> equiv_class_bf_source;
+
+	for (int level = 1; level <= max_level; level++) {
+		for (auto *child_node : nodes_by_level[level]) {
+			// collect parent edges with their indices, sort by parent cardinality ascending
+			vector<idx_t> edge_indices;
+			for (idx_t ei = 0; ei < child_node->edges_to_parents.size(); ei++) {
+				edge_indices.push_back(ei);
+			}
+			std::sort(edge_indices.begin(), edge_indices.end(), [&](idx_t a, idx_t b) {
+				return child_node->parents[a]->table_op->estimated_cardinality <
+				       child_node->parents[b]->table_op->estimated_cardinality;
+			});
+
+			for (auto ei : edge_indices) {
+				auto &edge = child_node->edges_to_parents[ei];
+				auto *parent_node = child_node->parents[ei];
+
+				// determine the equivalence class for this edge using the first column pair
+				ColKey parent_col_key = {edge.parent_cols[0].table_index, edge.parent_cols[0].column_index};
+				ColKey equiv_root = UFFind(uf_parent, parent_col_key);
+
+				auto it = equiv_class_bf_source.find(equiv_root);
+				if (it != equiv_class_bf_source.end()) {
+					// an ancestor already created a BF for this equivalence class — broadcast (USE only)
+					auto &source = it->second;
+
+					// add child's probe columns (and matching build columns) to the existing
+					// CREATE_FILTER so linking can find it and the merge logic preserves them
+					auto &create_op = backward_filter_ops[source.build_table_op][source.create_op_index];
+					for (idx_t ci = 0; ci < edge.child_cols.size(); ci++) {
+						create_op.probe_columns.push_back(edge.child_cols[ci]);
+						create_op.build_columns.push_back(
+						    source.build_columns[ci < source.build_columns.size() ? ci
+						                                                          : source.build_columns.size() - 1]);
+					}
+
+					FilterOperation use_op;
+					use_op.build_table_idx = source.build_table_idx;
+					use_op.probe_table_idx = child_node->table_idx;
+					use_op.build_columns = source.build_columns;
+					use_op.probe_columns = edge.child_cols;
+					use_op.is_create = false;
+					use_op.is_forward_pass = false;
+					use_op.sequence_number = sequence++;
+					backward_filter_ops[child_node->table_op].push_back(use_op);
+				} else {
+					// new equivalence class at this edge — create BF on parent, use on child
+					FilterOperation create_op;
+					create_op.build_table_idx = parent_node->table_idx;
+					create_op.probe_table_idx = child_node->table_idx;
+					create_op.build_columns = edge.parent_cols;
+					create_op.probe_columns = edge.child_cols;
+					create_op.is_create = true;
+					create_op.is_forward_pass = false;
+					create_op.sequence_number = sequence++;
+
+					idx_t create_idx = backward_filter_ops[parent_node->table_op].size();
+					backward_filter_ops[parent_node->table_op].push_back(create_op);
+
+					FilterOperation use_op;
+					use_op.build_table_idx = parent_node->table_idx;
+					use_op.probe_table_idx = child_node->table_idx;
+					use_op.build_columns = edge.parent_cols;
+					use_op.probe_columns = edge.child_cols;
+					use_op.is_create = false;
+					use_op.is_forward_pass = false;
+					use_op.sequence_number = sequence++;
+					backward_filter_ops[child_node->table_op].push_back(use_op);
+
+					// record this as the source for this equivalence class
+					equiv_class_bf_source[equiv_root] = {parent_node->table_op, create_idx, parent_node->table_idx,
+					                                     edge.parent_cols};
+				}
+			}
+		}
+	}
+
+	return {std::move(forward_filter_ops), std::move(backward_filter_ops)};
+}
+
+unique_ptr<LogicalOperator>
+RobustOptimizerContextState::BuildStackedBFOperators(unique_ptr<LogicalOperator> base_plan,
+                                                     const vector<FilterOperation> &filter_ops, bool reverse_order) {
+	if (filter_ops.empty()) {
+		return base_plan;
+	}
+
+	// preserve order and only merge consecutive CREATEs for the same table
+	vector<FilterOperation> merged_ops;
+
+	for (size_t i = 0; i < filter_ops.size(); i++) {
+		const auto &filter_op = filter_ops[i];
+
+		if (filter_op.is_create) {
+			// Check if we can merge with subsequent consecutive CREATEs for same table
+			vector<FilterOperation> consecutive_creates;
+			consecutive_creates.push_back(filter_op);
+
+			// Look ahead for consecutive CREATEs on the same table
+			size_t j = i + 1;
+			while (j < filter_ops.size() && filter_ops[j].is_create &&
+			       filter_ops[j].build_table_idx == filter_op.build_table_idx) {
+				consecutive_creates.push_back(filter_ops[j]);
+				j++;
+			}
+
+			if (consecutive_creates.size() == 1) {
+				// single CREATE, no merging needed
+				merged_ops.push_back(filter_op);
+			} else {
+				// multiple consecutive CREATEs for same table - merge them
+				FilterOperation merged_op = consecutive_creates[0];
+				merged_op.build_columns.clear();
+
+				// collect all build columns
+				for (const auto &op : consecutive_creates) {
+					// for (const auto &col : op.build_columns) {
+					for (idx_t x = 0; x < op.build_columns.size(); x++) {
+						// __assert(op.build_columns.size() == op.probe_columns.size(),"Merging consecutive
+						// CREATE_FILTERs: Build columns and probe columns size different");
+						merged_op.build_columns.push_back(op.build_columns[x]);
+						merged_op.probe_columns.push_back(op.probe_columns[x]);
+					}
+				}
+				merged_ops.push_back(merged_op);
+			}
+
+			// skip the operations we just merged
+			i = j - 1;
+		} else {
+			// USE operation - add as is
+			merged_ops.push_back(filter_op);
+		}
+	}
+
+	// build operators from merged list
+	unique_ptr<LogicalOperator> current = std::move(base_plan);
+
+	// helper to set estimated_cardinality from the underlying scan table
+	auto set_cardinality = [&](LogicalOperator *op, const FilterOperation &filter_op) {
+		idx_t table_idx = filter_op.is_create ? filter_op.build_table_idx : filter_op.probe_table_idx;
+		auto it = table_mgr.table_lookup.find(table_idx);
+		if (it != table_mgr.table_lookup.end()) {
+			op->estimated_cardinality = it->second.estimated_cardinality;
+		}
+	};
+
+	if (reverse_order) {
+		for (auto it = merged_ops.rbegin(); it != merged_ops.rend(); ++it) {
+			const auto &filter_op = *it;
+			unique_ptr<LogicalOperator> new_op;
+
+			if (filter_op.is_create) {
+				auto create = make_uniq<LogicalCreateFilter>(filter_op);
+				create->is_forward_pass = filter_op.is_forward_pass;
+				new_op = std::move(create);
+			} else {
+				new_op = make_uniq<LogicalProbeFilter>(filter_op);
+			}
+
+			set_cardinality(new_op.get(), filter_op);
+			new_op->AddChild(std::move(current));
+			current = std::move(new_op);
+		}
+	} else {
+		for (const auto &filter_op : merged_ops) {
+			unique_ptr<LogicalOperator> new_op;
+
+			if (filter_op.is_create) {
+				auto create = make_uniq<LogicalCreateFilter>(filter_op);
+				create->is_forward_pass = filter_op.is_forward_pass;
+				new_op = std::move(create);
+			} else {
+				new_op = make_uniq<LogicalProbeFilter>(filter_op);
+			}
+
+			set_cardinality(new_op.get(), filter_op);
+			new_op->AddChild(std::move(current));
+			current = std::move(new_op);
+		}
+	}
+	return current;
+}
+
+unique_ptr<LogicalOperator> RobustOptimizerContextState::ApplyStageModifications(
+    unique_ptr<LogicalOperator> plan,
+    const unordered_map<LogicalOperator *, vector<FilterOperation>> &forward_filter_ops,
+    const unordered_map<LogicalOperator *, vector<FilterOperation>> &backward_filter_ops) {
+	// first apply modifications to children recursively
+	for (auto &child : plan->children) {
+		child = ApplyStageModifications(std::move(child), forward_filter_ops, backward_filter_ops);
+	}
+
+	LogicalOperator *original_op = plan.get();
+
+	// add the forward pass bf operators above the base table operator
+	auto forward_it = forward_filter_ops.find(original_op);
+	if (forward_it != forward_filter_ops.end()) {
+		plan = BuildStackedBFOperators(std::move(plan), forward_it->second, false);
+	}
+
+	// add the backward pass bf operators above the forward pass bf operators
+	auto backward_it = backward_filter_ops.find(original_op);
+	if (backward_it != backward_filter_ops.end()) {
+		// for (size_t i = 0; i < backward_it->second.size(); i++) {
+		// 	const auto &op = backward_it->second[i];
+		// }
+		plan = BuildStackedBFOperators(std::move(plan), backward_it->second, false);
+	}
+
+	return plan;
+}
+
+void RobustOptimizerContextState::LinkProbeFilterToCreateFilter(LogicalOperator *plan) {
+	if (!plan) {
+		return;
+	}
+
+	// helper struct to uniquely identify a CREATE_FILTER
+	struct CreateFilterKey {
+		idx_t build_table_idx;
+		vector<ColumnBinding> build_columns;
+
+		bool operator==(const CreateFilterKey &other) const {
+			if (build_table_idx != other.build_table_idx) {
+				return false;
+			}
+			if (build_columns.size() != other.build_columns.size()) {
+				return false;
+			}
+			for (size_t i = 0; i < build_columns.size(); i++) {
+				if (build_columns[i].table_index != other.build_columns[i].table_index ||
+				    build_columns[i].column_index != other.build_columns[i].column_index) {
+					return false;
+				}
+			}
+			return true;
+		}
+	};
+
+	struct CreateFilterKeyHash {
+		size_t operator()(const CreateFilterKey &key) const {
+			size_t hash = std::hash<idx_t>()(key.build_table_idx);
+			for (const auto &col : key.build_columns) {
+				hash ^= (std::hash<idx_t>()(col.table_index) << 1);
+				hash ^= (std::hash<idx_t>()(col.column_index) << 2);
+			}
+			return hash;
+		}
+	};
+
+	// pass 1: collect all CREATE_FILTER operators (multiple per build table possible)
+	unordered_map<idx_t, vector<LogicalCreateFilter *>> create_filter_by_table;
+	vector<LogicalOperator *> queue;
+	queue.push_back(plan);
+
+	while (!queue.empty()) {
+		LogicalOperator *current = queue.back();
+		queue.pop_back();
+
+		if (current->type == LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR) {
+			auto *create_filter = dynamic_cast<LogicalCreateFilter *>(current);
+			if (create_filter) {
+				create_filter_by_table[create_filter->filter_operation.build_table_idx].push_back(create_filter);
+			}
+		}
+
+		for (auto &child : current->children) {
+			queue.push_back(child.get());
+		}
+	}
+
+	// pass 2: link all PROBE_FILTER operators to their corresponding CREATE_FILTER
+	queue.clear();
+	queue.push_back(plan);
+
+	while (!queue.empty()) {
+		LogicalOperator *current = queue.back();
+		queue.pop_back();
+
+		if (current->type == LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR) {
+			auto *probe_filter = dynamic_cast<LogicalProbeFilter *>(current);
+			if (probe_filter) {
+				idx_t build_table_idx = probe_filter->filter_operation.build_table_idx;
+				idx_t probe_table_idx = probe_filter->filter_operation.probe_table_idx;
+
+				auto it = create_filter_by_table.find(build_table_idx);
+				if (it != create_filter_by_table.end()) {
+					for (auto *create_filter : it->second) {
+						for (const auto &pc : create_filter->filter_operation.probe_columns) {
+							if (pc.table_index == probe_table_idx) {
+								probe_filter->related_create_filter = create_filter;
+								create_filter->related_probe_filter.push_back(probe_filter);
+								break;
+							}
+						}
+						if (probe_filter->related_create_filter) {
+							break;
+						}
+					}
+					if (!probe_filter->related_create_filter) {
+						D_PRINTF("[LINK] WARNING: No CREATE_FILTER with matching probe table for PROBE_FILTER "
+						         "(build=table_%llu, probe=table_%llu)",
+						         (unsigned long long)build_table_idx, (unsigned long long)probe_table_idx);
+					}
+				} else {
+					D_PRINTF(
+					    "[LINK] WARNING: No CREATE_FILTER found for PROBE_FILTER (build=table_%llu, probe=table_%llu)",
+					    (unsigned long long)build_table_idx, (unsigned long long)probe_table_idx);
+				}
+			}
+		}
+
+		for (auto &child : current->children) {
+			queue.push_back(child.get());
+		}
+	}
+}
+
+void RobustOptimizerContextState::SetupDynamicFilterPushdown(LogicalOperator *plan) {
+	if (!plan) {
+		return;
+	}
+
+	// collect all forward-pass LogicalCreateFilter operators
+	vector<LogicalCreateFilter *> forward_creates;
+	vector<LogicalOperator *> queue;
+	queue.push_back(plan);
+
+	while (!queue.empty()) {
+		LogicalOperator *current = queue.back();
+		queue.pop_back();
+
+		if (current->type == LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR) {
+			auto *create_filter = dynamic_cast<LogicalCreateFilter *>(current);
+			if (create_filter) {
+				if (create_filter->is_forward_pass) {
+					forward_creates.push_back(create_filter);
+				}
+			}
+		}
+
+		for (auto &child : current->children) {
+			queue.push_back(child.get());
+		}
+	}
+
+	D_PRINTF("[PUSHDOWN-SETUP] found %zu forward CREATE_FILTERs", forward_creates.size());
+
+	// for each forward-pass CREATE_FILTER, set up pushdown targets
+	for (auto *create_filter : forward_creates) {
+		D_PRINTF("[PUSHDOWN-SETUP] CREATE_FILTER build=table_%llu, related_probe_filter=%zu",
+		         (unsigned long long)create_filter->filter_operation.build_table_idx,
+		         create_filter->related_probe_filter.size());
+		for (auto *probe_filter : create_filter->related_probe_filter) {
+			if (!probe_filter->filter_operation.is_forward_pass) {
+				D_PRINTF("[PUSHDOWN-SETUP]   skipping PROBE_FILTER probe=table_%llu (not forward)",
+				         (unsigned long long)probe_filter->filter_operation.probe_table_idx);
+				continue;
+			}
+
+			idx_t probe_table_idx = probe_filter->filter_operation.probe_table_idx;
+			auto it = table_mgr.table_lookup.find(probe_table_idx);
+			if (it == table_mgr.table_lookup.end()) {
+				D_PRINTF("[PUSHDOWN-SETUP]   probe table_%llu not in table_lookup",
+				         (unsigned long long)probe_table_idx);
+				continue;
+			}
+
+			LogicalGet *get = TableManager::FindLogicalGet(it->second.table_op);
+			if (!get) {
+				continue;
+			}
+
+			// create or reuse DynamicTableFilterSet on the LogicalGet
+			if (!get->dynamic_filters) {
+				get->dynamic_filters = make_shared_ptr<DynamicTableFilterSet>();
+			}
+
+			// resolve each probe column to a scan column index
+			auto &col_ids = get->GetColumnIds();
+			for (size_t i = 0; i < probe_filter->filter_operation.probe_columns.size(); i++) {
+				const auto &probe_col = probe_filter->filter_operation.probe_columns[i];
+
+				idx_t scan_col_idx = probe_col.column_index;
+				if (scan_col_idx >= col_ids.size()) {
+					D_PRINTF("[PUSHDOWN] probe column (%llu.%llu) out of bounds for scan column_ids (size=%zu)",
+					         (unsigned long long)probe_col.table_index, (unsigned long long)probe_col.column_index,
+					         col_ids.size());
+					continue;
+				}
+
+				// get column type and name
+				LogicalType col_type = LogicalType::BIGINT;
+				string col_name = "col_" + std::to_string(probe_col.column_index);
+				idx_t primary_idx = col_ids[scan_col_idx].GetPrimaryIndex();
+				if (primary_idx < get->returned_types.size()) {
+					col_type = get->returned_types[primary_idx];
+				}
+				if (primary_idx < get->names.size()) {
+					col_name = get->names[primary_idx];
+				}
+
+				LogicalCreateFilter::DynamicFilterTarget target;
+				target.dynamic_filters = get->dynamic_filters;
+				target.scan_column_index = scan_col_idx;
+				target.probe_column = probe_col;
+				target.column_type = col_type;
+				target.column_name = col_name;
+				create_filter->pushdown_targets.push_back(std::move(target));
+			}
+
+			// mark PROBE_FILTER as passthrough since filters are pushed to scan
+			probe_filter->is_passthrough = true;
+
+			D_PRINTF("[PUSHDOWN] forward CREATE_FILTER (build=table_%llu) -> PROBE_FILTER (probe=table_%llu) pushed "
+			         "%zu targets",
+			         (unsigned long long)create_filter->filter_operation.build_table_idx,
+			         (unsigned long long)probe_table_idx, create_filter->pushdown_targets.size());
+		}
+	}
+}
+
+// find the deepest CREATE_FILTER in a linear chain (following child[0])
+static LogicalOperator *FindDeepestCreateFilter(LogicalOperator *node) {
+	LogicalOperator *deepest = nullptr;
+	while (node) {
+		if (node->type == LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR &&
+		    dynamic_cast<LogicalCreateFilter *>(node)) {
+			deepest = node;
+		}
+		if (node->children.empty()) {
+			break;
+		}
+		node = node->children[0].get();
+	}
+	return deepest;
+}
+
+void RobustOptimizerContextState::LiftCreateFilterAboveMarkJoin(unique_ptr<LogicalOperator> &plan) {
+	if (!plan) {
+		return;
+	}
+	for (auto &child : plan->children) {
+		LiftCreateFilterAboveMarkJoin(child);
+	}
+
+	// match: MARK JOIN with BF operators in probe chain
+	if (plan->type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
+		return;
+	}
+	auto &join = plan->Cast<LogicalComparisonJoin>();
+	if (join.join_type != JoinType::MARK) {
+		return;
+	}
+
+	auto &probe_child = plan->children[0];
+	auto *deepest = FindDeepestCreateFilter(probe_child.get());
+	if (!deepest) {
+		return;
+	}
+
+	// detach block from probe chain, place above MARK_JOIN
+	auto below_deepest = std::move(deepest->children[0]);
+	deepest->children.clear();
+	auto block = std::move(probe_child);
+	probe_child = std::move(below_deepest);
+
+	deepest->AddChild(std::move(plan));
+	plan = std::move(block);
+}
+
+void RobustOptimizerContextState::LiftCreateFilterAboveFilter(unique_ptr<LogicalOperator> &plan) {
+	if (!plan) {
+		return;
+	}
+	for (auto &child : plan->children) {
+		LiftCreateFilterAboveFilter(child);
+	}
+
+	if (plan->type != LogicalOperatorType::LOGICAL_FILTER) {
+		return;
+	}
+
+	auto *deepest = FindDeepestCreateFilter(plan->children[0].get());
+	if (!deepest) {
+		return;
+	}
+
+	// same block-detach logic as LiftCreateFilterAboveMarkJoin
+	auto below_deepest = std::move(deepest->children[0]);
+	deepest->children.clear();
+	auto block = std::move(plan->children[0]);
+	plan->children[0] = std::move(below_deepest);
+
+	deepest->AddChild(std::move(plan));
+	plan = std::move(block);
+}
+
+unique_ptr<LogicalOperator> RobustOptimizerContextState::PreOptimize(unique_ptr<LogicalOperator> plan) {
+	// step 1: extract join operators
+	vector<JoinEdge> edges = ExtractOperators(*plan);
+
+	// step 2: create transfer graph using LargestRoot algorithm
+	mst_edges = LargestRoot(edges);
+
+	return plan;
+}
+
+unique_ptr<LogicalOperator> RobustOptimizerContextState::Optimize(unique_ptr<LogicalOperator> plan) {
+	// step 1: extract join operators
+	vector<JoinEdge> edges = ExtractOperators(*plan);
+
+	D_PRINTF("Edges size: %zu", edges.size());
+	if (edges.size() <= 1) {
+		return plan;
+	}
+
+	// display physical plan DAG if enabled (before we modify the plan)
+	PrintPhysicalPlanDAG(plan.get());
+
+	// determine heuristic
+	Value heuristic_val;
+	string heuristic = "join_order";
+	if (context.TryGetCurrentSetting("robust_heuristic", heuristic_val)) {
+		heuristic = heuristic_val.GetValue<string>();
+	}
+
+	unordered_map<LogicalOperator *, vector<FilterOperation>> forward_filter_ops, backward_filter_ops;
+
+	if (heuristic == "join_order") {
+		// use DuckDB's join order DAG
+		map<ColKey, ColKey> uf_parent;
+		auto all_nodes = BuildPhysicalPlanDAG(plan.get(), uf_parent);
+
+		// flip non-largest roots to leaves (default: on)
+		Value flip_val;
+		bool flip_roots = true;
+		if (context.TryGetCurrentSetting("robust_flip_roots", flip_val)) {
+			flip_roots = flip_val.GetValue<bool>();
+		}
+		if (flip_roots) {
+			FlipRootsToLeaves(all_nodes);
+		}
+
+		// display DAG if setting is enabled
+		Value dag_val;
+		if (context.TryGetCurrentSetting("robust_display_dag", dag_val) && dag_val.GetValue<bool>()) {
+			PrintPhysicalDAG(all_nodes, table_mgr);
+		}
+
+		auto filter_ops = GenerateStageModificationsFromDAG(all_nodes, uf_parent);
+		forward_filter_ops = std::move(filter_ops.first);
+		backward_filter_ops = std::move(filter_ops.second);
+	} else {
+		// largest_root
+		mst_edges = LargestRoot(edges);
+		auto filter_ops = GenerateStageModifications(mst_edges);
+		forward_filter_ops = std::move(filter_ops.first);
+		backward_filter_ops = std::move(filter_ops.second);
+	}
+
+	// check pass mode setting
+	Value pass_mode_val;
+	string pass_mode = "both";
+	if (context.TryGetCurrentSetting("robust_pass_mode", pass_mode_val)) {
+		pass_mode = pass_mode_val.GetValue<string>();
+	}
+	if (pass_mode == "forward_only") {
+		backward_filter_ops.clear();
+	}
+
+	// step 4: insert create_filter/probe_filter operators into the plan
+	plan = ApplyStageModifications(std::move(plan), forward_filter_ops, backward_filter_ops);
+
+	// step 4.5a: lift BF operators above MARK_JOIN so they sit between FILTER and MARK_JOIN
+	LiftCreateFilterAboveMarkJoin(plan);
+
+	// step 4.5b: lift BF operators above FILTER so bloom filters are built from filtered output
+	LiftCreateFilterAboveFilter(plan);
+
+	// step 5: link PROBE_FILTER operators to their corresponding CREATE_FILTER operators
+	LinkProbeFilterToCreateFilter(plan.get());
+
+	// step 6: set up dynamic filter pushdown for forward-pass operators
+	SetupDynamicFilterPushdown(plan.get());
+
+	// // combine all bloom filter operations for debug (preserving order)
+	// vector<FilterOperation> all_filter_operations;
+	// for (const auto &pair : filter_ops.first) {
+	// 	all_filter_operations.insert(all_filter_operations.end(), pair.second.begin(), pair.second.end());
+	// }
+	// for (const auto &pair : filter_ops.second) {
+	// 	all_filter_operations.insert(all_filter_operations.end(), pair.second.begin(), pair.second.end());
+	// }
+	//
+	// // sort by sequence number to restore generation order
+	// std::sort(all_filter_operations.begin(), all_filter_operations.end(),
+	// 	[](const FilterOperation &a, const FilterOperation &b) {
+	// 		return a.sequence_number < b.sequence_number;
+	// 	});
+	//
+	// // debug print with correct ordering
+	// DebugPrintMST(mst_edges, all_filter_operations);
+	return plan;
+}
+
+// extension hooks
+// void PredicateTransferOptimizer::PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
+// 	// create optimizer state using proper DuckDB state management
+// 	auto optimizer_state = input.context.registered_state->GetOrCreate<PredicateTransferOptimizer>(
+// 		"robust_optimizer_state", input.context);
+//
+// 	plan = optimizer_state->PreOptimize(std::move(plan));
+// }
+
+void RobustOptimizerContextState::PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
+	auto optimizer_state = input.context.registered_state->GetOrCreate<RobustOptimizerContextState>(
+	    "robust_optimizer_state", input.context);
+
+	plan = optimizer_state->PreOptimize(std::move(plan));
+}
+
+void RobustOptimizerContextState::Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
+	auto profiling = GetRobustProfilingState(input.context);
+	auto opt_start = std::chrono::high_resolution_clock::now();
+
+	const auto optimizer_state = input.context.registered_state->GetOrCreate<RobustOptimizerContextState>(
+	    "robust_optimizer_state", input.context);
+	plan = optimizer_state->Optimize(std::move(plan));
+
+	if (profiling) {
+		auto opt_end = std::chrono::high_resolution_clock::now();
+		profiling->optimizer_time_us =
+		    std::chrono::duration_cast<std::chrono::microseconds>(opt_end - opt_start).count();
+
+		// populate table names for profiling output
+		for (const auto &ti : optimizer_state->table_mgr.table_ops) {
+			profiling->table_names[ti.table_idx] = optimizer_state->table_mgr.GetTableName(ti.table_idx);
+		}
+	}
+
+	input.context.registered_state->Remove("robust_optimizer_state");
+}
+
+} // namespace duckdb
diff --git a/src/optimizer/robust_optimizer.hpp b/src/optimizer/robust_optimizer.hpp
new file mode 100644
index 0000000..10bb7be
--- /dev/null
+++ b/src/optimizer/robust_optimizer.hpp
@@ -0,0 +1,147 @@
+#pragma once
+
+#include "graph_manager.hpp"
+#include "table_manager.hpp"
+#include "duckdb/main/client_context_state.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/common/vector.hpp"
+#include "duckdb/common/unordered_map.hpp"
+#include "duckdb/optimizer/optimizer_extension.hpp"
+
+namespace duckdb {
+
+// tree node for rooted MST representation (used by Robust DAG)
+struct TreeNode {
+	idx_t table_idx;
+	LogicalOperator *table_op;
+	vector<TreeNode *> children;
+	TreeNode *parent;
+	int level;                // distance from root (root = 0)
+	JoinEdge *edge_to_parent; // null for root
+
+	TreeNode(idx_t idx, LogicalOperator *op)
+	    : table_idx(idx), table_op(op), parent(nullptr), level(0), edge_to_parent(nullptr) {
+	}
+};
+
+// edge in physical plan DAG (stores resolved column bindings for label)
+struct PhysicalDAGEdge {
+	idx_t parent_table;
+	idx_t child_table;
+	vector<ColumnBinding> parent_cols;
+	vector<ColumnBinding> child_cols;
+};
+
+// node in physical plan DAG (supports multiple parents for multi-way joins)
+struct PhysicalDAGNode {
+	idx_t table_idx;
+	LogicalOperator *table_op;
+	vector<PhysicalDAGNode *> children;
+	vector<PhysicalDAGNode *> parents;
+	vector<PhysicalDAGEdge> edges_to_parents; // one per parent, same ordering
+	int level;
+
+	PhysicalDAGNode(idx_t idx, LogicalOperator *op) : table_idx(idx), table_op(op), level(0) {
+	}
+};
+
+// column key for equivalence class union-find
+using ColKey = std::pair<idx_t, idx_t>; // (table_idx, column_idx)
+
+class RobustOptimizerContextState : public ClientContextState {
+public:
+	explicit RobustOptimizerContextState(ClientContext &ctx) : context(ctx) {
+	}
+
+	ClientContext &context;
+
+	vector<JoinEdge> join_edges;
+	//	map<table_id, idx_t> table_cardinalities;
+	map<LogicalOperator *, idx_t> operator_to_table_id;
+
+	TableManager table_mgr;
+	vector<LogicalOperator *> join_ops;
+	vector<JoinEdge> mst_edges;
+
+	unordered_map<ColumnBinding, ColumnBinding, ColumnBindingHashFunction> rename_col_bindings;
+
+public:
+	// extract all the join edges from the plan
+	// vector<JoinEdge> ExtractOperators(LogicalOperator &plan, vector<LogicalOperator*> &join_ops);
+	vector<JoinEdge> ExtractOperators(LogicalOperator &plan);
+	void ExtractOperatorsRecursive(LogicalOperator &plan, vector<LogicalOperator *> &join_ops);
+	map<table_id, TableInfo> get_value();
+	vector<JoinEdge> CreateJoinEdges(vector<LogicalOperator *> &join_ops);
+	vector<JoinEdge> LargestRoot(vector<JoinEdge> &edges);
+
+	// build rooted tree from MST edges with largest table as root
+	TreeNode *BuildRootedTree(vector<JoinEdge> &mst_edges) const;
+
+	// void CreateForwardPassModifications(LogicalOperator *smaller_table_op, LogicalOperator *larger_table_op,
+	// 														const vector<ColumnBinding> &smaller_columns, const
+	// vector<ColumnBinding>
+	// &larger_columns, 														unordered_map<LogicalOperator*,
+	// unique_ptr<LogicalOperator>> &forward_pass);
+	//
+	// void CreateBackwardPassModifications(LogicalOperator *smaller_table_op, LogicalOperator *larger_table_op,
+	// 														const vector<ColumnBinding> &smaller_columns, const
+	// vector<ColumnBinding>
+	// &larger_columns, 														unordered_map<LogicalOperator*,
+	// unique_ptr<LogicalOperator>> &backward_pass);
+	//
+	std::pair<unordered_map<LogicalOperator *, vector<FilterOperation>>,
+	          unordered_map<LogicalOperator *, vector<FilterOperation>>>
+	GenerateStageModifications(const vector<JoinEdge> &mst_edges);
+
+	std::pair<unordered_map<LogicalOperator *, vector<FilterOperation>>,
+	          unordered_map<LogicalOperator *, vector<FilterOperation>>>
+	GenerateStageModificationsFromDAG(vector<PhysicalDAGNode *> &all_nodes, map<ColKey, ColKey> &uf_parent);
+
+	unique_ptr<LogicalOperator> BuildStackedBFOperators(unique_ptr<LogicalOperator> base_plan,
+	                                                    const vector<FilterOperation> &filter_ops,
+	                                                    bool reverse_order = false);
+
+	unique_ptr<LogicalOperator>
+	ApplyStageModifications(unique_ptr<LogicalOperator> plan,
+	                        const unordered_map<LogicalOperator *, vector<FilterOperation>> &forward_filter_ops,
+	                        const unordered_map<LogicalOperator *, vector<FilterOperation>> &backward_filter_ops);
+
+	// helper to link PROBE_FILTER operators to their corresponding CREATE_FILTER operators
+	void LinkProbeFilterToCreateFilter(LogicalOperator *plan);
+
+	// set up dynamic filter pushdown for forward-pass CREATE_FILTER operators
+	void SetupDynamicFilterPushdown(LogicalOperator *plan);
+
+	// pass 1: lift BF operator block above MARK_JOIN (probe chain → above MARK_JOIN)
+	void LiftCreateFilterAboveMarkJoin(unique_ptr<LogicalOperator> &plan);
+
+	// pass 2: lift BF operator block above FILTER (handles all FILTER cases)
+	void LiftCreateFilterAboveFilter(unique_ptr<LogicalOperator> &plan);
+
+	// resolve column binding through rename chain to get base table binding
+	ColumnBinding ResolveColumnBinding(const ColumnBinding &binding) const;
+
+	// debug functions
+	void DebugPrintGraph(const vector<JoinEdge> &edges) const;
+	void DebugPrintMST(const vector<JoinEdge> &mst_edges, const vector<FilterOperation> &filter_operations);
+
+	// print DAG as ASCII tree (gated by robust_display_dag setting)
+	void PrintDAG(TreeNode *root);
+
+	// build and print DAG from DuckDB's join order (gated by robust_display_physical_dag)
+	vector<PhysicalDAGNode *> BuildPhysicalPlanDAG(LogicalOperator *op, map<ColKey, ColKey> &uf_parent);
+	void PrintPhysicalPlanDAG(LogicalOperator *op);
+
+	// flip non-largest roots to leaves in the DAG
+	void FlipRootsToLeaves(vector<PhysicalDAGNode *> &all_nodes);
+
+	unique_ptr<LogicalOperator> PreOptimize(unique_ptr<LogicalOperator> plan);
+
+	unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan);
+
+	// entry point for extension framework
+	static void PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan);
+	static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan);
+};
+
+} // namespace duckdb
diff --git a/src/optimizer/rpt_optimizer.cpp b/src/optimizer/rpt_optimizer.cpp
deleted file mode 100644
index 9d2cd14..0000000
--- a/src/optimizer/rpt_optimizer.cpp
+++ /dev/null
@@ -1,734 +0,0 @@
-#include "rpt_optimizer.hpp"
-#include "duckdb/planner/operator/logical_comparison_join.hpp"
-// #include "duckdb/planner/operator/logical_get.hpp"
-#include "duckdb/planner/operator/logical_aggregate.hpp"
-#include "duckdb/planner/expression/bound_columnref_expression.hpp"
-#include "duckdb/common/types.hpp"
-#include "table_manager.hpp"
-#include "graph_manager.hpp"
-#include "duckdb/common/unordered_set.hpp"
-#include <algorithm>
-#include "duckdb/common/vector.hpp"
-#include "duckdb/common/unordered_map.hpp"
-#include "../operators/logical_create_bf.hpp"
-#include "../operators/logical_use_bf.hpp"
-#include <fmt/format.h>
-
-namespace duckdb {
-// class LogicalCreateBF;
-// class LogicalUseBF;
-
-vector<JoinEdge> RPTOptimizerContextState::ExtractOperators(LogicalOperator &plan) {
-	vector<LogicalOperator*> join_ops;
-	vector<TableInfo> table_infos;
-
-	// pass 1: collect the base tables and join operators
-	ExtractOperatorsRecursive(plan, join_ops);
-
-	// pass 2: create JoinEdges with table information
-	return CreateJoinEdges(join_ops);
-}
-
-
-void RPTOptimizerContextState::ExtractOperatorsRecursive(LogicalOperator &plan, vector<LogicalOperator*> &join_ops) {
-//	unordered_set<hash_t> existed_set;
-//	auto ComputeConditionHash = [](const JoinCondition &cond) {
-//		return cond.left->Hash() + cond.right->Hash();
-//	};
-
-	LogicalOperator *op = &plan;
-
-	// step 1: collect all join operators
-	if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
-		op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
-		LogicalComparisonJoin &join = op->Cast<LogicalComparisonJoin>();
-		switch (join.join_type) {
-			case JoinType::INNER:
-			case JoinType::LEFT:
-			case JoinType::RIGHT:
-			case JoinType::SEMI:
-			case JoinType::RIGHT_SEMI: {
-				if (std::any_of(join.conditions.begin(), join.conditions.end(), [](const JoinCondition &jc) {
-							return jc.comparison == ExpressionType::COMPARE_EQUAL &&
-								   jc.left->type == ExpressionType::BOUND_COLUMN_REF &&
-								   jc.right->type == ExpressionType::BOUND_COLUMN_REF;
-						})) {
-					// JoinEdge edge(join);
-					join_ops.push_back(op);
-					break;
-				}
-			}
-			default:
-				break;
-		}
-	}
-
-	switch (op->type) {
-		case LogicalOperatorType::LOGICAL_FILTER: {
-			LogicalOperator *child = op->children[0].get();
-			if(child->type == LogicalOperatorType::LOGICAL_GET) {
-				table_mgr.AddTableOperator(child);
-				return;
-			}
-
-			ExtractOperatorsRecursive(*child, join_ops);
-			return;
-		}
-		case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: {
-			auto &agg = op->Cast<LogicalAggregate>();
-			if (agg.groups.empty() && agg.grouping_sets.size() <= 1) {
-				table_mgr.AddTableOperator(op);
-				ExtractOperatorsRecursive(*op->children[0], join_ops);
-			} else {
-				auto old_refs = agg.GetColumnBindings();
-				for (size_t i = 0; i < agg.groups.size(); i++) {
-					if (agg.groups[i]->type == ExpressionType::BOUND_COLUMN_REF) {
-						auto &col_ref = agg.groups[i]->Cast<BoundColumnRefExpression>();
-						rename_col_bindings.insert({old_refs[i], col_ref.binding});
-					}
-				}
-				ExtractOperatorsRecursive(*op->children[0], join_ops);
-			}
-			return;
-		}
-		case LogicalOperatorType::LOGICAL_PROJECTION: {
-					auto old_refs = op->GetColumnBindings();
-					for (size_t i = 0; i < op->expressions.size(); i++) {
-						if (op->expressions[i]->type == ExpressionType::BOUND_COLUMN_REF) {
-							auto &col_ref = op->expressions[i]->Cast<BoundColumnRefExpression>();
-							rename_col_bindings.insert({old_refs[i], col_ref.binding});
-						}
-					}
-			ExtractOperatorsRecursive(*op->children[0], join_ops);
-			return;
-		}
-		case LogicalOperatorType::LOGICAL_UNION:
-		case LogicalOperatorType::LOGICAL_EXCEPT:
-		case LogicalOperatorType::LOGICAL_INTERSECT: {
-					table_mgr.AddTableOperator(op);
-					ExtractOperatorsRecursive(*op->children[0], join_ops);
-					ExtractOperatorsRecursive(*op->children[1], join_ops);
-					return;
-		}
-		case LogicalOperatorType::LOGICAL_WINDOW: {
-					table_mgr.AddTableOperator(op);
-					ExtractOperatorsRecursive(*op->children[0], join_ops);
-					return;
-		}
-		case LogicalOperatorType::LOGICAL_DUMMY_SCAN:
-		case LogicalOperatorType::LOGICAL_EXPRESSION_GET:
-		case LogicalOperatorType::LOGICAL_DELIM_GET:
-		case LogicalOperatorType::LOGICAL_GET:
-		case LogicalOperatorType::LOGICAL_EMPTY_RESULT:
-		case LogicalOperatorType::LOGICAL_CHUNK_GET:
-				table_mgr.AddTableOperator(op);
-				return;
-		default:
-				for (auto &child : op->children) {
-					ExtractOperatorsRecursive(*child, join_ops);
-				}
-		}
-}
-
-vector<JoinEdge> RPTOptimizerContextState::CreateJoinEdges(vector<LogicalOperator*> &join_ops) {
-
-	// deduplicate join conditions
-//	unordered_set<hash_t> existed_set;
-//	auto ComputeConditionHash = [](const JoinCondition &cond) {
-//		return cond.left->Hash() + cond.right->Hash();
-//	};
-
-	vector<JoinEdge> edges;
-	for (auto &op : join_ops) {
-		auto &join = op->Cast<LogicalComparisonJoin>();
-
-		vector<ColumnBinding> left_columns, right_columns;
-		for(const JoinCondition &cond: join.conditions) {
-			if(cond.comparison == ExpressionType::COMPARE_EQUAL &&
-				cond.left->type == ExpressionType::BOUND_COLUMN_REF &&
-				cond.right->type == ExpressionType::BOUND_COLUMN_REF) {
-				left_columns.push_back(cond.left->Cast<BoundColumnRefExpression>().binding);
-				right_columns.push_back(cond.right->Cast<BoundColumnRefExpression>().binding);
-			}
-		}
-
-		if(!left_columns.empty() && !right_columns.empty()) {
-			// use column bindings to determine table indices instead of looking up join children directly
-			idx_t left_table_idx = left_columns[0].table_index;
-			idx_t right_table_idx = right_columns[0].table_index;
-			
-			// verify these table indices exist in our table manager
-			if(table_mgr.table_lookup.find(left_table_idx) != table_mgr.table_lookup.end() &&
-			   table_mgr.table_lookup.find(right_table_idx) != table_mgr.table_lookup.end()) {
-				JoinEdge edge(left_table_idx, right_table_idx, left_columns, right_columns, left_columns.size(), join.join_type);
-				edges.push_back(edge);
-			}
-		}
-	}
-
-	return edges;
-}
-
-vector<JoinEdge> RPTOptimizerContextState::LargestRoot(vector<JoinEdge> &edges) {
-	// step 1: find largest table by cardinality
-	idx_t largest_table_idx = 0;
-	idx_t max_cardinality = 0;
-	for (auto &table_info : table_mgr.table_ops) {
-		if (table_info.estimated_cardinality > max_cardinality) {
-			max_cardinality = table_info.estimated_cardinality;
-			largest_table_idx = table_info.table_idx;
-		}
-	}
-
-	// print largest table idx
-	printf("Largest table: %llu, cardinality: %llu\n", largest_table_idx, max_cardinality);
-	DebugPrintGraph(edges);
-
-	// step 2: build MST (maximum) using Prim's algorithm starting from largest table
-	unordered_set<idx_t> mst_nodes;
-	vector<JoinEdge> mst_edges;
-
-	mst_nodes.insert(largest_table_idx);
-
-	// print table ops size
-	printf("Table ops size: %zu\n", table_mgr.table_ops.size());
-
-	while (mst_nodes.size() < table_mgr.table_ops.size() && !edges.empty()) {
-		const JoinEdge *best_edge = nullptr;
-		idx_t max_weight = 0;
-		max_cardinality = 0;
-		for (JoinEdge &edge : edges) {
-			bool left_in_mst = mst_nodes.count(edge.table_a) > 0;
-			bool right_in_mst = mst_nodes.count(edge.table_b) > 0;
-
-			if (left_in_mst != right_in_mst) {
-				const idx_t weight = edge.weight;
-				idx_t left_cardinality = table_mgr.table_lookup[edge.table_a].estimated_cardinality;
-				idx_t right_cardinality = table_mgr.table_lookup[edge.table_b].estimated_cardinality;
-				const idx_t cardinality = std::min(left_cardinality, right_cardinality);
-
-				if (weight > max_weight || (weight == max_weight && cardinality > max_cardinality)) {
-					max_weight = weight;
-					max_cardinality = cardinality;
-					best_edge = &edge;
-				}
-			}
-		}
-
-		if (best_edge) {
-			printf("Best edge: %llu <-> %llu (weight=%llu)\n", best_edge->table_a, best_edge->table_b, best_edge->weight);
-		}
-		if (!best_edge) {
-
-			printf("Warning - Disconnected components found. MST incomplete.\n");
-			break;
-		}
-
-		mst_edges.push_back(*best_edge);
-		mst_nodes.insert(best_edge->table_a);
-		mst_nodes.insert(best_edge->table_b);
-	}
-
-	return mst_edges;
-}
-
-TreeNode* RPTOptimizerContextState::BuildRootedTree(vector<JoinEdge> &mst_edges) {
-	// step 1: find largest table (root)
-	idx_t root_table_idx = 0;
-	idx_t max_cardinality = 0;
-	LogicalOperator* root_op = nullptr;
-
-	for (const auto &table_info : table_mgr.table_ops) {
-		if (table_info.estimated_cardinality > max_cardinality) {
-			max_cardinality = table_info.estimated_cardinality;
-			root_table_idx = table_info.table_idx;
-			root_op = table_info.table_op;
-		}
-	}
-
-	printf("BuildRootedTree: root table = %llu (cardinality=%llu)\n", root_table_idx, max_cardinality);
-
-	// step 2: create nodes for all tables
-	unordered_map<idx_t, TreeNode*> table_to_node;
-	for (const auto &table_info : table_mgr.table_ops) {
-		auto* node = new TreeNode(table_info.table_idx, table_info.table_op);
-		table_to_node[table_info.table_idx] = node;
-	}
-
-	// step 3: build adjacency list from MST edges (undirected)
-	unordered_map<idx_t, vector<pair<idx_t, JoinEdge*>>> adjacency;
-	for (auto &edge : mst_edges) {
-		adjacency[edge.table_a].push_back({edge.table_b, &edge});
-		adjacency[edge.table_b].push_back({edge.table_a, &edge});
-	}
-
-	// step 4: BFS from root to assign parent-child relationships and levels
-	vector<idx_t> queue;
-	unordered_set<idx_t> visited;
-
-	queue.push_back(root_table_idx);
-	visited.insert(root_table_idx);
-	table_to_node[root_table_idx]->level = 0;
-
-	size_t front = 0;
-	while (front < queue.size()) {
-		idx_t current = queue[front++];
-		TreeNode* current_node = table_to_node[current];
-
-		printf("  Visiting table %llu at level %d\n", current, current_node->level);
-
-		// process all neighbors
-		for (auto &[neighbor_idx, edge] : adjacency[current]) {
-			if (visited.count(neighbor_idx) == 0) {
-				// neighbor is a child of current
-				TreeNode* child_node = table_to_node[neighbor_idx];
-				child_node->parent = current_node;
-				child_node->level = current_node->level + 1;
-				child_node->edge_to_parent = edge;
-
-				current_node->children.push_back(child_node);
-
-				queue.push_back(neighbor_idx);
-				visited.insert(neighbor_idx);
-
-				printf("    Child: table %llu at level %d\n", neighbor_idx, child_node->level);
-			}
-		}
-	}
-
-	return table_to_node[root_table_idx];
-}
-
-void RPTOptimizerContextState::DebugPrintGraph(const vector<JoinEdge> &edges) const {
-	// Debug: Print all tables
-	printf("=== TABLE INFORMATION ===\n");
-	for (const auto &table_info : table_mgr.table_ops) {
-		printf("Table %llu: cardinality=%llu\n", table_info.table_idx, table_info.estimated_cardinality);
-	}
-
-	// Find largest table
-	idx_t largest_table_idx = 0;
-	idx_t max_cardinality = 0;
-	for (auto &table_info : table_mgr.table_ops) {
-		if (table_info.estimated_cardinality > max_cardinality) {
-			max_cardinality = table_info.estimated_cardinality;
-			largest_table_idx = table_info.table_idx;
-		}
-	}
-	printf("Largest table: %llu (cardinality=%llu)\n\n", largest_table_idx, max_cardinality);
-
-	// Debug: Print all join edges
-	printf("=== ALL JOIN EDGES ===\n");
-	for (size_t i = 0; i < edges.size(); i++) {
-		const auto &edge = edges[i];
-		printf("Edge %zu: %llu <-> %llu (weight=%llu, type=%d)\n",
-				i, edge.table_a, edge.table_b, edge.weight, (int)edge.join_type);
-
-		// Print column bindings
-		printf("  Columns A: ");
-		for (const auto &col : edge.join_columns_a) {
-			printf("(%llu.%llu) ", col.table_index, col.column_index);
-		}
-		printf("\n  Columns B: ");
-		for (const auto &col : edge.join_columns_b) {
-			printf("(%llu.%llu) ", col.table_index, col.column_index);
-		}
-		printf("\n");
-	}
-	printf("\n");
-}
-
-void RPTOptimizerContextState::DebugPrintMST(const vector<JoinEdge> &mst_edges, const vector<BloomFilterOperation> &bf_operations) {
-	printf("=== MST EDGES ===\n");
-	for (size_t i = 0; i < mst_edges.size(); i++) {
-		const auto &edge = mst_edges[i];
-		printf("MST Edge %zu: %llu <-> %llu (weight=%llu)\n",
-			i, edge.table_a, edge.table_b, edge.weight);
-	}
-	printf("\n");
-
-	printf("=== BLOOM FILTER OPERATIONS ===\n");
-	for (size_t i = 0; i < bf_operations.size(); i++) {
-		const auto &bf_op = bf_operations[i];
-
-		if (bf_op.is_create) {
-			// CREATE operation
-			printf("BF Op %zu: CREATE_BF on table %llu\n", i, bf_op.build_table_idx);
-			printf("  Build columns: ");
-			for (const auto &col : bf_op.build_columns) {
-				printf("(%llu.%llu) ", col.table_index, col.column_index);
-			}
-			printf("\n");
-		} else {
-			// USE operation
-			printf("BF Op %zu: USE_BF on table %llu (using BF from table %llu)\n",
-				   i, bf_op.probe_table_idx, bf_op.build_table_idx);
-			printf("  Build columns: ");
-			for (const auto &col : bf_op.build_columns) {
-				printf("(%llu.%llu) ", col.table_index, col.column_index);
-			}
-			printf("\n  Probe columns: ");
-			for (const auto &col : bf_op.probe_columns) {
-				printf("(%llu.%llu) ", col.table_index, col.column_index);
-			}
-			printf("\n");
-		}
-	}
-	printf("\n");
-}
-
-std::pair<unordered_map<LogicalOperator*, vector<BloomFilterOperation>>,
-          unordered_map<LogicalOperator*, vector<BloomFilterOperation>>>
-RPTOptimizerContextState::GenerateStageModifications(const vector<JoinEdge> &mst_edges) {
-
-	// step 1: build rooted tree from MST
-	TreeNode* root = BuildRootedTree(const_cast<vector<JoinEdge>&>(mst_edges));
-
-	// step 2: collect all nodes organized by level
-	unordered_map<int, vector<TreeNode*>> nodes_by_level;
-	int max_level = 0;
-
-	// BFS to collect nodes by level
-	vector<TreeNode*> queue;
-	queue.push_back(root);
-	size_t front = 0;
-
-	while (front < queue.size()) {
-		TreeNode* node = queue[front++];
-		nodes_by_level[node->level].push_back(node);
-		max_level = std::max(max_level, node->level);
-
-		for (TreeNode* child : node->children) {
-			queue.push_back(child);
-		}
-	}
-
-	printf("=== TREE LEVELS ===\n");
-	for (int level = 0; level <= max_level; level++) {
-		printf("Level %d: ", level);
-		for (TreeNode* node : nodes_by_level[level]) {
-			printf("table_%llu ", node->table_idx);
-		}
-		printf("\n");
-	}
-	printf("\n");
-
-	unordered_map<LogicalOperator*, vector<BloomFilterOperation>> forward_bf_ops;
-	unordered_map<LogicalOperator*, vector<BloomFilterOperation>> backward_bf_ops;
-
-	// sequence counter to preserve operation order
-	idx_t sequence = 0;
-
-	// step 3: forward pass - bottom-up (leaves to root)
-	// process levels from highest (leaves) down to 1
-	printf("=== FORWARD PASS (leaves → root) ===\n");
-	for (int level = max_level; level >= 1; level--) {
-		for (TreeNode* child_node : nodes_by_level[level]) {
-			TreeNode* parent_node = child_node->parent;
-			JoinEdge* edge = child_node->edge_to_parent;
-
-			// determine which columns belong to child and which to parent
-			vector<ColumnBinding> child_columns, parent_columns;
-
-			if (edge->table_a == child_node->table_idx) {
-				child_columns = edge->join_columns_a;
-				parent_columns = edge->join_columns_b;
-			} else {
-				child_columns = edge->join_columns_b;
-				parent_columns = edge->join_columns_a;
-			}
-
-			printf("  Level %d: table_%llu (child) CREATE → table_%llu (parent) USE\n",
-				   level, child_node->table_idx, parent_node->table_idx);
-
-			// CREATE_BF on child
-			BloomFilterOperation create_op;
-			create_op.build_table_idx = child_node->table_idx;
-			create_op.probe_table_idx = 0; // not used for CREATE operations
-			create_op.build_columns = child_columns;
-			create_op.is_create = true;
-			create_op.sequence_number = sequence++;
-			forward_bf_ops[child_node->table_op].push_back(create_op);
-
-			// USE_BF on parent
-			BloomFilterOperation use_op;
-			use_op.build_table_idx = child_node->table_idx;
-			use_op.probe_table_idx = parent_node->table_idx;
-			use_op.build_columns = child_columns;
-			use_op.probe_columns = parent_columns;
-			use_op.is_create = false;
-			use_op.sequence_number = sequence++;
-			forward_bf_ops[parent_node->table_op].push_back(use_op);
-		}
-	}
-	printf("\n");
-
-	// step 4: backward pass - top-down (root to leaves)
-	// process levels from 1 to max_level
-	printf("=== BACKWARD PASS (root → leaves) ===\n");
-	for (int level = 1; level <= max_level; level++) {
-		for (TreeNode* child_node : nodes_by_level[level]) {
-			TreeNode* parent_node = child_node->parent;
-			JoinEdge* edge = child_node->edge_to_parent;
-
-			// determine which columns belong to parent and which to child
-			vector<ColumnBinding> parent_columns, child_columns;
-
-			if (edge->table_a == parent_node->table_idx) {
-				parent_columns = edge->join_columns_a;
-				child_columns = edge->join_columns_b;
-			} else {
-				parent_columns = edge->join_columns_b;
-				child_columns = edge->join_columns_a;
-			}
-
-			printf("  Level %d: table_%llu (parent) CREATE → table_%llu (child) USE\n",
-				   level, parent_node->table_idx, child_node->table_idx);
-
-			// CREATE_BF on parent
-			BloomFilterOperation create_op;
-			create_op.build_table_idx = parent_node->table_idx;
-			create_op.probe_table_idx = 0; // not used for CREATE operations
-			create_op.build_columns = parent_columns;
-			create_op.is_create = true;
-			create_op.sequence_number = sequence++;
-			backward_bf_ops[parent_node->table_op].push_back(create_op);
-
-			// USE_BF on child
-			BloomFilterOperation use_op;
-			use_op.build_table_idx = parent_node->table_idx;
-			use_op.probe_table_idx = child_node->table_idx;
-			use_op.build_columns = parent_columns;
-			use_op.probe_columns = child_columns;
-			use_op.is_create = false;
-			use_op.sequence_number = sequence++;
-			backward_bf_ops[child_node->table_op].push_back(use_op);
-		}
-	}
-	printf("\n");
-
-	return {std::move(forward_bf_ops), std::move(backward_bf_ops)};
-}
-
-// std::pair<unordered_map<LogicalOperator*, vector<BloomFilterOperation>>,
-// 			unordered_map<LogicalOperator*, vector<BloomFilterOperation>>>
-// RPTOptimizerContextState::GenerateStageModifications(const vector<JoinEdge> &mst_edges) {
-//
-// 	unordered_map<LogicalOperator*, vector<BloomFilterOperation>> forward_bf_ops;
-// 	unordered_map<LogicalOperator*, vector<BloomFilterOperation>> backward_bf_ops;
-//
-// 	for (const JoinEdge &mst_edge: mst_edges) {
-// 		// for each edge, create a bf operation
-// 		// rule: CREATE_BF on a smaller table, USE_BF on a larger table
-//
-// 		const idx_t left_cardinality = table_mgr.table_lookup[mst_edge.table_a].estimated_cardinality;
-// 		const idx_t right_cardinality = table_mgr.table_lookup[mst_edge.table_b].estimated_cardinality;
-//
-// 		LogicalOperator* smaller_table_op;
-// 		LogicalOperator* larger_table_op;
-// 		vector<ColumnBinding> smaller_columns, larger_columns;
-//
-// 		if (left_cardinality <= right_cardinality) {
-// 			smaller_table_op = table_mgr.table_lookup[mst_edge.table_a].table_op;
-// 			larger_table_op = table_mgr.table_lookup[mst_edge.table_b].table_op;
-// 			smaller_columns = mst_edge.join_columns_a;
-// 			larger_columns = mst_edge.join_columns_b;
-// 		}
-// 		else {
-// 			smaller_table_op = table_mgr.table_lookup[mst_edge.table_b].table_op;
-// 			larger_table_op = table_mgr.table_lookup[mst_edge.table_a].table_op;
-// 			smaller_columns = mst_edge.join_columns_b;
-// 			larger_columns = mst_edge.join_columns_a;
-// 		}
-//
-// 		// forward pass: smaller → larger
-// 		// CREATE_BF on smaller table
-// 		BloomFilterOperation forward_create_bf;
-// 		forward_create_bf.build_table_idx = table_mgr.GetScalarTableIndex(smaller_table_op);
-// 		forward_create_bf.probe_table_idx = table_mgr.GetScalarTableIndex(larger_table_op);
-// 		forward_create_bf.build_columns = smaller_columns;
-// 		forward_create_bf.probe_columns = larger_columns;
-// 		forward_create_bf.is_create = true;
-//
-// 		// USE_BF on larger table
-// 		BloomFilterOperation forward_use_bf;
-// 		forward_use_bf.build_table_idx = table_mgr.GetScalarTableIndex(smaller_table_op);
-// 		forward_use_bf.probe_table_idx = table_mgr.GetScalarTableIndex(larger_table_op);
-// 		forward_use_bf.build_columns = smaller_columns;
-// 		forward_use_bf.probe_columns = larger_columns;
-// 		forward_use_bf.is_create = false;
-//
-// 		forward_bf_ops[smaller_table_op].push_back(forward_create_bf);
-// 		forward_bf_ops[larger_table_op].push_back(forward_use_bf);
-//
-//
-// 		// backward pass: larger → smaller
-// 		// CREATE_BF operation for larger table
-// 		BloomFilterOperation backward_create_bf;
-// 		backward_create_bf.build_table_idx = table_mgr.GetScalarTableIndex(larger_table_op);
-// 		backward_create_bf.probe_table_idx = table_mgr.GetScalarTableIndex(smaller_table_op);
-// 		backward_create_bf.build_columns = larger_columns;
-// 		backward_create_bf.probe_columns = smaller_columns;
-// 		backward_create_bf.is_create = true;
-//
-// 		// USE_BF operation for smaller table
-// 		BloomFilterOperation backward_use_bf;
-// 		backward_use_bf.build_table_idx = table_mgr.GetScalarTableIndex(larger_table_op);
-// 		backward_use_bf.probe_table_idx = table_mgr.GetScalarTableIndex(smaller_table_op);
-// 		backward_use_bf.build_columns = larger_columns;
-// 		backward_use_bf.probe_columns = smaller_columns;
-// 		backward_use_bf.is_create = false;
-//
-// 		backward_bf_ops[larger_table_op].push_back(backward_create_bf);
-// 		backward_bf_ops[smaller_table_op].push_back(backward_use_bf);
-// 	}
-// 	return {std::move(forward_bf_ops), std::move(backward_bf_ops)};
-// }
-
-unique_ptr<LogicalOperator> RPTOptimizerContextState::BuildStackedBFOperators(unique_ptr<LogicalOperator> base_plan,
-																			   const vector<BloomFilterOperation> &bf_ops,
-																			   bool reverse_order) {
-	if (bf_ops.empty()) {
-		return base_plan;
-	}
-
-	printf("BuildStackedBF: Processing %zu ops, reverse=%d\n", bf_ops.size(), reverse_order);
-
-	// start with the base plan at the bottom
-	unique_ptr<LogicalOperator> current = std::move(base_plan);
-
-	if (reverse_order) {
-		// backward pass: normal iteration (ops already in correct order in vector)
-		int iter = 0;
-		for (const auto &bf_op : bf_ops) {
-			unique_ptr<LogicalOperator> new_op;
-
-			printf("  Bwd iter %d: %s on table %llu\n", iter++, bf_op.is_create ? "CREATE" : "USE",
-				   bf_op.is_create ? bf_op.build_table_idx : bf_op.probe_table_idx);
-
-			if (bf_op.is_create) {
-				new_op = make_uniq<LogicalCreateBF>(bf_op);
-			} else {
-				new_op = make_uniq<LogicalUseBF>(bf_op);
-			}
-
-			new_op->AddChild(std::move(current));
-			current = std::move(new_op);
-		}
-	} else {
-		// forward pass: normal order
-		int iter = 0;
-		for (const auto &bf_op : bf_ops) {
-			unique_ptr<LogicalOperator> new_op;
-
-			printf("  Fwd iter %d: %s on table %llu\n", iter++, bf_op.is_create ? "CREATE" : "USE",
-				   bf_op.is_create ? bf_op.build_table_idx : bf_op.probe_table_idx);
-
-			if (bf_op.is_create) {
-				new_op = make_uniq<LogicalCreateBF>(bf_op);
-			} else {
-				new_op = make_uniq<LogicalUseBF>(bf_op);
-			}
-
-			new_op->AddChild(std::move(current));
-			current = std::move(new_op);
-		}
-	}
-
-	printf("BuildStackedBF: Done, returning operator\n");
-	return current;
-}
-
-unique_ptr<LogicalOperator> RPTOptimizerContextState::ApplyStageModifications(unique_ptr<LogicalOperator> plan,
-																			  const unordered_map<LogicalOperator*, vector<BloomFilterOperation>> &forward_bf_ops,
-																			  const unordered_map<LogicalOperator*, vector<BloomFilterOperation>> &backward_bf_ops) {
-
-	// first apply modifications to children recursively
-	for (auto &child : plan->children) {
-		child = ApplyStageModifications(std::move(child), forward_bf_ops, backward_bf_ops);
-	}
-
-	LogicalOperator* original_op = plan.get();
-
-	// add the forward pass bf operators above the base table operator
-	auto forward_it = forward_bf_ops.find(original_op);
-	if (forward_it != forward_bf_ops.end()) {
-		printf("ApplyStage: Found %zu forward ops for operator %p\n", forward_it->second.size(), original_op);
-		plan = BuildStackedBFOperators(std::move(plan), forward_it->second, false);
-	}
-
-	// add the backward pass bf operators above the forward pass bf operators
-	auto backward_it = backward_bf_ops.find(original_op);
-	if (backward_it != backward_bf_ops.end()) {
-		printf("ApplyStage: Found %zu backward ops for operator %p\n", backward_it->second.size(), original_op);
-		for (size_t i = 0; i < backward_it->second.size(); i++) {
-			const auto &op = backward_it->second[i];
-			printf("  Backward op %zu: %s on table %llu\n", i, op.is_create ? "CREATE" : "USE", op.is_create ? op.build_table_idx : op.probe_table_idx);
-		}
-		plan = BuildStackedBFOperators(std::move(plan), backward_it->second, true);
-	}
-
-	return plan;
-}
-
-unique_ptr<LogicalOperator> RPTOptimizerContextState::Optimize(unique_ptr<LogicalOperator> plan) {
-
-	// step 1: extract join operators
-	vector<JoinEdge> edges = ExtractOperators(*plan);
-
-	// step 2: create transfer graph using LargestRoot algorithm
-	vector<JoinEdge> mst_edges = LargestRoot(edges);
-
-	// step 3: generate forward/backward pass using MST edges
-	const auto bf_ops = GenerateStageModifications(mst_edges);
-	const unordered_map<LogicalOperator *, vector<BloomFilterOperation>> forward_bf_ops = bf_ops.first;
-	const unordered_map<LogicalOperator *, vector<BloomFilterOperation>> backward_bf_ops = bf_ops.second;
-
-	// step 4: insert create_bf/use_bf operators into the plan
-	plan = ApplyStageModifications(std::move(plan), forward_bf_ops, backward_bf_ops);
-
-	// combine all bloom filter operations for debug (preserving order)
-	vector<BloomFilterOperation> all_bf_operations;
-	for (const auto &pair : bf_ops.first) {
-		all_bf_operations.insert(all_bf_operations.end(), pair.second.begin(), pair.second.end());
-	}
-	for (const auto &pair : bf_ops.second) {
-		all_bf_operations.insert(all_bf_operations.end(), pair.second.begin(), pair.second.end());
-	}
-
-	// sort by sequence number to restore generation order
-	std::sort(all_bf_operations.begin(), all_bf_operations.end(),
-		[](const BloomFilterOperation &a, const BloomFilterOperation &b) {
-			return a.sequence_number < b.sequence_number;
-		});
-
-	// debug print with correct ordering
-	DebugPrintMST(mst_edges, all_bf_operations);
-	return plan;
-}
-
-
-// extension hooks
-// void PredicateTransferOptimizer::PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
-// 	// create optimizer state using proper DuckDB state management
-// 	auto optimizer_state = input.context.registered_state->GetOrCreate<PredicateTransferOptimizer>(
-// 		"rpt_optimizer_state", input.context);
-//
-// 	plan = optimizer_state->PreOptimize(std::move(plan));
-// }
-
-void RPTOptimizerContextState::Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
-	// retrieve the optimizer state from ClientContext
-	const auto optimizer_state = input.context.registered_state->GetOrCreate<RPTOptimizerContextState>("rpt_optimizer_state", input.context);
-	// if (!optimizer_state) {
-	// 	optimizer_state = input.context.registered_state->GetOrCreate<PredicateTransferOptimizer>(
-	// 		"rpt_optimizer_state", input.context);
-	// }
-
-	plan = optimizer_state->Optimize(std::move(plan));
-
-	// cleanup
-	input.context.registered_state->Remove("rpt_optimizer_state");
-}
-
-} // namespace duckdb
diff --git a/src/optimizer/rpt_optimizer.hpp b/src/optimizer/rpt_optimizer.hpp
deleted file mode 100644
index 8666ef0..0000000
--- a/src/optimizer/rpt_optimizer.hpp
+++ /dev/null
@@ -1,81 +0,0 @@
-#pragma once
-
-#include "graph_manager.hpp"
-#include "table_manager.hpp"
-#include "duckdb/main/client_context_state.hpp"
-#include "duckdb/common/vector.hpp"
-#include "duckdb/common/unordered_map.hpp"
-
-namespace duckdb {
-
-// tree node for rooted MST representation
-struct TreeNode {
-	idx_t table_idx;
-	LogicalOperator* table_op;
-	vector<TreeNode*> children;
-	TreeNode* parent;
-	int level; // distance from root (root = 0)
-	JoinEdge* edge_to_parent; // null for root
-
-	TreeNode(idx_t idx, LogicalOperator* op)
-		: table_idx(idx), table_op(op), parent(nullptr), level(0), edge_to_parent(nullptr) {}
-};
-
-class RPTOptimizerContextState : public ClientContextState {
-public:
-	explicit RPTOptimizerContextState(ClientContext &context) {}
-
-	vector<JoinEdge> join_edges;
-//	map<table_id, idx_t> table_cardinalities;
-	map<LogicalOperator *, idx_t> operator_to_table_id;
-
-	TableManager table_mgr;
-	vector<LogicalOperator*> join_ops;
-
-	unordered_map<ColumnBinding, ColumnBinding, ColumnBindingHashFunction> rename_col_bindings;
-public:
-	// extract all the join edges from the plan
-	//vector<JoinEdge> ExtractOperators(LogicalOperator &plan, vector<LogicalOperator*> &join_ops);
-	vector<JoinEdge> ExtractOperators(LogicalOperator &plan);
-	void ExtractOperatorsRecursive(LogicalOperator &plan, vector<LogicalOperator*> &join_ops);
-	vector<JoinEdge> CreateJoinEdges(vector<LogicalOperator*> &join_ops);
-	vector<JoinEdge> LargestRoot(vector<JoinEdge> &edges);
-
-	// build rooted tree from MST edges with largest table as root
-	TreeNode* BuildRootedTree(vector<JoinEdge> &mst_edges);
-
-	// void CreateForwardPassModifications(LogicalOperator *smaller_table_op, LogicalOperator *larger_table_op,
-	// 														const vector<ColumnBinding> &smaller_columns, const vector<ColumnBinding> &larger_columns,
-	// 														unordered_map<LogicalOperator*, unique_ptr<LogicalOperator>> &forward_pass);
-	//
-	// void CreateBackwardPassModifications(LogicalOperator *smaller_table_op, LogicalOperator *larger_table_op,
-	// 														const vector<ColumnBinding> &smaller_columns, const vector<ColumnBinding> &larger_columns,
-	// 														unordered_map<LogicalOperator*, unique_ptr<LogicalOperator>> &backward_pass);
-	//
-	std::pair<unordered_map<LogicalOperator*, vector<BloomFilterOperation>>,
-			unordered_map<LogicalOperator*, vector<BloomFilterOperation>>>
-	GenerateStageModifications(const vector<JoinEdge> &mst_edges);
-
-	unique_ptr<LogicalOperator> BuildStackedBFOperators(unique_ptr<LogicalOperator> base_plan,
-							     const vector<BloomFilterOperation> &bf_ops,
-							     bool reverse_order = false);
-
-	unique_ptr<LogicalOperator> ApplyStageModifications(unique_ptr<LogicalOperator> plan,
-							   const unordered_map<LogicalOperator*, vector<BloomFilterOperation>> &forward_bf_ops,
-							   const unordered_map<LogicalOperator*, vector<BloomFilterOperation>> &backward_bf_ops);
-	// debug functions
-	void DebugPrintGraph(const vector<JoinEdge> &edges) const;
-	void DebugPrintMST(const vector<JoinEdge> &mst_edges, const vector<BloomFilterOperation> &bf_operations);
-
-	unique_ptr<LogicalOperator> PreOptimize(unique_ptr<LogicalOperator> plan);
-
-	unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan);
-
-	// entry point for extension framework
-	static void PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan);
-	static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan);
-};
-
-} // namespace duckdb
-
-
diff --git a/src/optimizer/table_manager.cpp b/src/optimizer/table_manager.cpp
index fc3759e..b160a90 100644
--- a/src/optimizer/table_manager.cpp
+++ b/src/optimizer/table_manager.cpp
@@ -1,7 +1,38 @@
 #include "table_manager.hpp"
+#include "duckdb/planner/operator/logical_get.hpp"
+#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
+#include "utils/debug_utils.hpp"
 
 namespace duckdb {
 
+// helper to get operator type name for debug
+static const char *GetOpTypeName(LogicalOperatorType type) {
+	switch (type) {
+	case LogicalOperatorType::LOGICAL_GET:
+		return "GET";
+	case LogicalOperatorType::LOGICAL_FILTER:
+		return "FILTER";
+	case LogicalOperatorType::LOGICAL_PROJECTION:
+		return "PROJECTION";
+	case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
+		return "COMPARISON_JOIN";
+	case LogicalOperatorType::LOGICAL_DELIM_JOIN:
+		return "DELIM_JOIN";
+	case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY:
+		return "AGGREGATE";
+	case LogicalOperatorType::LOGICAL_WINDOW:
+		return "WINDOW";
+	case LogicalOperatorType::LOGICAL_UNION:
+		return "UNION";
+	case LogicalOperatorType::LOGICAL_CHUNK_GET:
+		return "CHUNK_GET";
+	case LogicalOperatorType::LOGICAL_DELIM_GET:
+		return "DELIM_GET";
+	default:
+		return "OTHER";
+	}
+}
+
 void TableManager::AddTable(const TableInfo &table) {
 	table_lookup[table.table_idx] = table;
 	table_ops.push_back(table);
@@ -31,15 +62,20 @@ idx_t TableManager::GetScalarTableIndex(LogicalOperator *op) {
 }
 
 void TableManager::AddTableOperator(LogicalOperator *op) {
-	// op->estimated_cardinality = op->EstimateCardinality(context);
 	TableInfo tbl_info;
 	tbl_info.estimated_cardinality = op->estimated_cardinality;
 	tbl_info.table_idx = GetScalarTableIndex(op);
 	table_id table_idx = tbl_info.table_idx;
 	tbl_info.table_op = op;
+
 	if (table_idx != std::numeric_limits<idx_t>::max() && table_lookup.find(table_idx) == table_lookup.end()) {
+		D_PRINTF("[NODE_REG] AddTableOperator: type=%s, table_idx=%llu, cardinality=%llu", GetOpTypeName(op->type),
+		         (unsigned long long)table_idx, (unsigned long long)tbl_info.estimated_cardinality);
 		table_lookup[table_idx] = tbl_info;
 		table_ops.push_back(tbl_info);
+	} else if (table_idx != std::numeric_limits<idx_t>::max()) {
+		D_PRINTF("[NODE_REG] AddTableOperator SKIPPED (already exists): type=%s, table_idx=%llu",
+		         GetOpTypeName(op->type), (unsigned long long)table_idx);
 	}
 }
 
@@ -49,10 +85,60 @@ TableInfo *TableManager::GetTableInfo(LogicalOperator *op) {
 	}
 
 	idx_t table_idx = GetScalarTableIndex(op);
-	if(table_lookup.find(table_idx) == table_lookup.end()) {
+	if (table_lookup.find(table_idx) == table_lookup.end()) {
 		return nullptr;
 	}
 	return &table_lookup[table_idx];
 }
 
+LogicalGet *TableManager::FindLogicalGet(LogicalOperator *op) {
+	if (!op) {
+		return nullptr;
+	}
+	if (op->type == LogicalOperatorType::LOGICAL_GET) {
+		return &op->Cast<LogicalGet>();
+	}
+	for (auto &child : op->children) {
+		auto *result = FindLogicalGet(child.get());
+		if (result) {
+			return result;
+		}
+	}
+	return nullptr;
+}
+
+string TableManager::GetTableName(idx_t table_idx) {
+	auto it = table_lookup.find(table_idx);
+	if (it == table_lookup.end()) {
+		return "table_" + std::to_string(table_idx);
+	}
+	auto *get = FindLogicalGet(it->second.table_op);
+	if (get) {
+		auto table = get->GetTable();
+		if (table) {
+			return table->name;
+		}
+	}
+	return "table_" + std::to_string(table_idx);
+}
+
+string TableManager::GetColumnName(idx_t table_idx, idx_t column_index) {
+	auto it = table_lookup.find(table_idx);
+	if (it == table_lookup.end()) {
+		return "col_" + std::to_string(column_index);
+	}
+	auto *get = FindLogicalGet(it->second.table_op);
+	if (!get) {
+		return "col_" + std::to_string(column_index);
+	}
+	auto &col_ids = get->GetColumnIds();
+	if (column_index < col_ids.size()) {
+		idx_t primary_idx = col_ids[column_index].GetPrimaryIndex();
+		if (primary_idx < get->names.size()) {
+			return get->names[primary_idx];
+		}
+	}
+	return "col_" + std::to_string(column_index);
+}
+
 } // namespace duckdb
diff --git a/src/optimizer/table_manager.hpp b/src/optimizer/table_manager.hpp
index 3b2053e..2fd31ac 100644
--- a/src/optimizer/table_manager.hpp
+++ b/src/optimizer/table_manager.hpp
@@ -2,15 +2,15 @@
 
 #include "duckdb/common/types.hpp"
 #include "duckdb/planner/logical_operator.hpp"
+#include "duckdb/planner/operator/logical_get.hpp"
 #include <map>
 
-
 namespace duckdb {
 typedef idx_t table_id;
 
 struct TableInfo {
 	table_id table_idx;
-	LogicalOperator* table_op;  // LogicalGet, LogicalFilter->LogicalGet, etc.
+	LogicalOperator *table_op; // LogicalGet, LogicalFilter->LogicalGet, etc.
 	idx_t estimated_cardinality;
 };
 
@@ -23,11 +23,18 @@ class TableManager {
 
 public:
 	void AddTable(const TableInfo &table);
-	TableInfo* GetTableInfo(LogicalOperator *op);
+	TableInfo *GetTableInfo(LogicalOperator *op);
 
 	idx_t GetScalarTableIndex(LogicalOperator *op);
 
 	void AddTableOperator(LogicalOperator *op);
+
+	// navigate from registered operator to underlying LogicalGet
+	static LogicalGet *FindLogicalGet(LogicalOperator *op);
+	// resolve table name from table index
+	string GetTableName(idx_t table_idx);
+	// resolve column name from table index and column binding index
+	string GetColumnName(idx_t table_idx, idx_t column_index);
 };
 
 } // namespace duckdb
diff --git a/src/predicate_transfer_optimization.cpp b/src/predicate_transfer_optimization.cpp
deleted file mode 100644
index dcf1c10..0000000
--- a/src/predicate_transfer_optimization.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-#include "predicate_transfer_optimization.hpp"
-#include "duckdb/planner/operator/logical_get.hpp"
-#include "operators/logical_use_bf.hpp"
-#include "duckdb/planner/expression/bound_columnref_expression.hpp"
-#include "duckdb/planner/expression_iterator.hpp"
-#include "duckdb/main/prepared_statement_data.hpp"
-#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
-#include "duckdb/execution/index/art/node.hpp"
-#include "duckdb/planner/operator/logical_projection.hpp"
-#include "duckdb/main/attached_database.hpp"
-#include "duckdb/main/client_data.hpp"
-#include "duckdb/planner/operator/logical_filter.hpp"
-
-namespace duckdb {
-
-unique_ptr<LogicalOperator> PredicateTransferOptimizer::PreOptimize(unique_ptr<LogicalOperator> plan) {
-	// preoptimize does nothing - transfer graph building moved to optimize phase
-	// to ensure table filters are populated after duckdb's filter pushdown
-	graph_manager.Build(*plan);
-	return plan;
-}
-
-unique_ptr<LogicalOperator> PredicateTransferOptimizer::Optimize(unique_ptr<LogicalOperator> plan) {
-	// build transfer graph after filter pushdown has occurred
-	// if (!graph_manager.Build(*plan)) {
-	// 	// if build fails (< 2 tables or other issues), return plan unchanged
-	// 	return plan;
-	// }
-	
-	auto &ordered_nodes = graph_manager.transfer_order;
-
-	// **Forward pass**: Process nodes in reverse order (from last to first)
-	// - Generate Bloom Filters (BFs) based on predicates
-	// - Add BFs to the corresponding edges in the graph
-	for (auto it = ordered_nodes.rbegin(); it != ordered_nodes.rend(); ++it) {
-		auto *current_node = *it;
-		for (auto &BF_plan : CreateBloomFilterPlan(*current_node, false)) {
-			graph_manager.AddFilterPlan(BF_plan.first, BF_plan.second, false);
-		}
-	}
-
-	// **Backward pass**: Process nodes in original order (from first to last)
-	// - Similar to the forward pass, but for backward edges
-	for (auto *current_node : ordered_nodes) {
-		for (auto &BF_plan : CreateBloomFilterPlan(*current_node, true)) {
-			graph_manager.AddFilterPlan(BF_plan.first, BF_plan.second, true);
-		}
-	}
-
-	return InsertTransferOperators(std::move(plan));
-}
-
-unique_ptr<LogicalOperator> PredicateTransferOptimizer::InsertTransferOperators(unique_ptr<LogicalOperator> plan) {
-	for (auto &child : plan->children) {
-		child = InsertTransferOperators(std::move(child));
-	}
-
-	// Store original operator pointer
-	LogicalOperator *original_operator = plan.get();
-	auto apply_modification = [&](std::unordered_map<LogicalOperator *, unique_ptr<LogicalOperator>> &modify_map) {
-		auto it = modify_map.find(original_operator);
-		if (it == modify_map.end()) {
-			// No modification needed
-			return;
-		}
-
-		auto *last_op = it->second.get();
-		// auto *last_creator = (it->second->type == LogicalOperatorType::LOGICAL_CREATE_BF) ? it->second.get() : nullptr;
-		// TODO: Update condition to reflect the LOGICAL_CREATE_BF operator extension
-		auto *last_creator = (it->second->type == LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR) ? it->second.get() : nullptr;
-		while (!last_op->children.empty()) {
-			last_op = last_op->children[0].get();
-			// if (last_op->type == LogicalOperatorType::LOGICAL_CREATE_BF) {
-			// TODO: Update condition to reflect the LOGICAL_CREATE_BF operator extension
-			if (last_op->type == LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR) {
-				last_creator = last_op;
-			}
-		}
-
-		// table scan
-		if (last_creator == last_op || plan->type != LogicalOperatorType::LOGICAL_FILTER) {
-			last_op->AddChild(std::move(plan));
-			plan = std::move(it->second);
-			return;
-		}
-
-		// in clause
-		auto &filter = plan->Cast<LogicalFilter>();
-		if (!filter.expressions.empty() && filter.expressions[0]->type == ExpressionType::BOUND_COLUMN_REF) {
-			last_op->AddChild(std::move(plan));
-			plan = std::move(it->second);
-			return;
-		}
-
-		// creator ---> user --> creator --> filter --> user --> user --> scan
-		last_op->AddChild(std::move(plan->children[0]));
-		if (last_creator == nullptr) {
-			plan->children[0] = std::move(it->second);
-		} else {
-			plan->children[0] = std::move(last_creator->children[0]);
-			last_creator->children[0] = std::move(plan);
-			plan = std::move(it->second);
-		}
-	};
-
-	apply_modification(forward_stage_modification);
-	apply_modification(backward_stage_modification);
-
-	return plan;
-}
-
-vector<pair<idx_t, shared_ptr<FilterPlan>>> PredicateTransferOptimizer::CreateBloomFilterPlan(LogicalOperator &node,
-                                                                                              bool reverse) {
-	vector<pair<idx_t, shared_ptr<FilterPlan>>> result;
-
-	vector<shared_ptr<FilterPlan>> bfs_to_use_plan;
-	vector<shared_ptr<FilterPlan>> bfs_to_create_plan;
-
-	idx_t node_id = TableOperatorManager::GetScalarTableIndex(&node);
-	if (node_id == std::numeric_limits<idx_t>::max() ||
-	    graph_manager.transfer_graph.find(node_id) == graph_manager.transfer_graph.end()) {
-		return result;
-	}
-
-	// Use Bloom Filter
-	vector<idx_t> parent_nodes;
-	GetAllBFsToUse(node_id, bfs_to_use_plan, parent_nodes, reverse);
-
-	// Create Bloom Filter
-	GetAllBFsToCreate(node_id, bfs_to_create_plan, reverse);
-
-	auto &replace_map = reverse ? backward_stage_modification : forward_stage_modification;
-
-	if (!bfs_to_use_plan.empty() && !bfs_to_create_plan.empty()) {
-		auto last_use_bf = BuildUseBFOperator(node, bfs_to_use_plan);
-		auto create_bf = BuildCreateBFOperator(node, bfs_to_create_plan);
-		for (auto &filter : create_bf->filter_plans) {
-			result.emplace_back(make_pair(node_id, filter));
-		}
-		create_bf->AddChild(unique_ptr_cast<LogicalUseBF, LogicalOperator>(std::move(last_use_bf)));
-		replace_map[&node] = std::move(create_bf);
-	} else if (!bfs_to_use_plan.empty()) {
-		auto last_use_bf = BuildUseBFOperator(node, bfs_to_use_plan);
-		replace_map[&node] = std::move(last_use_bf);
-	} else if (!bfs_to_create_plan.empty()) {
-		if (!HasAnyFilter(node, reverse)) {
-			return result;
-		}
-
-		auto create_bf = BuildCreateBFOperator(node, bfs_to_create_plan);
-		for (auto &filter : create_bf->filter_plans) {
-			result.emplace_back(make_pair(node_id, filter));
-		}
-		replace_map[&node] = std::move(create_bf);
-	}
-
-	return result;
-}
-
-void PredicateTransferOptimizer::GetAllBFsToUse(idx_t cur_node_id, vector<shared_ptr<FilterPlan>> &bfs_to_use_plan,
-                                                vector<idx_t> &parent_nodes, bool reverse) {
-	auto &node = graph_manager.transfer_graph[cur_node_id];
-	auto &edges = reverse ? node->backward_stage_edges.in : node->forward_stage_edges.in;
-
-	for (auto &edge : edges) {
-		for (auto &bf : edge->filter_plan) {
-			if (std::find(bfs_to_use_plan.begin(), bfs_to_use_plan.end(), bf) == bfs_to_use_plan.end()) {
-				bfs_to_use_plan.emplace_back(bf);
-				parent_nodes.emplace_back(edge->destination);
-			}
-		}
-	}
-}
-
-void PredicateTransferOptimizer::GetAllBFsToCreate(idx_t cur_node_id,
-                                                   vector<shared_ptr<FilterPlan>> &bfs_to_create_plan, bool reverse) {
-	auto &node = graph_manager.transfer_graph[cur_node_id];
-	auto &edges = reverse ? node->backward_stage_edges.out : node->forward_stage_edges.out;
-
-	// We cannot create BFs for these operators, because the current dynamic pipeline does not support them.
-	auto &base_table = graph_manager.table_operator_manager.table_operators[cur_node_id];
-	if (base_table->type == LogicalOperatorType::LOGICAL_UNION ||
-	    base_table->type == LogicalOperatorType::LOGICAL_EXCEPT ||
-	    base_table->type == LogicalOperatorType::LOGICAL_INTERSECT ||
-	    base_table->type == LogicalOperatorType::LOGICAL_WINDOW) {
-		return;
-	}
-
-	for (auto &edge : edges) {
-		auto bf_plan = make_shared_ptr<FilterPlan>();
-
-		// Each expression leads to a bloom filter on a column on this table
-		idx_t size = edge->left.size();
-		for (idx_t i = 0; i < size; ++i) {
-			auto &left_binding = edge->left[i];
-			auto &right_binding = edge->right[i];
-			auto &return_type = edge->return_types[i];
-
-			bf_plan->return_types.push_back(return_type);
-
-			auto binding0 = graph_manager.table_operator_manager.GetRenaming(left_binding);
-			auto binding1 = graph_manager.table_operator_manager.GetRenaming(right_binding);
-
-			if (binding0.table_index == cur_node_id) {
-				bf_plan->build.push_back(binding0);
-				bf_plan->apply.push_back(binding1);
-			} else if (binding1.table_index == cur_node_id) {
-				bf_plan->build.push_back(binding1);
-				bf_plan->apply.push_back(binding0);
-			}
-		}
-		if (!bf_plan->build.empty()) {
-			bfs_to_create_plan.emplace_back(std::move(bf_plan));
-		} else {
-			throw InternalException("No built column found!");
-		}
-	}
-}
-
-unique_ptr<LogicalCreateBF>
-PredicateTransferOptimizer::BuildCreateBFOperator(LogicalOperator &node, vector<shared_ptr<FilterPlan>> &bf_plans) {
-	auto create_bf = make_uniq<LogicalCreateBF>(bf_plans);
-	create_bf->SetEstimatedCardinality(node.estimated_cardinality);
-	return create_bf;
-}
-
-unique_ptr<LogicalUseBF> PredicateTransferOptimizer::BuildUseBFOperator(LogicalOperator &node,
-                                                                        vector<shared_ptr<FilterPlan>> &bf_plans) {
-	unique_ptr<LogicalUseBF> last_operator;
-
-	// This is important for performance, not use (int i = 0; i < temp_result_to_use.size(); i++)
-	for (auto it = bf_plans.rbegin(); it != bf_plans.rend(); ++it) {
-		auto use_bf_operator = make_uniq<LogicalUseBF>(*it);
-		use_bf_operator->SetEstimatedCardinality(node.estimated_cardinality);
-		if (last_operator) {
-			use_bf_operator->AddChild(std::move(last_operator));
-		}
-		last_operator = std::move(use_bf_operator);
-	}
-
-	return last_operator;
-}
-
-bool PredicateTransferOptimizer::HasAnyFilter(LogicalOperator &node, bool reverse) {
-	if (!reverse || (forward_stage_modification.find(&node) == forward_stage_modification.end())) {
-		if (node.type == LogicalOperatorType::LOGICAL_GET) {
-			auto &get = node.Cast<LogicalGet>();
-			if (get.table_filters.filters.empty()) {
-				return false;
-			}
-		} else if (node.type == LogicalOperatorType::LOGICAL_UNION) {
-			return false;
-		}
-	}
-
-	return true;
-}
-
-void PredicateTransferOptimizer::GetColumnBindingExpression(Expression &expr,
-                                                            vector<BoundColumnRefExpression *> &expressions) {
-	if (expr.type == ExpressionType::BOUND_COLUMN_REF) {
-		Expression *expr_ptr = &expr;
-		BoundColumnRefExpression *col_ref = static_cast<BoundColumnRefExpression *>(expr_ptr);
-		D_ASSERT(col_ref->depth == 0);
-		expressions.emplace_back(col_ref);
-	} else {
-		ExpressionIterator::EnumerateChildren(
-		    expr, [&](unique_ptr<Expression> &child) { GetColumnBindingExpression(*child, expressions); });
-	}
-}
-
-void PredicateTransferOptimizer::PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
-	// create optimizer state using proper DuckDB state management
-	auto optimizer_state = input.context.registered_state->GetOrCreate<PredicateTransferOptimizer>(
-		"rpt_optimizer_state", input.context);
-
-	plan = optimizer_state->PreOptimize(std::move(plan));
-}
-
-void PredicateTransferOptimizer::Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
-	// retrieve the optimizer state from ClientContext
-	auto optimizer_state = input.context.registered_state->Get<PredicateTransferOptimizer>("rpt_optimizer_state");
-	if (!optimizer_state) {
-		optimizer_state = input.context.registered_state->GetOrCreate<PredicateTransferOptimizer>(
-			"rpt_optimizer_state", input.context);
-	}
-
-	plan = optimizer_state->Optimize(std::move(plan));
-	
-	// cleanup
-	input.context.registered_state->Remove("rpt_optimizer_state");
-}
-
-} // namespace duckdb
diff --git a/src/predicate_transfer_optimization.hpp b/src/predicate_transfer_optimization.hpp
deleted file mode 100644
index 221e9f9..0000000
--- a/src/predicate_transfer_optimization.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#pragma once
-
-#include "transfer_graph_manager.hpp"
-#include "operators/logical_create_bf.hpp"
-#include "operators/logical_use_bf.hpp"
-#include "duckdb/optimizer/optimizer_extension.hpp"
-#include "duckdb/main/client_context_state.hpp"
-
-namespace duckdb {
-using BloomFilters = vector<shared_ptr<BloomFilter>>;
-
-class PredicateTransferOptimizer : public ClientContextState {
-public:
-	explicit PredicateTransferOptimizer(ClientContext &context) : graph_manager(context) {
-	}
-
-	//! Extract the query join information, note that this function must be called before join order optimization,
-	//! because some join conditions are lost during join order optimization.
-	unique_ptr<LogicalOperator> PreOptimize(unique_ptr<LogicalOperator> plan);
-
-	//! Create bloom filters and insert them into the query plan, note that this function must be called after join
-	//! order optimization, because it cannot handle newly inserted operator correctly.
-	unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan);
-
-	//! Static functions for extension framework integration
-	static void PreOptimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan);
-	static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan);
-
-private:
-	unique_ptr<LogicalOperator> InsertTransferOperators(unique_ptr<LogicalOperator> plan);
-
-	//! Create Bloom filter and use existing Bloom filter for the given scan or filter node
-	vector<pair<idx_t, shared_ptr<FilterPlan>>> CreateBloomFilterPlan(LogicalOperator &node, bool reverse = false);
-
-	void GetAllBFsToUse(idx_t cur_node_id, vector<shared_ptr<FilterPlan>> &bfs_to_use_plan, vector<idx_t> &parent_nodes,
-	                    bool reverse);
-	void GetAllBFsToCreate(idx_t cur_node_id, vector<shared_ptr<FilterPlan>> &bfs_to_create_plan, bool reverse);
-
-	static unique_ptr<LogicalCreateBF> BuildCreateBFOperator(LogicalOperator &node,
-	                                                         vector<shared_ptr<FilterPlan>> &bf_plans);
-	static unique_ptr<LogicalUseBF> BuildUseBFOperator(LogicalOperator &node, vector<shared_ptr<FilterPlan>> &bf_plans);
-
-	bool HasAnyFilter(LogicalOperator &node, bool reverse = false);
-
-	//! which column(s) involved in this expression?
-	static void GetColumnBindingExpression(Expression &expr, vector<BoundColumnRefExpression *> &expressions);
-
-private:
-	TransferGraphManager graph_manager;
-
-	//! we use a map to record how to modify/update the operators in the query plan.
-	std::unordered_map<LogicalOperator *, unique_ptr<LogicalOperator>> forward_stage_modification;
-	std::unordered_map<LogicalOperator *, unique_ptr<LogicalOperator>> backward_stage_modification;
-};
-} // namespace duckdb
diff --git a/src/robust_extension.cpp b/src/robust_extension.cpp
new file mode 100644
index 0000000..47aa9e5
--- /dev/null
+++ b/src/robust_extension.cpp
@@ -0,0 +1,106 @@
+#define DUCKDB_EXTENSION_MAIN
+
+#include "robust_extension.hpp"
+#include "duckdb.hpp"
+#include "duckdb/common/exception.hpp"
+#include "duckdb/function/scalar_function.hpp"
+#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
+#include "duckdb/optimizer/optimizer_extension.hpp"
+#include "duckdb/planner/operator_extension.hpp"
+#include "operators/logical_create_filter.hpp"
+#include "operators/logical_probe_filter.hpp"
+#include "optimizer/robust_optimizer.hpp"
+#include "duckdb/main/config.hpp"
+
+// OpenSSL linked through vcpkg
+#include <openssl/opensslv.h>
+
+namespace duckdb {
+
+class CreateFilterOperatorExtension : public OperatorExtension {
+public:
+	std::string GetName() override {
+		return "logical_create_filter";
+	}
+
+	unique_ptr<LogicalExtensionOperator> Deserialize(Deserializer &deserializer) override {
+		return make_uniq<LogicalCreateFilter>();
+	}
+};
+
+class ProbeFilterOperatorExtension : public OperatorExtension {
+public:
+	std::string GetName() override {
+		return "logical_probe_filter";
+	}
+
+	unique_ptr<LogicalExtensionOperator> Deserialize(Deserializer &deserializer) override {
+		return make_uniq<LogicalProbeFilter>();
+	}
+};
+
+static void LoadInternal(ExtensionLoader &loader) {
+	// Register the SIP optimizer rule
+	OptimizerExtension optimizer;
+	// optimizer.pre_optimize_function = RobustOptimizerContextState::PreOptimize;
+	optimizer.optimize_function = RobustOptimizerContextState::Optimize;
+
+	DatabaseInstance &instance = loader.GetDatabaseInstance();
+	OptimizerExtension::Register(instance.config, optimizer);
+
+	// Register logical operators
+	OperatorExtension::Register(instance.config, make_shared_ptr<CreateFilterOperatorExtension>());
+	OperatorExtension::Register(instance.config, make_shared_ptr<ProbeFilterOperatorExtension>());
+
+	// Register profiling setting
+	auto &config = DBConfig::GetConfig(instance);
+	config.AddExtensionOption("robust_profiling", "Enable Robust extension profiling output", LogicalType::BOOLEAN,
+	                          Value::BOOLEAN(false));
+	config.AddExtensionOption("robust_display_dag", "Display Robust transfer DAG", LogicalType::BOOLEAN,
+	                          Value::BOOLEAN(false));
+	config.AddExtensionOption("robust_display_physical_dag", "Display DAG from DuckDB join order", LogicalType::BOOLEAN,
+	                          Value::BOOLEAN(false));
+	config.AddExtensionOption("robust_filter_type", "Filter type for scan pushdown: all, bf_only, minmax_only",
+	                          LogicalType::VARCHAR, Value("all"));
+	config.AddExtensionOption("robust_pass_mode", "Pass mode: both, forward_only", LogicalType::VARCHAR, Value("both"));
+	config.AddExtensionOption("robust_heuristic", "Heuristic for BF transfer: join_order (default), largest_root",
+	                          LogicalType::VARCHAR, Value("join_order"));
+	config.AddExtensionOption("robust_flip_roots", "Flip non-largest roots to leaves in join_order DAG",
+	                          LogicalType::BOOLEAN, Value::BOOLEAN(true));
+	config.AddExtensionOption("robust_dynamic_or_filter_threshold",
+	                          "Max distinct build keys to push as IN-filter instead of bloom filter",
+	                          LogicalType::UBIGINT, Value::UBIGINT(50));
+}
+
+void RobustExtension::Load(ExtensionLoader &loader) {
+	LoadInternal(loader);
+}
+
+std::string RobustExtension::Name() {
+	return "robust";
+}
+
+std::string RobustExtension::Version() const {
+#ifdef EXT_VERSION_ROBUST
+	return EXT_VERSION_ROBUST;
+#else
+	return "";
+#endif
+}
+
+} // namespace duckdb
+
+extern "C" {
+
+DUCKDB_CPP_EXTENSION_ENTRY(robust, loader) {
+	duckdb::LoadInternal(loader);
+}
+
+DUCKDB_EXTENSION_API const char *robust_version() {
+	return duckdb::DuckDB::LibraryVersion();
+}
+}
+
+#ifndef DUCKDB_EXTENSION_MAIN
+#error DUCKDB_EXTENSION_MAIN not defined
+#endif
diff --git a/src/include/rpt_extension.hpp b/src/robust_extension.hpp
similarity index 64%
rename from src/include/rpt_extension.hpp
rename to src/robust_extension.hpp
index e1b9ee1..75c2434 100644
--- a/src/include/rpt_extension.hpp
+++ b/src/robust_extension.hpp
@@ -4,9 +4,9 @@
 
 namespace duckdb {
 
-class RptExtension : public Extension {
+class RobustExtension : public Extension {
 public:
-	void Load(DuckDB &db) override;
+	void Load(ExtensionLoader &loader) override;
 	std::string Name() override;
 	std::string Version() const override;
 };
diff --git a/src/robust_profiling.hpp b/src/robust_profiling.hpp
new file mode 100644
index 0000000..206ac04
--- /dev/null
+++ b/src/robust_profiling.hpp
@@ -0,0 +1,231 @@
+#pragma once
+
+#include "duckdb/main/client_context_state.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/common/printer.hpp"
+#include "duckdb/common/string_util.hpp"
+#include <chrono>
+#include <atomic>
+#include <mutex>
+#include <vector>
+#include <algorithm>
+#include <map>
+
+namespace duckdb {
+
+struct CreateFilterStats {
+	idx_t sequence_number = 0;
+	idx_t build_table_idx = 0;
+	vector<idx_t> probe_table_indices;
+	bool is_forward_pass = false;
+	std::atomic<idx_t> rows_materialized {0};
+	std::atomic<int64_t> sink_time_us {0};
+	std::atomic<int64_t> finalize_time_us {0};
+	std::atomic<int64_t> source_time_us {0};
+};
+
+struct ProbeFilterStats {
+	idx_t sequence_number = 0;
+	idx_t build_table_idx = 0;
+	idx_t probe_table_idx = 0;
+	bool is_forward_pass = false;
+	std::atomic<idx_t> rows_in {0};
+	std::atomic<idx_t> rows_out {0};
+	std::atomic<int64_t> probe_time_us {0};
+};
+
+// RAII timer that adds elapsed microseconds to an atomic counter
+struct ScopedTimer {
+	std::atomic<int64_t> &target;
+	std::chrono::high_resolution_clock::time_point start;
+
+	explicit ScopedTimer(std::atomic<int64_t> &target) : target(target) {
+		start = std::chrono::high_resolution_clock::now();
+	}
+	~ScopedTimer() {
+		auto end = std::chrono::high_resolution_clock::now();
+		target.fetch_add(std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(),
+		                 std::memory_order_relaxed);
+	}
+};
+
+class RobustProfilingState : public ClientContextState {
+public:
+	explicit RobustProfilingState(bool enabled) : enabled(enabled) {
+	}
+
+	bool enabled;
+	int64_t optimizer_time_us = 0;
+
+	// table index -> resolved table name (populated at optimizer time)
+	std::map<idx_t, string> table_names;
+
+	mutex stats_lock;
+	vector<shared_ptr<CreateFilterStats>> create_filter_stats;
+	vector<shared_ptr<ProbeFilterStats>> probe_filter_stats;
+
+	string GetName(idx_t table_idx) const {
+		auto it = table_names.find(table_idx);
+		if (it != table_names.end()) {
+			return it->second;
+		}
+		return "table_" + std::to_string(table_idx);
+	}
+
+	shared_ptr<CreateFilterStats> RegisterCreateFilter(idx_t build_table_idx,
+	                                                   const vector<ColumnBinding> &probe_columns,
+	                                                   idx_t sequence_number, bool is_forward_pass) {
+		lock_guard<mutex> lock(stats_lock);
+		auto stats = make_shared_ptr<CreateFilterStats>();
+		stats->sequence_number = sequence_number;
+		stats->build_table_idx = build_table_idx;
+		stats->is_forward_pass = is_forward_pass;
+		// extract unique probe table indices from probe columns
+		for (const auto &col : probe_columns) {
+			if (stats->probe_table_indices.empty() || stats->probe_table_indices.back() != col.table_index) {
+				// check if already present
+				bool found = false;
+				for (auto idx : stats->probe_table_indices) {
+					if (idx == col.table_index) {
+						found = true;
+						break;
+					}
+				}
+				if (!found) {
+					stats->probe_table_indices.push_back(col.table_index);
+				}
+			}
+		}
+		create_filter_stats.push_back(stats);
+		return stats;
+	}
+
+	shared_ptr<ProbeFilterStats> RegisterProbeFilter(idx_t build_table_idx, idx_t probe_table_idx,
+	                                                 idx_t sequence_number, bool is_forward_pass) {
+		lock_guard<mutex> lock(stats_lock);
+		auto stats = make_shared_ptr<ProbeFilterStats>();
+		stats->sequence_number = sequence_number;
+		stats->build_table_idx = build_table_idx;
+		stats->probe_table_idx = probe_table_idx;
+		stats->is_forward_pass = is_forward_pass;
+		probe_filter_stats.push_back(stats);
+		return stats;
+	}
+
+	void QueryEnd(ClientContext &context) override {
+		if (!enabled) {
+			return;
+		}
+		PrintSummary();
+		context.registered_state->Remove("robust_profiling");
+	}
+
+	void PrintSummary() {
+		Printer::Print("\n=== Robust PROFILING ===");
+		Printer::PrintF("Optimizer: %lld us", (int64_t)optimizer_time_us);
+
+		// build a combined list sorted by sequence_number
+		struct StatsEntry {
+			idx_t seq;
+			bool is_create;
+			size_t idx;
+		};
+		vector<StatsEntry> entries;
+		for (size_t i = 0; i < create_filter_stats.size(); i++) {
+			entries.push_back({create_filter_stats[i]->sequence_number, true, i});
+		}
+		for (size_t i = 0; i < probe_filter_stats.size(); i++) {
+			entries.push_back({probe_filter_stats[i]->sequence_number, false, i});
+		}
+		std::sort(entries.begin(), entries.end(),
+		          [](const StatsEntry &a, const StatsEntry &b) { return a.seq < b.seq; });
+
+		// per-pass accumulators
+		int64_t fwd_rows_in = 0, fwd_rows_out = 0, fwd_probe_us = 0;
+		int64_t bwd_rows_in = 0, bwd_rows_out = 0, bwd_probe_us = 0;
+		int64_t total_sink_us = 0, total_source_us = 0, total_finalize_us = 0;
+
+		Printer::Print("");
+		for (auto &e : entries) {
+			if (e.is_create) {
+				auto &s = create_filter_stats[e.idx];
+				string pass = s->is_forward_pass ? "FWD" : "BWD";
+				string probe_names;
+				for (size_t pi = 0; pi < s->probe_table_indices.size(); pi++) {
+					if (pi > 0) {
+						probe_names += ",";
+					}
+					probe_names += GetName(s->probe_table_indices[pi]);
+				}
+				if (probe_names.empty()) {
+					probe_names = "?";
+				}
+				Printer::PrintF(
+				    "CREATE_FILTER [%s]: [build=%s -> probe=%s] %llu rows, sink=%lldus, finalize=%lldus, source=%lldus",
+				    pass.c_str(), GetName(s->build_table_idx).c_str(), probe_names.c_str(),
+				    (uint64_t)s->rows_materialized.load(), (int64_t)s->sink_time_us.load(),
+				    (int64_t)s->finalize_time_us.load(), (int64_t)s->source_time_us.load());
+				total_sink_us += s->sink_time_us.load();
+				total_source_us += s->source_time_us.load();
+				total_finalize_us += s->finalize_time_us.load();
+			} else {
+				auto &s = probe_filter_stats[e.idx];
+				string pass = s->is_forward_pass ? "FWD" : "BWD";
+				idx_t ri = s->rows_in.load();
+				idx_t ro = s->rows_out.load();
+				double sel = ri > 0 ? 100.0 * static_cast<double>(ro) / static_cast<double>(ri) : 0.0;
+				Printer::PrintF(
+				    "PROBE_FILTER    [%s]: [build=%s, probe=%s] in=%llu, out=%llu, sel=%.1f%%, probe=%lldus",
+				    pass.c_str(), GetName(s->build_table_idx).c_str(), GetName(s->probe_table_idx).c_str(),
+				    (uint64_t)ri, (uint64_t)ro, sel, (int64_t)s->probe_time_us.load());
+				if (s->is_forward_pass) {
+					fwd_rows_in += static_cast<int64_t>(ri);
+					fwd_rows_out += static_cast<int64_t>(ro);
+					fwd_probe_us += s->probe_time_us.load();
+				} else {
+					bwd_rows_in += static_cast<int64_t>(ri);
+					bwd_rows_out += static_cast<int64_t>(ro);
+					bwd_probe_us += s->probe_time_us.load();
+				}
+			}
+		}
+
+		Printer::Print("\nTotals:");
+		Printer::PrintF("  sink: %lld us", (int64_t)total_sink_us);
+		Printer::PrintF("  source: %lld us", (int64_t)total_source_us);
+		Printer::PrintF("  finalize (BF build): %lld us", (int64_t)total_finalize_us);
+
+		auto print_pass_stats = [](const char *label, int64_t rows_in, int64_t rows_out, int64_t probe_us) {
+			if (rows_in > 0) {
+				double filtered_pct = 100.0 * (1.0 - static_cast<double>(rows_out) / static_cast<double>(rows_in));
+				Printer::PrintF("  %s probe: %lld us, filtered: %lld / %lld rows (%.1f%% removed)", label,
+				                (int64_t)probe_us, (int64_t)(rows_in - rows_out), (int64_t)rows_in, filtered_pct);
+			}
+		};
+
+		print_pass_stats("forward", fwd_rows_in, fwd_rows_out, fwd_probe_us);
+		print_pass_stats("backward", bwd_rows_in, bwd_rows_out, bwd_probe_us);
+
+		int64_t total_rows_in = fwd_rows_in + bwd_rows_in;
+		int64_t total_rows_out = fwd_rows_out + bwd_rows_out;
+		if (total_rows_in > 0) {
+			double filtered_pct =
+			    100.0 * (1.0 - static_cast<double>(total_rows_out) / static_cast<double>(total_rows_in));
+			Printer::PrintF("  total probe: %lld us, filtered: %lld / %lld rows (%.1f%% removed)",
+			                (int64_t)(fwd_probe_us + bwd_probe_us), (int64_t)(total_rows_in - total_rows_out),
+			                (int64_t)total_rows_in, filtered_pct);
+		}
+		Printer::Print("=== END Robust PROFILING ===\n");
+	}
+};
+
+inline shared_ptr<RobustProfilingState> GetRobustProfilingState(ClientContext &context) {
+	Value val;
+	auto result = context.TryGetCurrentSetting("robust_profiling", val);
+	if (result && val.GetValue<bool>()) {
+		return context.registered_state->GetOrCreate<RobustProfilingState>("robust_profiling", true);
+	}
+	return nullptr;
+}
+
+} // namespace duckdb
diff --git a/src/rpt_extension.cpp b/src/rpt_extension.cpp
deleted file mode 100644
index f6270fa..0000000
--- a/src/rpt_extension.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#define DUCKDB_EXTENSION_MAIN
-
-#include "rpt_extension.hpp"
-#include "duckdb.hpp"
-#include "duckdb/common/exception.hpp"
-#include "duckdb/common/string_util.hpp"
-#include "duckdb/function/scalar_function.hpp"
-#include "duckdb/main/extension_util.hpp"
-#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
-#include "duckdb/optimizer/optimizer_extension.hpp"
-#include "duckdb/planner/operator_extension.hpp"
-// #include "operators/logical_hello.hpp"
-// #include "operators/physical_hello.hpp"
-#include "operators/logical_create_bf.hpp"
-#include "operators/logical_use_bf.hpp"
-// #include "predicate_transfer_optimization.hpp"
-#include "optimizer/rpt_optimizer.hpp"
-
-// OpenSSL linked through vcpkg
-#include <openssl/opensslv.h>
-
-namespace duckdb {
-
-class CreateBFOperatorExtension : public OperatorExtension {
-public:
-	std::string GetName() override {
-		return "logical_create_bf";
-	}
-	
-	unique_ptr<LogicalExtensionOperator> Deserialize(Deserializer &deserializer) override {
-		return make_uniq<LogicalCreateBF>();
-	}
-};
-
-class UseBFOperatorExtension : public OperatorExtension {
-public:
-	std::string GetName() override {
-		return "logical_use_bf";
-	}
-	
-	unique_ptr<LogicalExtensionOperator> Deserialize(Deserializer &deserializer) override {
-		return make_uniq<LogicalUseBF>();
-	}
-};
-
-static void LoadInternal(DatabaseInstance &instance) {
-	// Register the SIP optimizer rule
-	OptimizerExtension optimizer;
-	// optimizer.optimize_function = PredicateTransferOptimizer::Optimize;
-	// optimizer.pre_optimize_function = PredicateTransferOptimizer::PreOptimize;
-	optimizer.optimize_function = RPTOptimizerContextState::Optimize;
-	// optimizer.pre_optimize_function = PredicateTransferOptimizer::PreOptimize;
-	instance.config.optimizer_extensions.push_back(optimizer);
-	
-	// Register logical operators
-	instance.config.operator_extensions.push_back(make_uniq<CreateBFOperatorExtension>());
-	instance.config.operator_extensions.push_back(make_uniq<UseBFOperatorExtension>());
-}
-
-void RptExtension::Load(DuckDB &db) {
-	LoadInternal(*db.instance);
-}
-std::string RptExtension::Name() {
-	return "rpt";
-}
-
-std::string RptExtension::Version() const {
-#ifdef EXT_VERSION_RPT
-	return EXT_VERSION_RPT;
-#else
-	return "";
-#endif
-}
-
-} // namespace duckdb
-
-extern "C" {
-
-DUCKDB_EXTENSION_API void rpt_init(duckdb::DatabaseInstance &db) {
-	duckdb::DuckDB db_wrapper(db);
-	db_wrapper.LoadExtension<duckdb::RptExtension>();
-}
-
-DUCKDB_EXTENSION_API const char *rpt_version() {
-	return duckdb::DuckDB::LibraryVersion();
-}
-}
-
-#ifndef DUCKDB_EXTENSION_MAIN
-#error DUCKDB_EXTENSION_MAIN not defined
-#endif
diff --git a/src/table_operator_manager.cpp b/src/table_operator_manager.cpp
deleted file mode 100644
index 90db06b..0000000
--- a/src/table_operator_manager.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-#include "table_operator_manager.hpp"
-
-#include "duckdb/planner/operator/logical_aggregate.hpp"
-#include "duckdb/planner/operator/logical_comparison_join.hpp"
-#include "duckdb/planner/operator/logical_get.hpp"
-#include "duckdb/planner/expression/bound_columnref_expression.hpp"
-#include "duckdb/planner/operator/logical_set_operation.hpp"
-
-namespace duckdb {
-vector<reference<LogicalOperator>> TableOperatorManager::ExtractOperators(LogicalOperator &plan) {
-	vector<reference<LogicalOperator>> ret;
-	ExtractOperatorsInternal(plan, ret);
-	SortTableOperators();
-	return ret;
-}
-
-void TableOperatorManager::SortTableOperators() {
-	sorted_table_operators.clear();
-	for (auto &node : table_operators) {
-		sorted_table_operators.emplace_back(node.second);
-	}
-	sort(sorted_table_operators.begin(), sorted_table_operators.end(),
-	     [&](LogicalOperator *a, LogicalOperator *b) { return a->estimated_cardinality < b->estimated_cardinality; });
-}
-
-LogicalOperator *TableOperatorManager::GetTableOperator(idx_t table_idx) {
-	auto itr = table_operators.find(table_idx);
-	if (itr == table_operators.end()) {
-		return nullptr;
-	}
-
-	return itr->second;
-}
-
-idx_t TableOperatorManager::GetTableOperatorOrder(const LogicalOperator *node) {
-	if (sorted_table_operators.empty()) {
-		SortTableOperators();
-	}
-
-	for (idx_t i = 0; i < sorted_table_operators.size(); i++) {
-		if (sorted_table_operators[i] == node) {
-			return i;
-		}
-	}
-	return static_cast<idx_t>(-1); // fallback if not found
-}
-
-ColumnBinding TableOperatorManager::GetRenaming(ColumnBinding binding) {
-	auto itr = rename_col_bindings.find(binding);
-	while (itr != rename_col_bindings.end()) {
-		binding = itr->second;
-		itr = rename_col_bindings.find(binding);
-	}
-	return binding;
-}
-
-idx_t TableOperatorManager::GetScalarTableIndex(LogicalOperator *op) {
-	switch (op->type) {
-	case LogicalOperatorType::LOGICAL_WINDOW:
-	case LogicalOperatorType::LOGICAL_CHUNK_GET:
-	case LogicalOperatorType::LOGICAL_GET:
-	case LogicalOperatorType::LOGICAL_DELIM_GET:
-	case LogicalOperatorType::LOGICAL_PROJECTION:
-	case LogicalOperatorType::LOGICAL_UNION:
-	case LogicalOperatorType::LOGICAL_EXCEPT:
-	case LogicalOperatorType::LOGICAL_INTERSECT: {
-		return op->GetTableIndex()[0];
-	}
-	case LogicalOperatorType::LOGICAL_FILTER: {
-		return GetScalarTableIndex(op->children[0].get());
-	}
-	case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: {
-		return op->GetTableIndex()[1];
-	}
-	default:
-		return std::numeric_limits<idx_t>::max();
-	}
-}
-
-bool TableOperatorManager::OperatorNeedsRelation(LogicalOperatorType op_type) {
-	switch (op_type) {
-	case LogicalOperatorType::LOGICAL_PROJECTION:
-	case LogicalOperatorType::LOGICAL_EXPRESSION_GET:
-	case LogicalOperatorType::LOGICAL_GET:
-	case LogicalOperatorType::LOGICAL_DELIM_GET:
-	case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY:
-	case LogicalOperatorType::LOGICAL_WINDOW:
-		return true;
-	default:
-		return false;
-	}
-}
-
-void TableOperatorManager::AddTableOperator(LogicalOperator *op) {
-	op->estimated_cardinality = op->EstimateCardinality(context);
-
-	idx_t table_idx = GetScalarTableIndex(op);
-	if (table_idx != std::numeric_limits<idx_t>::max() && table_operators.find(table_idx) == table_operators.end()) {
-		table_operators[table_idx] = op;
-	}
-}
-
-void TableOperatorManager::ExtractOperatorsInternal(LogicalOperator &plan, vector<reference<LogicalOperator>> &joins) {
-	LogicalOperator *op = &plan;
-
-	// 1. collect joins
-	if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
-	    op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
-		auto &join = op->Cast<LogicalComparisonJoin>();
-		switch (join.join_type) {
-		case JoinType::INNER:
-		case JoinType::LEFT:
-		case JoinType::RIGHT:
-		case JoinType::SEMI:
-		case JoinType::RIGHT_SEMI: {
-			if (std::any_of(join.conditions.begin(), join.conditions.end(), [](const JoinCondition &jc) {
-				    return jc.comparison == ExpressionType::COMPARE_EQUAL &&
-				           jc.left->type == ExpressionType::BOUND_COLUMN_REF &&
-				           jc.right->type == ExpressionType::BOUND_COLUMN_REF;
-			    })) {
-				joins.push_back(*op);
-			}
-			break;
-		}
-		default:
-			break;
-		}
-	}
-
-	// 2. collect base tables
-	switch (op->type) {
-	case LogicalOperatorType::LOGICAL_FILTER: {
-		LogicalOperator *child = op->children[0].get();
-		if (child->type == LogicalOperatorType::LOGICAL_GET) {
-			AddTableOperator(op);
-			return;
-		}
-		ExtractOperatorsInternal(*child, joins);
-		return;
-	}
-	case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: {
-		auto &agg = op->Cast<LogicalAggregate>();
-		if (agg.groups.empty() && agg.grouping_sets.size() <= 1) {
-			AddTableOperator(op);
-			ExtractOperatorsInternal(*op->children[0], joins);
-		} else {
-			auto old_refs = agg.GetColumnBindings();
-			for (size_t i = 0; i < agg.groups.size(); i++) {
-				if (agg.groups[i]->type == ExpressionType::BOUND_COLUMN_REF) {
-					auto &col_ref = agg.groups[i]->Cast<BoundColumnRefExpression>();
-					rename_col_bindings.insert({old_refs[i], col_ref.binding});
-				}
-			}
-			ExtractOperatorsInternal(*op->children[0], joins);
-		}
-		return;
-	}
-	case LogicalOperatorType::LOGICAL_PROJECTION: {
-		auto old_refs = op->GetColumnBindings();
-		for (size_t i = 0; i < op->expressions.size(); i++) {
-			if (op->expressions[i]->type == ExpressionType::BOUND_COLUMN_REF) {
-				auto &col_ref = op->expressions[i]->Cast<BoundColumnRefExpression>();
-				rename_col_bindings.insert({old_refs[i], col_ref.binding});
-			}
-		}
-		ExtractOperatorsInternal(*op->children[0], joins);
-		return;
-	}
-	case LogicalOperatorType::LOGICAL_UNION:
-	case LogicalOperatorType::LOGICAL_EXCEPT:
-	case LogicalOperatorType::LOGICAL_INTERSECT: {
-		AddTableOperator(op);
-		ExtractOperatorsInternal(*op->children[0], joins);
-		ExtractOperatorsInternal(*op->children[1], joins);
-		return;
-	}
-	case LogicalOperatorType::LOGICAL_WINDOW: {
-		// TODO: how can we handle the window?
-		AddTableOperator(op);
-		ExtractOperatorsInternal(*op->children[0], joins);
-		return;
-	}
-	case LogicalOperatorType::LOGICAL_DUMMY_SCAN:
-	case LogicalOperatorType::LOGICAL_EXPRESSION_GET:
-	case LogicalOperatorType::LOGICAL_GET:
-	case LogicalOperatorType::LOGICAL_EMPTY_RESULT:
-	case LogicalOperatorType::LOGICAL_CHUNK_GET:
-		AddTableOperator(op);
-		return;
-	default:
-		for (auto &child : op->children) {
-			ExtractOperatorsInternal(*child, joins);
-		}
-	}
-}
-
-} // namespace duckdb
diff --git a/src/table_operator_manager.hpp b/src/table_operator_manager.hpp
deleted file mode 100644
index cf1e069..0000000
--- a/src/table_operator_manager.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-#pragma once
-
-#include "duckdb/main/client_context.hpp"
-
-namespace duckdb {
-class TableOperatorManager {
-public:
-	explicit TableOperatorManager(ClientContext &context) : context(context) {
-	}
-
-	ClientContext &context;
-
-	vector<LogicalOperator *> sorted_table_operators;
-	unordered_map<idx_t, LogicalOperator *> table_operators;
-
-public:
-	vector<reference<LogicalOperator>> ExtractOperators(LogicalOperator &plan);
-	void SortTableOperators();
-
-	LogicalOperator *GetTableOperator(idx_t table_idx);
-	idx_t GetTableOperatorOrder(const LogicalOperator *node);
-	ColumnBinding GetRenaming(ColumnBinding col_binding);
-
-	static idx_t GetScalarTableIndex(LogicalOperator *op);
-	static bool OperatorNeedsRelation(LogicalOperatorType op_type);
-
-private:
-	void AddTableOperator(LogicalOperator *op);
-	void ExtractOperatorsInternal(LogicalOperator &plan, vector<reference<LogicalOperator>> &joins);
-
-	struct HashFunc {
-		size_t operator()(const ColumnBinding &key) const {
-			return std::hash<uint64_t> {}(key.table_index) ^ (std::hash<uint64_t> {}(key.column_index) << 1);
-		}
-	};
-	unordered_map<ColumnBinding, ColumnBinding, HashFunc> rename_col_bindings;
-};
-} // namespace duckdb
\ No newline at end of file
diff --git a/src/transfer_graph_manager.cpp b/src/transfer_graph_manager.cpp
deleted file mode 100644
index 192bfda..0000000
--- a/src/transfer_graph_manager.cpp
+++ /dev/null
@@ -1,803 +0,0 @@
-#include "transfer_graph_manager.hpp"
-
-#include "duckdb/planner/operator/logical_comparison_join.hpp"
-#include "duckdb/planner/expression/bound_comparison_expression.hpp"
-#include "predicate_transfer_optimization.hpp"
-#include "duckdb/planner/expression/bound_columnref_expression.hpp"
-#include "duckdb/planner/operator/logical_get.hpp"
-
-#include <queue>
-
-namespace duckdb {
-static ColumnBinding FindBindingRoot(const ColumnBinding &binding, BindingParentMap &parents) {
-	auto it = parents.find(binding);
-	if (it == parents.end()) {
-		return binding;
-	}
-	ColumnBinding root = it->second;
-	if (root != binding) {
-		root = FindBindingRoot(root, parents);
-		parents[binding] = root; // Path compression
-	}
-	return root;
-}
-
-static void UnionBindings(const ColumnBinding &a, const ColumnBinding &b, const LogicalType &type,
-                          BindingParentMap &parents, BindingGroupMap &group_map) {
-	ColumnBinding root_a = FindBindingRoot(a, parents);
-	ColumnBinding root_b = FindBindingRoot(b, parents);
-	if (root_a == root_b) {
-		return;
-	}
-
-	// Union by attaching b to a
-	parents[root_b] = root_a;
-
-	auto &group_a = group_map[root_a];
-	if (!group_a) {
-		group_a = make_shared_ptr<JoinKeyTableGroup>(type, a.table_index);
-	}
-
-	auto &group_b = group_map[root_b];
-	if (!group_b) {
-		group_b = make_shared_ptr<JoinKeyTableGroup>(type, b.table_index);
-	}
-
-	group_a->Union(*group_b);
-	group_map[root_a] = group_a;
-}
-
-bool TransferGraphManager::Build(LogicalOperator &plan) {
-	printf("\n=== BUILD TRANSFER GRAPH MANAGER ===\n");
-	
-	// 1. Extract all operators, including table operators and join operators
-	const vector<reference<LogicalOperator>> joins = table_operator_manager.ExtractOperators(plan);
-	printf("1. extracted operators: %zu table operators, %zu join operators\n", 
-	       table_operator_manager.table_operators.size(), joins.size());
-	
-	if (table_operator_manager.table_operators.size() < 2) {
-		printf("not enough table operators (< 2), skipping\n");
-		return false;
-	}
-
-	// print table operators
-	printf("table operators:\n");
-	for (auto &pair : table_operator_manager.table_operators) {
-		auto &op = *pair.second;
-		printf("  table_idx=%llu, type=%s, cardinality=%llu\n",
-		       pair.first, LogicalOperatorToString(op.type).c_str(), op.estimated_cardinality);
-	}
-
-	// 2. Getting graph edges information from join operators
-	ExtractEdgesInfo(joins);
-	printf("2. extracted edges: neighbor_matrix size=%zu\n", neighbor_matrix.size());
-	
-	if (neighbor_matrix.empty()) {
-		printf("no edges extracted, skipping\n");
-		return false;
-	}
-
-	// print edge information
-	printf("edge information:\n");
-	for (auto &pair : neighbor_matrix) {
-		idx_t table1 = pair.first;
-		for (auto &edge_pair : pair.second) {
-			idx_t table2 = edge_pair.first;
-			auto &edge = edge_pair.second;
-			printf("  edge: table_%llu <-> table_%llu, protect_left=%s, protect_right=%s\n",
-			       table1, table2, edge->protect_left ? "true" : "false", edge->protect_right ? "true" : "false");
-		}
-	}
-
-	// 3. unfiltered table only receives bloom filters, they will not generate bloom filters.
-	// SkipUnfilteredTable(joins);
-	printf("3. after SkipUnfilteredTable: neighbor_matrix size=%zu\n", neighbor_matrix.size());
-
-	// 4. create the transfer graph
-	printf("4. calling CreateTransferPlanUpdated()\n");
-	CreateTransferPlanUpdated();
-
-	printf("=== BUILD COMPLETE ===\n\n");
-	return true;
-}
-
-void TransferGraphManager::AddFilterPlan(idx_t create_table, const shared_ptr<FilterPlan> &filter_plan, bool reverse) {
-	bool is_forward = !reverse;
-
-	D_ASSERT(!filter_plan->apply.empty());
-	auto &expr = filter_plan->apply[0];
-	auto node_idx = expr.table_index;
-	transfer_graph[node_idx]->Add(create_table, filter_plan, is_forward, true);
-}
-
-void TransferGraphManager::PrintTransferPlan() {
-	// Output table groups
-	unordered_set<JoinKeyTableGroup *> visited;
-	for (auto &pair : table_groups) {
-		auto &group = pair.second;
-		if (visited.count(group.get())) {
-			continue;
-		}
-		visited.insert(group.get());
-		group->Print();
-	}
-
-	// Local helper to get operator name
-	auto GetName = [](LogicalOperator &op) -> string {
-		string ret;
-		auto params = op.ParamsToString();
-
-		if (params.contains("Table")) {
-			ret = params.at("Table");
-		} else {
-			ret = "Unknown";
-		}
-
-		if (op.type == LogicalOperatorType::LOGICAL_GET) {
-			auto &get = op.Cast<LogicalGet>();
-			if (get.table_filters.filters.empty()) {
-				ret += " (No Filter)";
-			}
-		}
-		return ret;
-	};
-
-	std::cout << "digraph G {" << '\n';
-
-	// Create a map to store outgoing neighbors for each LogicalOperator
-	std::unordered_map<std::string, std::vector<std::pair<std::string, std::pair<int, int>>>> outgoing_neighbors;
-
-	// Populate the outgoing_neighbors map by traversing the edges
-	for (const auto &edge : selected_edges) {
-		std::string left_name = GetName(edge->left_table);
-		std::string right_name = GetName(edge->right_table);
-		idx_t left_column_id = edge->left_binding.column_index;   // Get the column id for the left table
-		idx_t right_column_id = edge->right_binding.column_index; // Get the column id for the right table
-
-		// Check the protection flags and only allow outgoing edges if the table is not protected
-		if (!edge->protect_right) {
-			outgoing_neighbors[left_name].emplace_back(right_name, std::make_pair(left_column_id, right_column_id));
-		}
-		if (!edge->protect_left) {
-			outgoing_neighbors[right_name].emplace_back(left_name, std::make_pair(right_column_id, left_column_id));
-		}
-	}
-
-	// Output nodes (LogicalOperators) and their outgoing neighbors (edges)
-	for (const auto &op : transfer_order) {
-		std::string op_name = GetName(*op); // Use GetName function for operator name
-		std::cout << "\t\"" << op_name << "\";\n";
-
-		// Output edges to each neighbor (only outgoing edges)
-		if (outgoing_neighbors.find(op_name) != outgoing_neighbors.end()) {
-			for (const auto &neighbor : outgoing_neighbors[op_name]) {
-				std::string neighbor_name = neighbor.first;
-				int left_column_id = neighbor.second.first;
-				int right_column_id = neighbor.second.second;
-
-				// Print edge with column ids
-				std::cout << "\t\t\"" << op_name << "\" -> \"" << neighbor_name << "\" [label=\"Column "
-				          << left_column_id << " -> Column " << right_column_id << "\"];\n";
-			}
-		}
-	}
-
-	std::cout << "}"
-	          << "\n";
-}
-
-void TransferGraphManager::ExtractEdgesInfo(const vector<reference<LogicalOperator>> &join_operators) {
-	// Deduplicate join conditions
-	unordered_set<hash_t> existed_set;
-	auto ComputeConditionHash = [](const JoinCondition &cond) {
-		return cond.left->Hash() + cond.right->Hash();
-	};
-
-	// Union-Find structures
-	BindingParentMap binding_parents;
-	BindingGroupMap group_map;
-
-	for (auto &join_ref : join_operators) {
-		auto &join = join_ref.get();
-
-		if (join.type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN &&
-		    join.type != LogicalOperatorType::LOGICAL_DELIM_JOIN) {
-			continue;
-		}
-
-		auto &comp_join = join.Cast<LogicalComparisonJoin>();
-		D_ASSERT(comp_join.expressions.empty());
-
-		for (auto &cond : comp_join.conditions) {
-			// Only equal predicates between two column refs are supported
-			if (cond.comparison != ExpressionType::COMPARE_EQUAL ||
-			    cond.left->type != ExpressionType::BOUND_COLUMN_REF ||
-			    cond.right->type != ExpressionType::BOUND_COLUMN_REF) {
-				continue;
-			}
-
-			// Skip duplicate conditions
-			hash_t hash = ComputeConditionHash(cond);
-			if (!existed_set.insert(hash).second) {
-				continue;
-			}
-
-			auto &left_expr = cond.left->Cast<BoundColumnRefExpression>();
-			auto &right_expr = cond.right->Cast<BoundColumnRefExpression>();
-
-			ColumnBinding left_binding = table_operator_manager.GetRenaming(left_expr.binding);
-			ColumnBinding right_binding = table_operator_manager.GetRenaming(right_expr.binding);
-
-			auto left_node = table_operator_manager.GetTableOperator(left_binding.table_index);
-			auto right_node = table_operator_manager.GetTableOperator(right_binding.table_index);
-
-			if (!left_node || !right_node) {
-				continue;
-			}
-
-			// Create edge
-			auto edge =
-			    make_shared_ptr<EdgeInfo>(cond.left->return_type, *left_node, left_binding, *right_node, right_binding);
-
-			// Set edge protection flags based on join type
-			switch (comp_join.type) {
-			case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
-				switch (comp_join.join_type) {
-				case JoinType::LEFT:
-					edge->protect_left = true;
-					break;
-				case JoinType::RIGHT:
-					edge->protect_right = true;
-					break;
-				case JoinType::MARK:
-					edge->protect_right = true;
-					break;
-				case JoinType::INNER:
-				case JoinType::SEMI:
-				case JoinType::RIGHT_SEMI:
-					break;
-				default:
-					continue; // Skip unsupported types
-				}
-				break;
-
-			case LogicalOperatorType::LOGICAL_DELIM_JOIN:
-				if (comp_join.delim_flipped == 0) {
-					edge->protect_left = true;
-				} else {
-					edge->protect_right = true;
-				}
-				break;
-
-			default:
-				continue;
-			}
-
-			// Store bidirectional edge
-			neighbor_matrix[left_binding.table_index][right_binding.table_index] = edge;
-			neighbor_matrix[right_binding.table_index][left_binding.table_index] = edge;
-
-			// Merge groups if not protected
-			if (!edge->protect_left && !edge->protect_right) {
-				UnionBindings(left_binding, right_binding, cond.left->return_type, binding_parents, group_map);
-			}
-		}
-	}
-
-	// Finalize table_groups by resolving root bindings
-	for (auto &entry : group_map) {
-		ColumnBinding rep = FindBindingRoot(entry.first, binding_parents);
-		table_groups[entry.first] = group_map[rep];
-	}
-
-	// Classify all tables into three categories: intermediate table, unfiltered table, and filtered table.
-	ClassifyTables();
-}
-
-void TransferGraphManager::ClassifyTables() {
-	for (auto &pair : table_operator_manager.table_operators) {
-		auto id = pair.first;
-		auto &table = pair.second;
-		auto &edges = neighbor_matrix[id];
-		auto &join_keys = table_join_keys[id];
-
-		// Check intermediate table, which belongs to more than 2 groups
-		unordered_set<JoinKeyTableGroup *> belong_groups;
-		for (auto &sub_pair : edges) {
-			auto &edge = sub_pair.second;
-			auto &join_key_group = table_groups[edge->left_binding];
-			belong_groups.insert(join_key_group.get());
-
-			// Record all join keys of this table
-			if (edge->left_binding.table_index == id) {
-				join_keys.insert(edge->left_binding);
-			} else {
-				join_keys.insert(edge->right_binding);
-			}
-		}
-		if (belong_groups.size() > 1) {
-			intermediate_table.insert(id);
-			continue;
-		}
-
-		// Check unfiltered table  
-		if (table->type == LogicalOperatorType::LOGICAL_GET) {
-			auto &get = table->Cast<LogicalGet>();
-			if (get.table_filters.filters.empty()) {
-				std::cout << "table_" << id << " has no table filters\n";
-				printf("  table_%llu marked as UNFILTERED (no table filters)\n", id);
-				unfiltered_table.insert(id);
-				continue;
-			} else {
-				printf("  table_%llu has %zu table filters\n", id, get.table_filters.filters.size());
-			}
-		}
-
-		std::cout<< "Adding table_" << id << " to filtered table\n";
-		// last, it is a filtered table
-		filtered_table.insert(id);
-	}
-}
-
-void TransferGraphManager::SkipUnfilteredTable(const vector<reference<LogicalOperator>> &joins) {
-	// TODO: currently, we do not support skip unfiltered tables participating in outer join.
-	for (auto& op: joins) {
-		auto &join = op.get().Cast<LogicalComparisonJoin>();
-		if (join.join_type != JoinType::INNER) {
-			return;
-		}
-	}
-
-	printf("SkipUnfilteredTable: found %zu unfiltered tables: ", unfiltered_table.size());
-	for (auto table_idx : unfiltered_table) {
-		printf("table_%llu ", table_idx);
-	}
-	printf("\n");
-
-	bool changed = false;
-	do {
-		changed = false;
-		for (auto &table_idx : unfiltered_table) {
-			// 2.1 collect received bfs
-			unordered_map<idx_t, vector<shared_ptr<EdgeInfo>>> received_bfs;
-			auto &edges = neighbor_matrix[table_idx];
-
-			for (auto &pair : edges) {
-				auto &e = pair.second;
-
-				if (e->left_binding.table_index == table_idx && !e->protect_left) {
-					auto &bfs = received_bfs[e->left_binding.column_index];
-					bfs.push_back(e);
-				} else if (e->right_binding.table_index == table_idx && !e->protect_right) {
-					auto &bfs = received_bfs[e->right_binding.column_index];
-					bfs.push_back(e);
-				}
-			}
-
-			// 2.2 remove BFs creation from this table
-			for (auto &pair : edges) {
-				auto &edge = pair.second;
-
-				bool is_left = (edge->left_binding.table_index == table_idx && !edge->protect_right);
-				bool is_right = (edge->right_binding.table_index == table_idx && !edge->protect_left);
-				if (!is_left && !is_right) {
-					continue;
-				}
-
-				idx_t col_idx = is_left ? edge->left_binding.column_index : edge->right_binding.column_index;
-				auto &bfs = received_bfs[col_idx];
-
-				// 2.2.1 add new links
-				for (auto &bf_edge : bfs) {
-					// the same edge
-					if (bf_edge->left_binding == edge->left_binding && bf_edge->right_binding == edge->right_binding) {
-						continue;
-					}
-
-					bool bf_left = (bf_edge->left_binding.table_index == table_idx && !bf_edge->protect_left);
-					bool bf_right = (bf_edge->right_binding.table_index == table_idx && !bf_edge->protect_right);
-					if (!bf_left && !bf_right) {
-						continue;
-					}
-
-					shared_ptr<EdgeInfo> concat_edge = nullptr;
-					if (is_left && bf_left) {
-						concat_edge =
-						    make_shared_ptr<EdgeInfo>(edge->return_type, bf_edge->right_table, bf_edge->right_binding,
-						                              edge->right_table, edge->right_binding);
-						concat_edge->protect_left = true;
-					} else if (is_left && bf_right) {
-						concat_edge =
-						    make_shared_ptr<EdgeInfo>(edge->return_type, bf_edge->left_table, bf_edge->left_binding,
-						                              edge->right_table, edge->right_binding);
-						concat_edge->protect_left = true;
-					} else if (is_right && bf_left) {
-						concat_edge = make_shared_ptr<EdgeInfo>(edge->return_type, edge->left_table, edge->left_binding,
-						                                        bf_edge->right_table, bf_edge->right_binding);
-						concat_edge->protect_right = true;
-					} else if (is_right && bf_right) {
-						concat_edge = make_shared_ptr<EdgeInfo>(edge->return_type, edge->left_table, edge->left_binding,
-						                                        bf_edge->left_table, bf_edge->left_binding);
-						concat_edge->protect_right = true;
-					}
-
-					if (concat_edge) {
-						idx_t i = concat_edge->left_binding.table_index;
-						idx_t j = concat_edge->right_binding.table_index;
-						auto &edge_ij = neighbor_matrix[i][j];
-						auto &edge_ji = neighbor_matrix[j][i];
-
-						bool exists = false;
-						if (edge_ij != nullptr) {
-							bool same_direction = edge_ij->left_binding == concat_edge->left_binding &&
-							                      edge_ij->right_binding == concat_edge->right_binding;
-							bool reverse_direction = edge_ij->left_binding == concat_edge->right_binding &&
-							                         edge_ij->right_binding == concat_edge->left_binding;
-
-							if (same_direction || reverse_direction) {
-								if (same_direction) {
-									edge_ij->protect_left &= concat_edge->protect_left;
-									edge_ij->protect_right &= concat_edge->protect_right;
-								} else { // reverse_direction
-									edge_ij->protect_left &= concat_edge->protect_right;
-									edge_ij->protect_right &= concat_edge->protect_left;
-								}
-								exists = true;
-							}
-						}
-
-						if (!exists) {
-							edge_ij = concat_edge;
-							edge_ji = concat_edge;
-						}
-					}
-				}
-
-				// 2.2.2 disable current link
-				if (is_left) {
-					edge->protect_right = true;
-				} else {
-					edge->protect_left = true;
-				}
-
-				changed = true;
-			}
-
-			// 2.3. Remove invalid links
-			for (auto it = edges.begin(); it != edges.end();) {
-				auto &edge = it->second;
-
-				// If the condition is met, erase the item from the unordered_map
-				if (edge->protect_left && edge->protect_right) {
-					it = edges.erase(it);
-				} else {
-					++it;
-				}
-			}
-		}
-	} while (changed);
-}
-
-void TransferGraphManager::LargestRoot(vector<LogicalOperator *> &sorted_nodes) {
-	unordered_set<idx_t> constructed_set, unconstructed_set;
-	int prior_flag = static_cast<int>(table_operator_manager.table_operators.size()) - 1;
-	idx_t root = std::numeric_limits<idx_t>::max();
-
-	// Initialize nodes
-	for (auto &entry : table_operator_manager.table_operators) {
-		idx_t id = entry.first;
-		auto node = make_uniq<GraphNode>(id, prior_flag--);
-
-		if (entry.second == sorted_nodes.back()) {
-			root = id;
-			constructed_set.insert(id);
-		} else {
-			unconstructed_set.insert(id);
-		}
-
-		transfer_graph[id] = std::move(node);
-	}
-
-	// Add root
-	transfer_order.push_back(table_operator_manager.GetTableOperator(root));
-	table_operator_manager.table_operators.erase(root);
-
-	// Build graph
-	while (!unconstructed_set.empty()) {
-		auto selected_edge = FindEdge(constructed_set, unconstructed_set);
-		if (selected_edge.first == std::numeric_limits<idx_t>::max()) {
-			break;
-		}
-
-		auto &edge = neighbor_matrix[selected_edge.first][selected_edge.second];
-		selected_edges.emplace_back(std::move(edge));
-
-		auto node = transfer_graph[selected_edge.second].get();
-		node->cardinality_order = prior_flag--;
-
-		transfer_order.push_back(table_operator_manager.GetTableOperator(node->id));
-		table_operator_manager.table_operators.erase(node->id);
-
-		unconstructed_set.erase(selected_edge.second);
-		constructed_set.insert(selected_edge.second);
-	}
-}
-void TransferGraphManager::LargestRootUpdated(vector<LogicalOperator *> &sorted_nodes) {
-	unordered_set<idx_t> constructed_set, unconstructed_set;
-	int prior_flag = static_cast<int>(table_operator_manager.table_operators.size()) - 1;
-	idx_t root = std::numeric_limits<idx_t>::max();
-
-	printf("Sorted nodes order - descending order: \n");
-	for (auto it = sorted_nodes.rbegin(); it != sorted_nodes.rend(); ++it) {
-		auto &node = *it;
-		idx_t table_idx = table_operator_manager.GetScalarTableIndex(node);
-		std::cout << table_idx << " " ;
-	}
-
-	root = table_operator_manager.GetScalarTableIndex(sorted_nodes.back());
-	std::cout << "\nRoot = " << root << std::endl;
-
-	// Try to choose the largest filtered or intermediate table as the root
-	// for (auto it = sorted_nodes.rbegin(); it != sorted_nodes.rend(); ++it) {
-	// 	auto &node = *it;
-	// 	auto id = table_operator_manager.GetScalarTableIndex(node);
-	// 	if (filtered_table.count(id) || intermediate_table.count(id)) {
-	// 		std::cout << "\nRoot = " << id << std::endl;
-	// 		root = id;
-	// 		break;
-	// 	}
-	// }
-
-	// If we cannot find it, use the largest table as the root
-	if (root == std::numeric_limits<idx_t>::max()) {
-		auto &node = sorted_nodes.back();
-		root = table_operator_manager.GetScalarTableIndex(node);
-	}
-
-	printf("LargestRootUpdated: selected root = table_%llu\n", root);
-	printf("filtered_table.size()=%zu, intermediate_table.size()=%zu\n",
-	       filtered_table.size(), intermediate_table.size());
-
-	// Initialize nodes
-	for (auto &entry : table_operator_manager.table_operators) {
-		idx_t id = entry.first;
-		if (id == root) {
-			constructed_set.insert(id);
-		} else {
-			unconstructed_set.insert(id);
-		}
-
-		auto node = make_uniq<GraphNode>(id, prior_flag--);
-		transfer_graph[id] = std::move(node);
-	}
-
-	// Add root
-	transfer_order.push_back(table_operator_manager.GetTableOperator(root));
-	table_operator_manager.table_operators.erase(root);
-	for (auto &col_binding : table_join_keys[root]) {
-		auto &group = table_groups[col_binding];
-		if (group) {
-			group->RegisterLeader(root, col_binding);
-		}
-	}
-
-	// Build graph
-	while (!unconstructed_set.empty()) {
-		auto selected_edge = FindEdge(constructed_set, unconstructed_set);
-		if (selected_edge.first == std::numeric_limits<idx_t>::max()) {
-			break;
-		}
-
-		printf("  spanning tree edge: table_%llu <-> table_%llu\n",
-		       selected_edge.first, selected_edge.second);
-
-		auto &edge = neighbor_matrix[selected_edge.first][selected_edge.second];
-		selected_edges.emplace_back(std::move(edge));
-
-		auto node = transfer_graph[selected_edge.second].get();
-		node->cardinality_order = prior_flag--;
-
-		transfer_order.push_back(table_operator_manager.GetTableOperator(node->id));
-		table_operator_manager.table_operators.erase(node->id);
-		for (auto &col_binding : table_join_keys[node->id]) {
-			auto &group = table_groups[col_binding];
-			if (group) {
-				group->RegisterLeader(node->id, col_binding);
-			}
-		}
-
-		unconstructed_set.erase(selected_edge.second);
-		constructed_set.insert(selected_edge.second);
-	}
-}
-
-void TransferGraphManager::CreateOriginTransferPlan() {
-	auto saved_nodes = table_operator_manager.table_operators;
-	while (!table_operator_manager.table_operators.empty()) {
-		LargestRoot(table_operator_manager.sorted_table_operators);
-		table_operator_manager.SortTableOperators();
-	}
-	table_operator_manager.table_operators = saved_nodes;
-
-	for (auto &edge : selected_edges) {
-		if (!edge) {
-			continue;
-		}
-
-		idx_t left_idx = TableOperatorManager::GetScalarTableIndex(&edge->left_table);
-		idx_t right_idx = TableOperatorManager::GetScalarTableIndex(&edge->right_table);
-
-		D_ASSERT(left_idx != std::numeric_limits<idx_t>::max() && right_idx != std::numeric_limits<idx_t>::max());
-
-		auto &type = edge->return_type;
-		auto left_node = transfer_graph[left_idx].get();
-		auto right_node = transfer_graph[right_idx].get();
-
-		auto left_cols = edge->left_binding;
-		auto right_cols = edge->right_binding;
-
-		auto protect_left = edge->protect_left;
-		auto protect_right = edge->protect_right;
-
-		// smaller table is in the left
-		if (left_node->cardinality_order > right_node->cardinality_order) {
-			std::swap(left_node, right_node);
-			std::swap(left_cols, right_cols);
-			std::swap(protect_left, protect_right);
-		}
-
-		// forward: from the smaller to the larger
-		if (!protect_right) {
-			left_node->Add(right_node->id, {left_cols}, {right_cols}, {type}, true, false);
-			right_node->Add(left_node->id, {left_cols}, {right_cols}, {type}, true, true);
-		}
-
-		// backward: from the larger to the smaller
-		if (!protect_left) {
-			left_node->Add(right_node->id, {left_cols}, {right_cols}, {type}, false, true);
-			right_node->Add(left_node->id, {left_cols}, {right_cols}, {type}, false, false);
-		}
-	}
-}
-
-void TransferGraphManager::CreateTransferPlanUpdated() {
-	printf("\n=== CREATE TRANSFER PLAN UPDATED ===\n");
-	
-	auto saved_nodes = table_operator_manager.table_operators;
-	printf("calling LargestRootUpdated to build spanning tree...\n");
-	
-	while (!table_operator_manager.table_operators.empty()) {
-		LargestRootUpdated(table_operator_manager.sorted_table_operators);
-		table_operator_manager.SortTableOperators();
-	}
-	table_operator_manager.table_operators = saved_nodes;
-
-	printf("selected_edges size: %zu\n", selected_edges.size());
-	printf("transfer_order size: %zu\n", transfer_order.size());
-	
-	printf("transfer_order: ");
-	for (auto *op : transfer_order) {
-		auto table_idx = TableOperatorManager::GetScalarTableIndex(op);
-		printf("table_%llu ", table_idx);
-	}
-	printf("\n");
-
-	printf("processing selected edges to build transfer graph...\n");
-	for (size_t i = 0; i < selected_edges.size(); i++) {
-		auto &edge = selected_edges[i];
-		if (!edge) {
-			printf("  edge %zu: null, skipping\n", i);
-			continue;
-		}
-
-		idx_t left_idx = TableOperatorManager::GetScalarTableIndex(&edge->left_table);
-		idx_t right_idx = TableOperatorManager::GetScalarTableIndex(&edge->right_table);
-
-		D_ASSERT(left_idx != std::numeric_limits<idx_t>::max() && right_idx != std::numeric_limits<idx_t>::max());
-
-		auto &type = edge->return_type;
-		auto left_node = transfer_graph[left_idx].get();
-		auto right_node = transfer_graph[right_idx].get();
-
-		auto left_cols = edge->left_binding;
-		auto right_cols = edge->right_binding;
-
-		auto protect_left = edge->protect_left;
-		auto protect_right = edge->protect_right;
-
-		printf("  edge %zu: table_%llu (cardinality_order=%d) <-> table_%llu (cardinality_order=%d)\n",
-		       i, left_idx, left_node->cardinality_order, right_idx, right_node->cardinality_order);
-		printf("    protect_left=%s, protect_right=%s\n", 
-		       protect_left ? "true" : "false", protect_right ? "true" : "false");
-
-		// smaller table is in the left
-		if (left_node->cardinality_order > right_node->cardinality_order) {
-			printf("    swapping order: left becomes right, right becomes left\n");
-			std::swap(left_node, right_node);
-			std::swap(left_cols, right_cols);
-			std::swap(protect_left, protect_right);
-		}
-
-		// forward: from the smaller to the larger
-		if (!protect_right) {
-			printf("    adding FORWARD edges: table_%llu -> table_%llu\n", left_node->id, right_node->id);
-			left_node->Add(right_node->id, {left_cols}, {right_cols}, {type}, true, false);
-			right_node->Add(left_node->id, {left_cols}, {right_cols}, {type}, true, true);
-		} else {
-			printf("    skipping forward edges (protect_right=true)\n");
-		}
-
-		// backward: from the larger to the smaller
-		if (!protect_left) {
-			auto &group = table_groups[right_cols];
-			if (group) {
-				printf("    adding BACKWARD edges with GROUP LEADER: table_%llu -> group_leader_%llu\n", left_node->id, group->leader_id);
-				auto group_leader = group->leader_id;
-				auto &leader_cols = group->leader_column_binding;
-				auto leader = transfer_graph[group_leader].get();
-
-				left_node->Add(group_leader, {left_cols}, {leader_cols}, {type}, false, true);
-				leader->Add(left_node->id, {left_cols}, {leader_cols}, {type}, false, false);
-			} else {
-				printf("    adding BACKWARD edges: table_%llu -> table_%llu\n", left_node->id, right_node->id);
-				left_node->Add(right_node->id, {left_cols}, {right_cols}, {type}, false, true);
-				right_node->Add(left_node->id, {left_cols}, {right_cols}, {type}, false, false);
-			}
-		} else {
-			printf("    skipping backward edges (protect_left=true)\n");
-		}
-	}
-
-	// print final transfer graph
-	printf("\nfinal transfer graph:\n");
-	for (auto &pair : transfer_graph) {
-		auto &node = *pair.second;
-		printf("  table_%llu (cardinality_order=%d):\n", node.id, node.cardinality_order);
-		printf("    forward out edges: ");
-		for (auto &edge : node.forward_stage_edges.out) {
-			printf("->%llu ", edge->destination);
-		}
-		printf("\n    forward in edges: ");
-		for (auto &edge : node.forward_stage_edges.in) {
-			printf("<-%llu ", edge->destination);
-		}
-		printf("\n    backward out edges: ");
-		for (auto &edge : node.backward_stage_edges.out) {
-			printf("->%llu ", edge->destination);
-		}
-		printf("\n    backward in edges: ");
-		for (auto &edge : node.backward_stage_edges.in) {
-			printf("<-%llu ", edge->destination);
-		}
-		printf("\n");
-	}
-	
-	printf("=== END CREATE TRANSFER PLAN UPDATED ===\n\n");
-}
-
-pair<idx_t, idx_t> TransferGraphManager::FindEdge(const unordered_set<idx_t> &constructed_set,
-                                                  const unordered_set<idx_t> &unconstructed_set) {
-	pair<idx_t, idx_t> result {std::numeric_limits<idx_t>::max(), std::numeric_limits<idx_t>::max()};
-	idx_t max_cardinality = 0;
-	bool is_indirected = false;
-
-	for (auto i : unconstructed_set) {
-		for (auto j : constructed_set) {
-			auto &edge = neighbor_matrix[j][i];
-			if (edge == nullptr) {
-				continue;
-			}
-
-			idx_t cardinality = table_operator_manager.GetTableOperator(i)->estimated_cardinality;
-			if (cardinality > max_cardinality ||
-			    (is_indirected == false && !edge->protect_left && !edge->protect_right)) {
-				max_cardinality = cardinality;
-				result = {j, i};
-				is_indirected = !edge->protect_left && !edge->protect_right;
-			}
-		}
-	}
-	return result;
-}
-
-} // namespace duckdb
diff --git a/src/utils/dag_printer.cpp b/src/utils/dag_printer.cpp
new file mode 100644
index 0000000..e5b0dcd
--- /dev/null
+++ b/src/utils/dag_printer.cpp
@@ -0,0 +1,514 @@
+#include "dag_printer.hpp"
+#include "../optimizer/robust_optimizer.hpp"
+#include "duckdb/common/string_util.hpp"
+
+namespace duckdb {
+
+// rendered subtree: lines of text + horizontal center position
+struct RenderedBlock {
+	vector<string> lines;
+	int center; // column where the connector attaches
+};
+
+static string FormatCardinality(idx_t card) {
+	if (card >= 1000000000) {
+		return StringUtil::Format("%.1fB rows", (double)card / 1e9);
+	} else if (card >= 1000000) {
+		return StringUtil::Format("%.1fM rows", (double)card / 1e6);
+	} else if (card >= 1000) {
+		return StringUtil::Format("%.1fK rows", (double)card / 1e3);
+	}
+	return std::to_string(card) + " rows";
+}
+
+static RenderedBlock MakeBox(const string &name_line, const string &card_line) {
+	idx_t inner_width = std::max(name_line.size(), card_line.size());
+	string top = "+" + string(inner_width + 2, '-') + "+";
+	string mid1 = "| " + name_line + string(inner_width - name_line.size(), ' ') + " |";
+	string mid2 = "| " + card_line + string(inner_width - card_line.size(), ' ') + " |";
+	string bottom = "+" + string(inner_width + 2, '-') + "+";
+
+	RenderedBlock block;
+	block.lines = {top, mid1, mid2, bottom};
+	block.center = (int)(top.size() / 2);
+	return block;
+}
+
+static RenderedBlock RenderSubtree(TreeNode *node, TableManager &table_mgr) {
+	string table_name = table_mgr.GetTableName(node->table_idx);
+	string name_line = table_name + " (table " + std::to_string(node->table_idx) + ")";
+	string card_line = FormatCardinality(node->table_op->estimated_cardinality);
+
+	RenderedBlock parent_box = MakeBox(name_line, card_line);
+
+	if (node->children.empty()) {
+		return parent_box;
+	}
+
+	// render all children
+	vector<RenderedBlock> child_blocks;
+	vector<string> edge_labels;
+	for (auto *child : node->children) {
+		child_blocks.push_back(RenderSubtree(child, table_mgr));
+
+		// build edge label: parent_col / child_col
+		JoinEdge *edge = child->edge_to_parent;
+		string label;
+		if (edge) {
+			vector<ColumnBinding> parent_cols, child_cols;
+			if (edge->table_a == node->table_idx) {
+				parent_cols = edge->join_columns_a;
+				child_cols = edge->join_columns_b;
+			} else {
+				parent_cols = edge->join_columns_b;
+				child_cols = edge->join_columns_a;
+			}
+			for (idx_t i = 0; i < parent_cols.size(); i++) {
+				if (i > 0) {
+					label += ", ";
+				}
+				label += table_mgr.GetColumnName(node->table_idx, parent_cols[i].column_index);
+				label += " / ";
+				label += table_mgr.GetColumnName(child->table_idx, child_cols[i].column_index);
+			}
+		}
+		edge_labels.push_back(label);
+	}
+
+	// place children side by side with gap
+	const int gap = 4;
+	int total_width = 0;
+	vector<int> child_offsets;
+	for (idx_t i = 0; i < child_blocks.size(); i++) {
+		child_offsets.push_back(total_width);
+		int block_width = 0;
+		for (auto &line : child_blocks[i].lines) {
+			block_width = std::max(block_width, (int)line.size());
+		}
+		total_width += block_width;
+		if (i + 1 < child_blocks.size()) {
+			total_width += gap;
+		}
+	}
+
+	// compute child centers in combined coordinate space
+	vector<int> child_centers;
+	for (idx_t i = 0; i < child_blocks.size(); i++) {
+		child_centers.push_back(child_offsets[i] + child_blocks[i].center);
+	}
+
+	// expand total_width if any edge label would be clipped
+	for (idx_t i = 0; i < child_centers.size(); i++) {
+		int label_start = child_centers[i] - (int)edge_labels[i].size() / 2;
+		int label_end = label_start + (int)edge_labels[i].size();
+		if (label_end > total_width) {
+			total_width = label_end;
+		}
+		if (label_start < 0) {
+			int shift = -label_start;
+			for (auto &c : child_offsets) {
+				c += shift;
+			}
+			for (auto &c : child_centers) {
+				c += shift;
+			}
+			total_width += shift;
+		}
+	}
+
+	// position parent box centered above children
+	int children_mid = (child_centers.front() + child_centers.back()) / 2;
+	int parent_width = (int)parent_box.lines[0].size();
+	int parent_offset = children_mid - parent_width / 2;
+	if (parent_offset < 0) {
+		int shift = -parent_offset;
+		for (auto &c : child_offsets) {
+			c += shift;
+		}
+		for (auto &c : child_centers) {
+			c += shift;
+		}
+		total_width += shift;
+		parent_offset = 0;
+	}
+	total_width = std::max(total_width, parent_offset + parent_width);
+	int parent_center = parent_offset + parent_width / 2;
+
+	// build result
+	RenderedBlock result;
+	result.center = parent_center;
+
+	// parent box lines
+	for (auto &line : parent_box.lines) {
+		string padded = string(parent_offset, ' ') + line;
+		if ((int)padded.size() < total_width) {
+			padded += string(total_width - padded.size(), ' ');
+		}
+		result.lines.push_back(padded);
+	}
+
+	// connector lines from parent to children
+	if (child_blocks.size() == 1) {
+		int cc = child_centers[0];
+		string conn_line(total_width, ' ');
+		if (cc >= 0 && cc < total_width) {
+			conn_line[cc] = '|';
+		}
+		result.lines.push_back(conn_line);
+
+		if (!edge_labels[0].empty()) {
+			string label_line(total_width, ' ');
+			int label_start = cc - (int)edge_labels[0].size() / 2;
+			if (label_start < 0) {
+				label_start = 0;
+			}
+			for (idx_t j = 0; j < edge_labels[0].size() && label_start + (int)j < total_width; j++) {
+				label_line[label_start + j] = edge_labels[0][j];
+			}
+			result.lines.push_back(label_line);
+		}
+
+		string conn_line2(total_width, ' ');
+		if (cc >= 0 && cc < total_width) {
+			conn_line2[cc] = '|';
+		}
+		result.lines.push_back(conn_line2);
+	} else {
+		// horizontal branch line
+		int leftmost = child_centers.front();
+		int rightmost = child_centers.back();
+
+		string branch_line(total_width, ' ');
+		for (int c = leftmost; c <= rightmost; c++) {
+			branch_line[c] = '-';
+		}
+		for (auto cc : child_centers) {
+			if (cc >= 0 && cc < total_width) {
+				branch_line[cc] = '+';
+			}
+		}
+		if (parent_center >= 0 && parent_center < total_width) {
+			branch_line[parent_center] = '+';
+		}
+		result.lines.push_back(branch_line);
+
+		// edge labels row
+		string label_line(total_width, ' ');
+		for (idx_t i = 0; i < child_centers.size(); i++) {
+			if (edge_labels[i].empty()) {
+				continue;
+			}
+			int label_start = child_centers[i] - (int)edge_labels[i].size() / 2;
+			if (label_start < 0) {
+				label_start = 0;
+			}
+			for (idx_t j = 0; j < edge_labels[i].size() && label_start + (int)j < total_width; j++) {
+				label_line[label_start + j] = edge_labels[i][j];
+			}
+		}
+		result.lines.push_back(label_line);
+
+		// vertical connectors to children
+		string vert_line(total_width, ' ');
+		for (auto cc : child_centers) {
+			if (cc >= 0 && cc < total_width) {
+				vert_line[cc] = '|';
+			}
+		}
+		result.lines.push_back(vert_line);
+	}
+
+	// merge child blocks (pad shorter ones)
+	idx_t max_child_height = 0;
+	for (auto &cb : child_blocks) {
+		max_child_height = std::max(max_child_height, (idx_t)cb.lines.size());
+	}
+
+	for (idx_t row = 0; row < max_child_height; row++) {
+		string merged_line(total_width, ' ');
+		for (idx_t i = 0; i < child_blocks.size(); i++) {
+			if (row < child_blocks[i].lines.size()) {
+				const string &src = child_blocks[i].lines[row];
+				int offset = child_offsets[i];
+				for (idx_t j = 0; j < src.size() && offset + (int)j < total_width; j++) {
+					merged_line[offset + j] = src[j];
+				}
+			}
+		}
+		result.lines.push_back(merged_line);
+	}
+
+	return result;
+}
+
+TreeNode *FindNodeInTree(TreeNode *root, idx_t table_idx) {
+	if (!root) {
+		return nullptr;
+	}
+	if (root->table_idx == table_idx) {
+		return root;
+	}
+	for (auto *child : root->children) {
+		auto *found = FindNodeInTree(child, table_idx);
+		if (found) {
+			return found;
+		}
+	}
+	return nullptr;
+}
+
+void SetTreeLevels(TreeNode *node, int level) {
+	if (!node) {
+		return;
+	}
+	node->level = level;
+	for (auto *child : node->children) {
+		SetTreeLevels(child, level + 1);
+	}
+}
+
+void PrintTransferDAG(TreeNode *root, TableManager &table_mgr) {
+	PrintTransferDAG(root, table_mgr, "DAG");
+}
+
+void PrintTransferDAG(TreeNode *root, TableManager &table_mgr, const string &title) {
+	if (!root) {
+		return;
+	}
+
+	RenderedBlock block = RenderSubtree(root, table_mgr);
+
+	Printer::Print("\n=== " + title + " ===");
+	for (auto &line : block.lines) {
+		Printer::Print(line);
+	}
+	Printer::Print("=== " + title + " ===\n");
+}
+
+void PrintPhysicalDAG(vector<PhysicalDAGNode *> &all_nodes, TableManager &table_mgr) {
+	if (all_nodes.empty()) {
+		return;
+	}
+
+	// group by level
+	map<int, vector<PhysicalDAGNode *>> by_level;
+	int max_level = 0;
+	for (auto *node : all_nodes) {
+		by_level[node->level].push_back(node);
+		max_level = std::max(max_level, node->level);
+	}
+
+	// create boxes for all nodes
+	map<PhysicalDAGNode *, RenderedBlock> boxes;
+	for (auto *node : all_nodes) {
+		string table_name = table_mgr.GetTableName(node->table_idx);
+		string name_line = table_name + " (table " + std::to_string(node->table_idx) + ")";
+		string card_line = FormatCardinality(node->table_op->estimated_cardinality);
+		boxes[node] = MakeBox(name_line, card_line);
+	}
+
+	// compute level widths and box offsets within each level
+	const int level_gap = 4;
+	map<int, int> level_widths;
+	map<int, vector<int>> level_offsets;
+
+	for (int level = 0; level <= max_level; level++) {
+		auto &nodes = by_level[level];
+		int width = 0;
+		level_offsets[level] = {};
+		for (idx_t i = 0; i < nodes.size(); i++) {
+			level_offsets[level].push_back(width);
+			int box_width = 0;
+			for (auto &line : boxes[nodes[i]].lines) {
+				box_width = std::max(box_width, (int)line.size());
+			}
+			width += box_width;
+			if (i + 1 < nodes.size()) {
+				width += level_gap;
+			}
+		}
+		level_widths[level] = width;
+	}
+
+	int total_width = 0;
+	for (auto &entry : level_widths) {
+		total_width = std::max(total_width, entry.second);
+	}
+
+	// center each level within total_width and record absolute node centers
+	map<PhysicalDAGNode *, int> node_centers;
+
+	for (int level = 0; level <= max_level; level++) {
+		int shift = (total_width - level_widths[level]) / 2;
+		auto &nodes = by_level[level];
+		for (idx_t i = 0; i < nodes.size(); i++) {
+			node_centers[nodes[i]] = shift + level_offsets[level][i] + boxes[nodes[i]].center;
+		}
+	}
+
+	// render level by level
+	vector<string> output;
+
+	for (int level = 0; level <= max_level; level++) {
+		auto &nodes = by_level[level];
+		int shift = (total_width - level_widths[level]) / 2;
+
+		// render boxes
+		int max_height = 0;
+		for (auto *node : nodes) {
+			max_height = std::max(max_height, (int)boxes[node].lines.size());
+		}
+
+		for (int row = 0; row < max_height; row++) {
+			string line(total_width, ' ');
+			for (idx_t i = 0; i < nodes.size(); i++) {
+				auto &box = boxes[nodes[i]];
+				if (row < (int)box.lines.size()) {
+					int offset = shift + level_offsets[level][i];
+					for (idx_t j = 0; j < box.lines[row].size() && offset + (int)j < total_width; j++) {
+						line[offset + j] = box.lines[row][j];
+					}
+				}
+			}
+			output.push_back(line);
+		}
+
+		// draw connectors to next level
+		if (level >= max_level) {
+			continue;
+		}
+
+		auto &children = by_level[level + 1];
+
+		// collect parent->child connections for this level transition
+		struct ConnInfo {
+			int child_center;
+			vector<int> parent_centers;
+			vector<string> edge_labels;
+		};
+		vector<ConnInfo> conns;
+
+		for (auto *child : children) {
+			ConnInfo info;
+			info.child_center = node_centers[child];
+
+			for (idx_t pi = 0; pi < child->parents.size(); pi++) {
+				auto *parent = child->parents[pi];
+				if (parent->level != level) {
+					continue;
+				}
+				info.parent_centers.push_back(node_centers[parent]);
+
+				if (pi < child->edges_to_parents.size()) {
+					auto &edge = child->edges_to_parents[pi];
+					string label;
+					for (idx_t ci = 0; ci < edge.parent_cols.size(); ci++) {
+						if (ci > 0) {
+							label += ", ";
+						}
+						label += table_mgr.GetColumnName(edge.parent_table, edge.parent_cols[ci].column_index) + " / " +
+						         table_mgr.GetColumnName(edge.child_table, edge.child_cols[ci].column_index);
+					}
+					info.edge_labels.push_back(label);
+				}
+			}
+
+			if (!info.parent_centers.empty()) {
+				conns.push_back(info);
+			}
+		}
+
+		if (conns.empty()) {
+			continue;
+		}
+
+		// determine connector width
+		int conn_width = total_width;
+		for (auto &info : conns) {
+			for (int pc : info.parent_centers) {
+				conn_width = std::max(conn_width, pc + 1);
+			}
+			conn_width = std::max(conn_width, info.child_center + 1);
+			for (auto &lbl : info.edge_labels) {
+				conn_width = std::max(conn_width, (int)lbl.size() + 2);
+			}
+		}
+
+		// vertical lines from parents
+		string vert1(conn_width, ' ');
+		for (auto &info : conns) {
+			for (int pc : info.parent_centers) {
+				if (pc >= 0 && pc < conn_width) {
+					vert1[pc] = '|';
+				}
+			}
+		}
+		output.push_back(vert1);
+
+		// branch line (when parent and child not aligned, or multiple parents)
+		string branch(conn_width, ' ');
+		bool need_branch = false;
+
+		for (auto &info : conns) {
+			vector<int> all_pos = info.parent_centers;
+			all_pos.push_back(info.child_center);
+			int leftmost = *std::min_element(all_pos.begin(), all_pos.end());
+			int rightmost = *std::max_element(all_pos.begin(), all_pos.end());
+
+			if (leftmost != rightmost) {
+				need_branch = true;
+				for (int c = leftmost; c <= rightmost && c < conn_width; c++) {
+					branch[c] = '-';
+				}
+				for (int pc : info.parent_centers) {
+					if (pc >= 0 && pc < conn_width) {
+						branch[pc] = '+';
+					}
+				}
+				if (info.child_center >= 0 && info.child_center < conn_width) {
+					branch[info.child_center] = '+';
+				}
+			}
+		}
+		if (need_branch) {
+			output.push_back(branch);
+		}
+
+		for (auto &info : conns) {
+			for (idx_t i = 0; i < info.edge_labels.size(); i++) {
+				string &lbl = info.edge_labels[i];
+				if (lbl.empty()) {
+					continue;
+				}
+				string label_line(conn_width, ' ');
+				int center = info.child_center;
+				int start = center - (int)lbl.size() / 2;
+				if (start < 0) {
+					start = 0;
+				}
+				for (idx_t j = 0; j < lbl.size() && start + (int)j < conn_width; j++) {
+					label_line[start + j] = lbl[j];
+				}
+				output.push_back(label_line);
+			}
+		}
+
+		// vertical lines to children
+		string vert2(conn_width, ' ');
+		for (auto &info : conns) {
+			if (info.child_center >= 0 && info.child_center < conn_width) {
+				vert2[info.child_center] = '|';
+			}
+		}
+		output.push_back(vert2);
+	}
+
+	// print
+	Printer::Print("\n=== Physical Plan DAG ===");
+	for (auto &line : output) {
+		Printer::Print(line);
+	}
+	Printer::Print("=== Physical Plan DAG ===\n");
+}
+
+} // namespace duckdb
diff --git a/src/utils/dag_printer.hpp b/src/utils/dag_printer.hpp
new file mode 100644
index 0000000..e879f08
--- /dev/null
+++ b/src/utils/dag_printer.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "../optimizer/table_manager.hpp"
+#include "duckdb/common/printer.hpp"
+
+namespace duckdb {
+
+struct TreeNode;
+struct PhysicalDAGNode;
+
+// tree utilities
+TreeNode *FindNodeInTree(TreeNode *root, idx_t table_idx);
+void SetTreeLevels(TreeNode *node, int level);
+
+// render and print the Robust transfer DAG as an ASCII tree
+void PrintTransferDAG(TreeNode *root, TableManager &table_mgr);
+void PrintTransferDAG(TreeNode *root, TableManager &table_mgr, const string &title);
+
+// render and print the physical plan DAG (supports multiple roots/parents)
+void PrintPhysicalDAG(vector<PhysicalDAGNode *> &all_nodes, TableManager &table_mgr);
+
+} // namespace duckdb
diff --git a/src/utils/debug_utils.hpp b/src/utils/debug_utils.hpp
new file mode 100644
index 0000000..9276806
--- /dev/null
+++ b/src/utils/debug_utils.hpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//                         RPT Extension
+//
+// debug_utils.hpp
+//
+// Debug printing utilities - prints only in debug builds
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb/common/printer.hpp"
+#include "duckdb/common/string_util.hpp"
+
+namespace duckdb {
+
+// debug print macro - only prints in debug builds, no-op in release
+#ifdef DEBUG
+
+#define D_PRINT(...)  Printer::Print(__VA_ARGS__)
+#define D_PRINTF(...) Printer::PrintF(__VA_ARGS__)
+
+#else
+
+#define D_PRINT(...)  ((void)0)
+#define D_PRINTF(...) ((void)0)
+
+#endif
+
+} // namespace duckdb
diff --git a/test/sql/rpt.test b/test/sql/rpt.test
deleted file mode 100644
index dbafbf9..0000000
--- a/test/sql/rpt.test
+++ /dev/null
@@ -1,23 +0,0 @@
-# name: test/sql/rpt.test
-# description: test rpt extension
-# group: [sql]
-
-# Before we load the extension, this will fail
-statement error
-SELECT rpt('Sam');
-----
-Catalog Error: Scalar Function with name rpt does not exist!
-
-# Require statement will ensure this test is run with this extension loaded
-require rpt
-
-# Confirm the extension works
-query I
-SELECT rpt('Sam');
-----
-Rpt Sam 🐥
-
-query I
-SELECT rpt_openssl_version('Michael') ILIKE 'Rpt Michael, my linked OpenSSL version is OpenSSL%';
-----
-true
diff --git a/vcpkg.json b/vcpkg.json
index 090f982..9838d46 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -1,9 +1,13 @@
 {
-  "dependencies" : [ {
-    "name" : "openssl",
-    "version>=" : "3.5.3"
-  } ],
-  "vcpkg-configuration" : {
-    "overlay-ports" : [ "./extension-ci-tools/vcpkg_ports" ]
+  "dependencies": [
+    "openssl"
+  ],
+  "vcpkg-configuration": {
+    "overlay-ports": [
+      "./extension-ci-tools/vcpkg_ports"
+    ],
+    "overlay-triplets": [
+      "./extension-ci-tools/toolchains"
+    ]
   }
 }
\ No newline at end of file