From ee3e71ca2c07a12a8332a3877c0ce14adc9a5da8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 16 Jun 2025 15:36:53 -0700 Subject: [PATCH 01/13] Updated format for license identifier --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9395f9e..0cfa183 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" authors = [ {name = "Andrew Kane", email = "andrew@ankane.org"} ] -license = {text = "MIT"} +license = "MIT" requires-python = ">= 3.9" dependencies = [ "numpy" From 33dee606229489c9ffb0cb5a1cd72bd4705ac618 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Sep 2025 16:35:08 -0700 Subject: [PATCH 02/13] Added support for str objects for bit type with SQLAlchemy - #137 Co-authored-by: Giacomo rua --- CHANGELOG.md | 4 ++++ pgvector/sqlalchemy/bit.py | 12 ++++++++++++ tests/test_sqlalchemy.py | 5 +++++ 3 files changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ed80e3..f219b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.4.2 (unreleased) + +- Added support for `str` objects for `bit` type with SQLAlchemy + ## 0.4.1 (2025-04-26) - Fixed `SparseVector` constructor for SciPy sparse matrices diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 0f83f3c..1ea85c3 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -14,6 +14,18 @@ def get_col_spec(self, **kw): return 'BIT' return 'BIT(%d)' % self.length + def bind_processor(self, dialect): + if dialect.__class__.__name__ == 'PGDialect_asyncpg': + import asyncpg + + def process(value): + if isinstance(value, str): + return asyncpg.BitString(value) + return value + return process + else: + return super().bind_processor(dialect) + class comparator_factory(UserDefinedType.Comparator): def hamming_distance(self, other): return self.op('<~>', return_type=Float)(other) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 5aec977..cd7bad8 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -596,6 +596,11 @@ async def test_bit(self, engine): item = await session.get(Item, 1) assert item.binary_embedding == embedding + if engine == asyncpg_engine: + session.add(Item(id=2, binary_embedding='101')) + item = await session.get(Item, 2) + assert item.binary_embedding == embedding + await engine.dispose() @pytest.mark.asyncio From dc9a8f959995f009649fd230139ca41193e0a801 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Sep 2025 12:04:21 -0700 Subject: [PATCH 03/13] Added test for binary quantization with re-ranking --- tests/test_sqlalchemy.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index cd7bad8..702eee1 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -528,6 +528,22 @@ def test_binary_quantize(self, engine): items = session.query(Item).order_by(distance).all() assert [v.id for v in items] == [2, 3, 1] + def test_binary_quantize_reranking(self, engine): + # recreate index (could also vacuum table) + binary_quantize_index.drop(setup_engine) + binary_quantize_index.create(setup_engine) + + with Session(engine) as session: + session.add(Item(id=1, embedding=[-1, -2, -3])) + session.add(Item(id=2, embedding=[1, -2, 3])) + session.add(Item(id=3, embedding=[1, 2, 3])) + session.commit() + + distance = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) + subquery = session.query(Item).order_by(distance).limit(20).subquery() + items = session.query(subquery).order_by(subquery.c.embedding.cosine_distance([3, -1, 2])).limit(5).all() + assert [v.id for v in items] == [2, 3, 1] + @pytest.mark.parametrize('engine', array_engines) class TestSqlalchemyArray: From caf1a2e0dd7a1ba2ad0ca9f09b50516dcfffcdeb Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Sep 2025 12:09:18 -0700 Subject: [PATCH 04/13] Added docs for binary quantization with SQLAlchemy [skip ci] --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index 7c302b1..bfec8bb 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,38 @@ order = func.cast(Item.embedding, HALFVEC(3)).l2_distance([3, 1, 2]) session.scalars(select(Item).order_by(order).limit(5)) ``` +#### Binary Quantization + +Use expression indexing for binary quantization + +```python +from pgvector.sqlalchemy import BIT +from sqlalchemy.sql import func + +index = Index( + 'my_index', + func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), + postgresql_using='hnsw', + postgresql_with={'m': 16, 'ef_construction': 64}, + postgresql_ops={'embedding': 'bit_hamming_ops'} +) +``` + +Get the nearest neighbors by Hamming distance + +```python +order = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) +session.scalars(select(Item).order_by(order).limit(5)) +``` + +Re-rank by the original vectors for better recall + +```python +order = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) +subquery = session.query(Item).order_by(order).limit(20).subquery() +session.scalars(select(subquery).order_by(subquery.c.embedding.cosine_distance([3, -1, 2])).limit(5)) +``` + #### Arrays Add an array column From c820a53bfb46196551de3c3f59f81b192d890574 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Sep 2025 12:11:38 -0700 Subject: [PATCH 05/13] Simplified examples [skip ci] --- README.md | 2 -- tests/test_sqlalchemy.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/README.md b/README.md index bfec8bb..7cff86c 100644 --- a/README.md +++ b/README.md @@ -259,7 +259,6 @@ index = Index( 'my_index', func.cast(Item.embedding, HALFVEC(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'halfvec_l2_ops'} ) ``` @@ -283,7 +282,6 @@ index = Index( 'my_index', func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'bit_hamming_ops'} ) ``` diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 702eee1..c59c12e 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -103,7 +103,6 @@ class Item(Base): 'sqlalchemy_orm_half_precision_index', func.cast(Item.embedding, HALFVEC(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'halfvec_l2_ops'} ) half_precision_index.create(setup_engine) @@ -112,7 +111,6 @@ class Item(Base): 'sqlalchemy_orm_binary_quantize_index', func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'bit_hamming_ops'} ) binary_quantize_index.create(setup_engine) From 1a72b7571adf3325174b383aca85bfb3a5b925fa Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Sep 2025 00:12:48 -0700 Subject: [PATCH 06/13] Updated pgvector on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4d4e8ed..d943ea0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: dev-files: true - run: | cd /tmp - git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git + git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git cd pgvector make sudo make install From e211ba4029f204734f0c001fbb90f6a594d561ae Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 9 Oct 2025 23:19:50 -0700 Subject: [PATCH 07/13] Test with Python 3.14 on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d943ea0..52ab712 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.13, 3.9] + python: [3.14, 3.9] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From e2986daf2b1533cc2c849f7e39350e31d57ac325 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 4 Dec 2025 16:47:23 -0800 Subject: [PATCH 08/13] Added support for Django 6 --- CHANGELOG.md | 1 + pgvector/django/extensions.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f219b22..62da0bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 0.4.2 (unreleased) +- Added support for Django 6 - Added support for `str` objects for `bit` type with SQLAlchemy ## 0.4.1 (2025-04-26) diff --git a/pgvector/django/extensions.py b/pgvector/django/extensions.py index 0573f72..1d04739 100644 --- a/pgvector/django/extensions.py +++ b/pgvector/django/extensions.py @@ -1,6 +1,11 @@ +from django import VERSION from django.contrib.postgres.operations import CreateExtension class VectorExtension(CreateExtension): - def __init__(self): - self.name = 'vector' + if VERSION[0] >= 6: + def __init__(self, hints=None): + super().__init__('vector', hints=hints) + else: + def __init__(self): + self.name = 'vector' From 674f5ba3410c873d49f50fa9725b95d9db50c674 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 4 Dec 2025 16:50:18 -0800 Subject: [PATCH 09/13] Updated checkout action [skip ci] --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 52ab712..34f15d5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,7 @@ jobs: matrix: python: [3.14, 3.9] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} From 2968f258f9486531bd1340cbda4ff8fcaf06cdc1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 4 Dec 2025 17:06:01 -0800 Subject: [PATCH 10/13] Version bump to 0.4.2 [skip ci] --- CHANGELOG.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62da0bb..745335f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.4.2 (unreleased) +## 0.4.2 (2025-12-04) - Added support for Django 6 - Added support for `str` objects for `bit` type with SQLAlchemy diff --git a/pyproject.toml b/pyproject.toml index 0cfa183..6f91e04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pgvector" -version = "0.4.1" +version = "0.4.2" description = "pgvector support for Python" readme = "README.md" authors = [ From 05387da3c5ce0dc9f1d6ef238dcae118aa8176ea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Dec 2025 16:31:46 -0800 Subject: [PATCH 11/13] Updated examples [skip ci] --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7cff86c..a208ae0 100644 --- a/README.md +++ b/README.md @@ -177,10 +177,10 @@ session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR class Item(Base): - embedding = mapped_column(Vector(3)) + embedding = mapped_column(VECTOR(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` @@ -306,11 +306,11 @@ session.scalars(select(subquery).order_by(subquery.c.embedding.cosine_distance([ Add an array column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import ARRAY class Item(Base): - embeddings = mapped_column(ARRAY(Vector(3))) + embeddings = mapped_column(ARRAY(VECTOR(3))) ``` And register the types with the underlying driver @@ -359,10 +359,10 @@ session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR class Item(SQLModel, table=True): - embedding: Any = Field(sa_type=Vector(3)) + embedding: Any = Field(sa_type=VECTOR(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` From 6d8db07f74fd95b3673fd8149f3f805a15788f48 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Dec 2025 16:34:08 -0800 Subject: [PATCH 12/13] Updated readme [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a208ae0..36d6c06 100644 --- a/README.md +++ b/README.md @@ -345,7 +345,7 @@ from sqlalchemy import event @event.listens_for(engine, "connect") def connect(dbapi_connection, connection_record): - register_vector(dbapi_connection, arrays=True) + register_vector(dbapi_connection) ``` ## SQLModel From b34f1c994e843dd7468d600b0f0ff5dbb949ec61 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Dec 2025 16:38:13 -0800 Subject: [PATCH 13/13] Updated examples [skip ci] --- examples/implicit/example.py | 6 +++--- examples/lightfm/example.py | 6 +++--- examples/surprise/example.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/implicit/example.py b/examples/implicit/example.py index f70eb8c..2cbf7c6 100644 --- a/examples/implicit/example.py +++ b/examples/implicit/example.py @@ -1,6 +1,6 @@ import implicit from implicit.datasets.movielens import get_movielens -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Integer, String from sqlalchemy.orm import declarative_base, mapped_column, Session @@ -16,7 +16,7 @@ class User(Base): __tablename__ = 'user' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) class Item(Base): @@ -24,7 +24,7 @@ class Item(Base): id = mapped_column(Integer, primary_key=True) title = mapped_column(String) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) Base.metadata.drop_all(engine) diff --git a/examples/lightfm/example.py b/examples/lightfm/example.py index fcb9027..65031c4 100644 --- a/examples/lightfm/example.py +++ b/examples/lightfm/example.py @@ -1,6 +1,6 @@ from lightfm import LightFM from lightfm.datasets import fetch_movielens -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Float, Integer, String from sqlalchemy.orm import declarative_base, mapped_column, Session @@ -16,7 +16,7 @@ class User(Base): __tablename__ = 'user' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) class Item(Base): @@ -24,7 +24,7 @@ class Item(Base): id = mapped_column(Integer, primary_key=True) title = mapped_column(String) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) bias = mapped_column(Float) diff --git a/examples/surprise/example.py b/examples/surprise/example.py index bd7d18d..e413bcf 100644 --- a/examples/surprise/example.py +++ b/examples/surprise/example.py @@ -1,4 +1,4 @@ -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Integer from sqlalchemy.orm import declarative_base, mapped_column, Session from surprise import Dataset, SVD @@ -15,14 +15,14 @@ class User(Base): __tablename__ = 'user' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) class Item(Base): __tablename__ = 'item' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) Base.metadata.drop_all(engine)