diff --git a/docarray/index/__init__.py b/docarray/index/__init__.py index 30f6b3edf58..9e4dbde474a 100644 --- a/docarray/index/__init__.py +++ b/docarray/index/__init__.py @@ -1,7 +1,7 @@ import types from typing import TYPE_CHECKING -from docarray.index.backends.in_memory import InMemoryDocIndex +from docarray.index.backends.in_memory import InMemoryExactNNIndex from docarray.utils._internal.misc import ( _get_path_from_docarray_root_level, import_library, @@ -14,7 +14,7 @@ from docarray.index.backends.qdrant import QdrantDocumentIndex # noqa: F401 from docarray.index.backends.weaviate import WeaviateDocumentIndex # noqa: F401 -__all__ = ['InMemoryDocIndex'] +__all__ = ['InMemoryExactNNIndex'] def __getattr__(name: str): diff --git a/docarray/index/backends/in_memory.py b/docarray/index/backends/in_memory.py index 853cc47d2b1..400b60fc4c5 100644 --- a/docarray/index/backends/in_memory.py +++ b/docarray/index/backends/in_memory.py @@ -38,9 +38,9 @@ TSchema = TypeVar('TSchema', bound=BaseDoc) -class InMemoryDocIndex(BaseDocIndex, Generic[TSchema]): +class InMemoryExactNNIndex(BaseDocIndex, Generic[TSchema]): def __init__(self, docs: Optional[DocList] = None, **kwargs): - """Initialize InMemoryDocIndex""" + """Initialize InMemoryExactNNIndex""" super().__init__(db_config=None, **kwargs) self._runtime_config = self.RuntimeConfig() self._docs = ( @@ -78,13 +78,13 @@ def build(self, *args, **kwargs) -> Any: @dataclass class DBConfig(BaseDocIndex.DBConfig): - """Dataclass that contains all "static" configurations of InMemoryDocIndex.""" + """Dataclass that contains all "static" configurations of InMemoryExactNNIndex.""" pass @dataclass class RuntimeConfig(BaseDocIndex.RuntimeConfig): - """Dataclass that contains all "dynamic" configurations of InMemoryDocIndex.""" + """Dataclass that contains all "dynamic" configurations of InMemoryExactNNIndex.""" default_column_config: Dict[Type, Dict[str, Any]] = field( default_factory=lambda: defaultdict( @@ -139,7 +139,8 @@ def 
_get_items( If no document is found, a KeyError is raised. :param doc_ids: ids to get from the Document index - :return: Sequence of Documents, sorted corresponding to the order of `doc_ids`. Duplicate `doc_ids` can be omitted in the output. + :return: Sequence of Documents, sorted corresponding to the order of `doc_ids`. + Duplicate `doc_ids` can be omitted in the output. """ indices = [] for i, doc in enumerate(self._docs): @@ -149,7 +150,7 @@ def _get_items( def execute_query(self, query: List[Tuple[str, Dict]], *args, **kwargs) -> Any: """ - Execute a query on the InMemoryDocIndex. + Execute a query on the InMemoryExactNNIndex. Can take two kinds of inputs: diff --git a/docs/API_reference/doc_index/backends/in_memory.md b/docs/API_reference/doc_index/backends/in_memory.md index 6fe76893440..4dadcb2558d 100644 --- a/docs/API_reference/doc_index/backends/in_memory.md +++ b/docs/API_reference/doc_index/backends/in_memory.md @@ -1,3 +1,3 @@ -# InMemoryDocIndex +# InMemoryExactNNIndex -::: docarray.index.backends.in_memory.InMemoryDocIndex +::: docarray.index.backends.in_memory.InMemoryExactNNIndex diff --git a/docs/user_guide/storing/index_in_memory.md b/docs/user_guide/storing/index_in_memory.md index 88bab5ce5c2..57daf501566 100644 --- a/docs/user_guide/storing/index_in_memory.md +++ b/docs/user_guide/storing/index_in_memory.md @@ -1,22 +1,22 @@ # In-Memory Document Index -[InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex] stores all Documents in DocLists in memory. +[InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex] stores all Documents in DocLists in memory. It is a great starting point for small datasets, where you may not want to launch a database server. 
-For vector search and filtering the InMemoryDocIndex utilizes DocArray's [`find()`][docarray.utils.find.find] and +For vector search and filtering the InMemoryExactNNIndex utilizes DocArray's [`find()`][docarray.utils.find.find] and [`filter_docs()`][docarray.utils.filter.filter_docs] functions. ## Basic usage -To see how to create a [InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex] instance, add Documents, +To see how to create an [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex] instance, add Documents, perform search, etc. see the [general user guide](./docindex.md). You can initialize the index as follows: ```python from docarray import BaseDoc, DocList -from docarray.index.backends.in_memory import InMemoryDocIndex +from docarray.index.backends.in_memory import InMemoryExactNNIndex from docarray.typing import NdArray @@ -26,20 +26,20 @@ class MyDoc(BaseDoc): docs = DocList[MyDoc](MyDoc() for _ in range(10)) -doc_index = InMemoryDocIndex[MyDoc]() +doc_index = InMemoryExactNNIndex[MyDoc]() doc_index.index(docs) # or in one step: -doc_index = InMemoryDocIndex[MyDoc](docs) +doc_index = InMemoryExactNNIndex[MyDoc](docs) ``` ## Configuration -This section lays out the configurations and options that are specific to [InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex]. +This section lays out the configurations and options that are specific to [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex]. ### RuntimeConfig -The `RuntimeConfig` of [InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex] contains only one entry: +The `RuntimeConfig` of [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex] contains only one entry: the default mapping from Python types to column configurations. You can see in the [section below](#field-wise-configurations) how to override configurations for specific fields. 
@@ -95,7 +95,7 @@ When using the index, you can define multiple fields and their nested structure. ```python import numpy as np from docarray import BaseDoc -from docarray.index.backends.in_memory import InMemoryDocIndex +from docarray.index.backends.in_memory import InMemoryExactNNIndex from docarray.typing import ImageUrl, VideoUrl, AnyTensor from pydantic import Field @@ -118,7 +118,7 @@ class YouTubeVideoDoc(BaseDoc): tensor: AnyTensor = Field(space='cosine_sim') -doc_index = InMemoryDocIndex[YouTubeVideoDoc]() +doc_index = InMemoryExactNNIndex[YouTubeVideoDoc]() index_docs = [ YouTubeVideoDoc( title=f'video {i+1}', @@ -134,7 +134,7 @@ doc_index.index(index_docs) ## Search Documents -To search Documents, the `InMemoryDocIndex` uses DocArray's [`find`][docarray.utils.find.find] function. +To search Documents, the `InMemoryExactNNIndex` uses DocArray's [`find`][docarray.utils.find.find] function. You can use the `search_field` to specify which field to use when performing the vector search. You can use the dunder operator to specify the field defined in nested data. @@ -157,7 +157,7 @@ docs, scores = doc_index.find(query, search_field='video__tensor', limit=3) ## Filter Documents -To filter Documents, the `InMemoryDocIndex` uses DocArray's [`filter_docs()`][docarray.utils.filter.filter_docs] function. +To filter Documents, the `InMemoryExactNNIndex` uses DocArray's [`filter_docs()`][docarray.utils.filter.filter_docs] function. You can filter your documents by using the `filter()` or `filter_batched()` method with a corresponding filter query. The query should follow the query language of the DocArray's [`filter_docs()`][docarray.utils.filter.filter_docs] function. 
@@ -174,7 +174,7 @@ class Book(BaseDoc): books = DocList[Book]([Book(title=f'title {i}', price=i * 10) for i in range(10)]) -book_index = InMemoryDocIndex[Book](books) +book_index = InMemoryExactNNIndex[Book](books) # filter for books that are cheaper than 29 dollars query = {'price': {'$lte': 29}} diff --git a/tests/index/in_memory/test_in_memory.py b/tests/index/in_memory/test_in_memory.py index e999ac15f30..bb1d1b47aed 100644 --- a/tests/index/in_memory/test_in_memory.py +++ b/tests/index/in_memory/test_in_memory.py @@ -3,7 +3,7 @@ from pydantic import Field from docarray import BaseDoc, DocList -from docarray.index.backends.in_memory import InMemoryDocIndex +from docarray.index.backends.in_memory import InMemoryExactNNIndex from docarray.typing import NdArray @@ -26,7 +26,7 @@ def docs(): def test_indexing(docs): - doc_index = InMemoryDocIndex[SchemaDoc]() + doc_index = InMemoryExactNNIndex[SchemaDoc]() assert doc_index.num_docs() == 0 doc_index.index(docs) @@ -35,7 +35,7 @@ def test_indexing(docs): @pytest.fixture def doc_index(docs): - doc_index = InMemoryDocIndex[SchemaDoc]() + doc_index = InMemoryExactNNIndex[SchemaDoc]() doc_index.index(docs) return doc_index