Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docarray/index/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import types
from typing import TYPE_CHECKING

from docarray.index.backends.in_memory import InMemoryDocIndex
from docarray.index.backends.in_memory import InMemoryExactNNIndex
from docarray.utils._internal.misc import (
_get_path_from_docarray_root_level,
import_library,
Expand All @@ -14,7 +14,7 @@
from docarray.index.backends.qdrant import QdrantDocumentIndex # noqa: F401
from docarray.index.backends.weaviate import WeaviateDocumentIndex # noqa: F401

__all__ = ['InMemoryDocIndex']
__all__ = ['InMemoryExactNNIndex']


def __getattr__(name: str):
Expand Down
13 changes: 7 additions & 6 deletions docarray/index/backends/in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@
TSchema = TypeVar('TSchema', bound=BaseDoc)


class InMemoryDocIndex(BaseDocIndex, Generic[TSchema]):
class InMemoryExactNNIndex(BaseDocIndex, Generic[TSchema]):
def __init__(self, docs: Optional[DocList] = None, **kwargs):
"""Initialize InMemoryDocIndex"""
"""Initialize InMemoryExactNNIndex"""
super().__init__(db_config=None, **kwargs)
self._runtime_config = self.RuntimeConfig()
self._docs = (
Expand Down Expand Up @@ -78,13 +78,13 @@ def build(self, *args, **kwargs) -> Any:

@dataclass
class DBConfig(BaseDocIndex.DBConfig):
"""Dataclass that contains all "static" configurations of InMemoryDocIndex."""
"""Dataclass that contains all "static" configurations of InMemoryExactNNIndex."""

pass

@dataclass
class RuntimeConfig(BaseDocIndex.RuntimeConfig):
"""Dataclass that contains all "dynamic" configurations of InMemoryDocIndex."""
"""Dataclass that contains all "dynamic" configurations of InMemoryExactNNIndex."""

default_column_config: Dict[Type, Dict[str, Any]] = field(
default_factory=lambda: defaultdict(
Expand Down Expand Up @@ -139,7 +139,8 @@ def _get_items(
If no document is found, a KeyError is raised.

:param doc_ids: ids to get from the Document index
:return: Sequence of Documents, sorted corresponding to the order of `doc_ids`. Duplicate `doc_ids` can be omitted in the output.
:return: Sequence of Documents, sorted corresponding to the order of `doc_ids`.
Duplicate `doc_ids` can be omitted in the output.
"""
indices = []
for i, doc in enumerate(self._docs):
Expand All @@ -149,7 +150,7 @@ def _get_items(

def execute_query(self, query: List[Tuple[str, Dict]], *args, **kwargs) -> Any:
"""
Execute a query on the InMemoryDocIndex.
Execute a query on the InMemoryExactNNIndex.

Can take two kinds of inputs:

Expand Down
4 changes: 2 additions & 2 deletions docs/API_reference/doc_index/backends/in_memory.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# InMemoryDocIndex
# InMemoryExactNNIndex

::: docarray.index.backends.in_memory.InMemoryDocIndex
::: docarray.index.backends.in_memory.InMemoryExactNNIndex
26 changes: 13 additions & 13 deletions docs/user_guide/storing/index_in_memory.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# In-Memory Document Index


[InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex] stores all Documents in DocLists in memory.
[InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex] stores all Documents in DocLists in memory.
It is a great starting point for small datasets, where you may not want to launch a database server.

For vector search and filtering the InMemoryDocIndex utilizes DocArray's [`find()`][docarray.utils.find.find] and
For vector search and filtering, the InMemoryExactNNIndex utilizes DocArray's [`find()`][docarray.utils.find.find] and
[`filter_docs()`][docarray.utils.filter.filter_docs] functions.

## Basic usage

To see how to create a [InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex] instance, add Documents,
To see how to create an [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex] instance, add Documents,
perform search, etc., see the [general user guide](./docindex.md).

You can initialize the index as follows:

```python
from docarray import BaseDoc, DocList
from docarray.index.backends.in_memory import InMemoryDocIndex
from docarray.index.backends.in_memory import InMemoryExactNNIndex
from docarray.typing import NdArray


Expand All @@ -26,20 +26,20 @@ class MyDoc(BaseDoc):

docs = DocList[MyDoc](MyDoc() for _ in range(10))

doc_index = InMemoryDocIndex[MyDoc]()
doc_index = InMemoryExactNNIndex[MyDoc]()
doc_index.index(docs)

# or in one step:
doc_index = InMemoryDocIndex[MyDoc](docs)
doc_index = InMemoryExactNNIndex[MyDoc](docs)
```

## Configuration

This section lays out the configurations and options that are specific to [InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex].
This section lays out the configurations and options that are specific to [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex].

### RuntimeConfig

The `RuntimeConfig` of [InMemoryDocIndex][docarray.index.backends.in_memory.InMemoryDocIndex] contains only one entry:
The `RuntimeConfig` of [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex] contains only one entry:
the default mapping from Python types to column configurations.

You can see in the [section below](#field-wise-configurations) how to override configurations for specific fields.
Expand Down Expand Up @@ -95,7 +95,7 @@ When using the index, you can define multiple fields and their nested structure.
```python
import numpy as np
from docarray import BaseDoc
from docarray.index.backends.in_memory import InMemoryDocIndex
from docarray.index.backends.in_memory import InMemoryExactNNIndex
from docarray.typing import ImageUrl, VideoUrl, AnyTensor
from pydantic import Field

Expand All @@ -118,7 +118,7 @@ class YouTubeVideoDoc(BaseDoc):
tensor: AnyTensor = Field(space='cosine_sim')


doc_index = InMemoryDocIndex[YouTubeVideoDoc]()
doc_index = InMemoryExactNNIndex[YouTubeVideoDoc]()
index_docs = [
YouTubeVideoDoc(
title=f'video {i+1}',
Expand All @@ -134,7 +134,7 @@ doc_index.index(index_docs)

## Search Documents

To search Documents, the `InMemoryDocIndex` uses DocArray's [`find`][docarray.utils.find.find] function.
To search Documents, the `InMemoryExactNNIndex` uses DocArray's [`find`][docarray.utils.find.find] function.

You can use the `search_field` to specify which field to use when performing the vector search.
You can use the dunder operator to specify the field defined in nested data.
Expand All @@ -157,7 +157,7 @@ docs, scores = doc_index.find(query, search_field='video__tensor', limit=3)

## Filter Documents

To filter Documents, the `InMemoryDocIndex` uses DocArray's [`filter_docs()`][docarray.utils.filter.filter_docs] function.
To filter Documents, the `InMemoryExactNNIndex` uses DocArray's [`filter_docs()`][docarray.utils.filter.filter_docs] function.

You can filter your documents by using the `filter()` or `filter_batched()` method with a corresponding filter query.
The query should follow the query language of DocArray's [`filter_docs()`][docarray.utils.filter.filter_docs] function.
Expand All @@ -174,7 +174,7 @@ class Book(BaseDoc):


books = DocList[Book]([Book(title=f'title {i}', price=i * 10) for i in range(10)])
book_index = InMemoryDocIndex[Book](books)
book_index = InMemoryExactNNIndex[Book](books)

# filter for books that cost at most 29 dollars
query = {'price': {'$lte': 29}}
Expand Down
6 changes: 3 additions & 3 deletions tests/index/in_memory/test_in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pydantic import Field

from docarray import BaseDoc, DocList
from docarray.index.backends.in_memory import InMemoryDocIndex
from docarray.index.backends.in_memory import InMemoryExactNNIndex
from docarray.typing import NdArray


Expand All @@ -26,7 +26,7 @@ def docs():


def test_indexing(docs):
doc_index = InMemoryDocIndex[SchemaDoc]()
doc_index = InMemoryExactNNIndex[SchemaDoc]()
assert doc_index.num_docs() == 0

doc_index.index(docs)
Expand All @@ -35,7 +35,7 @@ def test_indexing(docs):

@pytest.fixture
def doc_index(docs):
doc_index = InMemoryDocIndex[SchemaDoc]()
doc_index = InMemoryExactNNIndex[SchemaDoc]()
doc_index.index(docs)
return doc_index

Expand Down