Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,10 @@ dl.insert(
And you can seamlessly switch between `DocVec` and `DocList`:

```python
vec_2 = dl.stack()
vec_2 = dl.to_doc_vec()
assert isinstance(vec_2, DocVec)

dl_2 = vec_2.unstack()
dl_2 = vec_2.to_doc_list()
assert isinstance(dl_2, DocList)
```

Expand Down
4 changes: 2 additions & 2 deletions docarray/array/doc_list/doc_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,13 +251,13 @@ def _set_data_column(
for doc, value in zip(self, values):
setattr(doc, field, value)

def stack(
def to_doc_vec(
self,
tensor_type: Type['AbstractTensor'] = NdArray,
) -> 'DocVec':
"""
Convert the `DocList` into a `DocVec`. `Self` cannot be used
afterwards
afterward
:param tensor_type: Tensor Class used to wrap the doc_vec tensors. This is useful
if the BaseDoc has some undefined tensor type like AnyTensor or Union of NdArray and TorchTensor
:return: A `DocVec` of the same document type as self
Expand Down
14 changes: 7 additions & 7 deletions docarray/array/doc_vec/doc_vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def __init__(
cast(AbstractTensor, tensor_columns[field_name])[i] = val

elif issubclass(field_type, BaseDoc):
doc_columns[field_name] = getattr(docs, field_name).stack(
doc_columns[field_name] = getattr(docs, field_name).to_doc_vec(
tensor_type=self.tensor_type
)

Expand All @@ -169,7 +169,7 @@ def __init__(
for doc in docs:
docs_nested = getattr(doc, field_name)
if isinstance(docs_nested, DocList):
docs_nested = docs_nested.stack(
docs_nested = docs_nested.to_doc_vec(
tensor_type=self.tensor_type
)
docs_list.append(docs_nested)
Expand Down Expand Up @@ -213,7 +213,7 @@ def validate(
if isinstance(value, cls):
return value
elif isinstance(value, DocList.__class_getitem__(cls.doc_type)):
return cast(T, value.stack())
return cast(T, value.to_doc_vec())
elif isinstance(value, Sequence):
return cls(value)
elif isinstance(value, Iterable):
Expand Down Expand Up @@ -328,7 +328,7 @@ def _set_data_and_columns(
f'this DocVec schema : {self.doc_type}'
)
processed_value = cast(
T, value.stack(tensor_type=self.tensor_type)
T, value.to_doc_vec(tensor_type=self.tensor_type)
) # we need to copy data here

elif isinstance(value, DocVec):
Expand Down Expand Up @@ -474,7 +474,7 @@ def to_protobuf(self) -> 'DocVecProto':
any_columns=any_columns_proto,
)

def unstack(self: T) -> DocList[T_doc]:
def to_doc_list(self: T) -> DocList[T_doc]:
"""Convert DocVec into a DocList.

Note this destroys the arguments and returns a new DocList
Expand All @@ -486,10 +486,10 @@ def unstack(self: T) -> DocList[T_doc]:
unstacked_any_column = self._storage.any_columns

for field, doc_col in self._storage.doc_columns.items():
unstacked_doc_column[field] = doc_col.unstack()
unstacked_doc_column[field] = doc_col.to_doc_list()

for field, da_col in self._storage.docs_vec_columns.items():
unstacked_da_column[field] = [docs.unstack() for docs in da_col]
unstacked_da_column[field] = [docs.to_doc_list() for docs in da_col]

for field, tensor_col in list(self._storage.tensor_columns.items()):
# list is needed here otherwise we cannot delete the column
Expand Down
4 changes: 2 additions & 2 deletions docarray/index/backends/hnswlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
if tf is not None:
from docarray.typing import TensorFlowTensor

HNSWLIB_PY_VEC_TYPES = [list, tuple, np.ndarray, AbstractTensor]
HNSWLIB_PY_VEC_TYPES: List[Any] = [list, tuple, np.ndarray, AbstractTensor]

if torch is not None:
HNSWLIB_PY_VEC_TYPES.append(torch.Tensor)
HNSWLIB_PY_VEC_TYPES.append(torch.Tensor) # type: ignore

if tf is not None:
HNSWLIB_PY_VEC_TYPES.append(tf.Tensor)
Expand Down
2 changes: 1 addition & 1 deletion docs/how_to/multimodal_training_and_serving.md
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ async def embed_text(doc: Text) -> Text:
with torch.autocast(device_type="cuda", dtype=torch.float16):
with torch.inference_mode():
text_preprocess(doc)
da = DocList[Text]([doc], tensor_type=TorchTensor).stack()
da = DocList[Text]([doc], tensor_type=TorchTensor).to_doc_vec()
da.to(DEVICE)
doc.embedding = text_encoder(da)[0].to('cpu')
return doc
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/array/test_torch_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class Mmdoc(BaseDoc):
batch = DocList[Mmdoc](Mmdoc(text=f'hello{i}') for i in range(N))
batch.tensor = torch.zeros(N, 3, 224, 224)

batch = batch.stack()
batch = batch.to_doc_vec()

class Model(torch.nn.Module):
def __init__(self):
Expand Down
36 changes: 18 additions & 18 deletions tests/units/array/stack/test_array_stacked.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class MMdoc(BaseDoc):
]
)

return batch.stack()
return batch.to_doc_vec()


def test_create_from_list_docs():
Expand Down Expand Up @@ -79,7 +79,7 @@ class ImageDoc(BaseDoc):
[ImageDoc(tensor=torch.zeros(3, 224, 224)) for _ in range(10)]
)

batch = batch.stack()
batch = batch.to_doc_vec()
batch.tensor = torch.ones(10, 3, 224, 224)

assert (batch.tensor == torch.ones(10, 3, 224, 224)).all()
Expand All @@ -96,7 +96,7 @@ class ImageDoc(BaseDoc):
[ImageDoc(tensor=np.zeros((3, 224, 224))) for _ in range(10)]
)

batch = batch.stack()
batch = batch.to_doc_vec()
batch.tensor = np.ones((10, 3, 224, 224))

assert (batch.tensor == np.ones((10, 3, 224, 224))).all()
Expand All @@ -120,7 +120,7 @@ class ImageDoc(BaseDoc):
[ImageDoc(tensor=np.zeros((3, 224, 224))) for _ in range(10)]
)

batch = batch.stack()
batch = batch.to_doc_vec()

assert (
batch._storage.tensor_columns['tensor'] == np.zeros((10, 3, 224, 224))
Expand Down Expand Up @@ -156,7 +156,7 @@ class MMdoc(BaseDoc):
[MMdoc(img=ImageDoc(tensor=torch.zeros(3, 224, 224))) for _ in range(10)]
)

batch = batch.stack()
batch = batch.to_doc_vec()

assert (
batch._storage.doc_columns['img']._storage.tensor_columns['tensor']
Expand Down Expand Up @@ -192,8 +192,8 @@ class ImageDoc(BaseDoc):
[ImageDoc(tensor=torch.zeros(3, 224, 224)) for _ in range(10)]
)

batch = batch.stack()
da = batch.unstack()
batch = batch.to_doc_vec()
da = batch.to_doc_list()

for doc in da:
assert (doc.tensor == torch.zeros(3, 224, 224)).all()
Expand All @@ -210,16 +210,16 @@ class MMdoc(BaseDoc):
[MMdoc(img=ImageDoc(tensor=torch.zeros(3, 224, 224))) for _ in range(10)]
)

batch = batch.stack()
batch = batch.to_doc_vec()

da = batch.unstack()
da = batch.to_doc_list()

for doc in da:
assert (doc.img.tensor == torch.zeros(3, 224, 224)).all()


def test_unstack_nested_DocArray(nested_batch):
batch = nested_batch.unstack()
batch = nested_batch.to_doc_list()
for i in range(len(batch)):
assert isinstance(batch[i].img, DocList)
for doc in batch[i].img:
Expand All @@ -234,7 +234,7 @@ class ImageDoc(BaseDoc):
[ImageDoc(tensor=torch.zeros(3, 224, 224)) for _ in range(10)]
)

da = da.stack()
da = da.to_doc_vec()

assert len(da) == 10

Expand All @@ -252,7 +252,7 @@ class ImageDoc(BaseDoc):

# union fields aren't actually doc_vec
# just checking that there is no error
batch.stack()
batch.to_doc_vec()


@pytest.mark.parametrize(
Expand Down Expand Up @@ -402,7 +402,7 @@ class MyDoc(BaseDoc):
)
assert da[0].tensor.dtype == torch.int32

da = da.stack()
da = da.to_doc_vec()
assert da[0].tensor.dtype == torch.int32
assert da.tensor.dtype == torch.int32

Expand All @@ -416,7 +416,7 @@ class MyDoc(BaseDoc):
)
assert da[0].tensor.dtype == np.int32

da = da.stack()
da = da.to_doc_vec()
assert da[0].tensor.dtype == np.int32
assert da.tensor.dtype == np.int32

Expand All @@ -436,7 +436,7 @@ class MyDoc(BaseDoc):
assert all(doc.scalar.ndim == 0 for doc in da)
assert all(doc.scalar == 2.0 for doc in da)

stacked_da = da.stack()
stacked_da = da.to_doc_vec()
assert type(stacked_da.scalar) == NdArray

assert all(type(doc.scalar) == NdArray for doc in stacked_da)
Expand All @@ -457,7 +457,7 @@ class MyDoc(BaseDoc):
)
assert all(doc.scalar.ndim == 0 for doc in da)
assert all(doc.scalar == 2.0 for doc in da)
stacked_da = da.stack(tensor_type=TorchTensor)
stacked_da = da.to_doc_vec(tensor_type=TorchTensor)
assert type(stacked_da.scalar) == TorchTensor

assert all(type(doc.scalar) == TorchTensor for doc in stacked_da)
Expand All @@ -475,7 +475,7 @@ class MyDoc(BaseDoc):
da = DocList[MyDoc]([MyDoc() for _ in range(3)])
assert all(doc.scalar is None for doc in da)
assert all(doc.scalar == doc.scalar for doc in da)
stacked_da = da.stack()
stacked_da = da.to_doc_vec()
assert type(stacked_da.scalar) == NdArray

assert all(type(doc.scalar) == NdArray for doc in stacked_da) # TODO fail here
Expand All @@ -494,7 +494,7 @@ class MyDoc(BaseDoc):
da = DocList[MyDoc]([MyDoc() for _ in range(3)])
assert all(doc.scalar is None for doc in da)
assert all(doc.scalar == doc.scalar for doc in da)
stacked_da = da.stack(tensor_type=TorchTensor)
stacked_da = da.to_doc_vec(tensor_type=TorchTensor)
assert type(stacked_da.scalar) == TorchTensor

assert all(type(doc.scalar) == TorchTensor for doc in stacked_da)
Expand Down
14 changes: 7 additions & 7 deletions tests/units/array/stack/test_array_stacked_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Image(BaseDoc):

batch = DocList[Image]([Image(tensor=tf.zeros((3, 224, 224))) for _ in range(10)])

return batch.stack()
return batch.to_doc_vec()


@pytest.fixture()
Expand Down Expand Up @@ -108,7 +108,7 @@ class MMdoc(BaseDoc):

batch = DocList[MMdoc](
[MMdoc(img=Image(tensor=tf.zeros((3, 224, 224)))) for _ in range(10)]
).stack()
).to_doc_vec()

assert tnp.allclose(
batch._storage.doc_columns['img']._storage.tensor_columns['tensor'].tensor,
Expand All @@ -133,7 +133,7 @@ def test_stack_nested_DocArray(nested_batch):

@pytest.mark.tensorflow
def test_convert_to_da(batch):
da = batch.unstack()
da = batch.to_doc_list()

for doc in da:
assert tnp.allclose(doc.tensor.tensor, tf.zeros((3, 224, 224)))
Expand All @@ -151,15 +151,15 @@ class MMdoc(BaseDoc):
[MMdoc(img=Image(tensor=tf.zeros((3, 224, 224)))) for _ in range(10)]
)
assert isinstance(batch.img._storage.tensor_columns['tensor'], TensorFlowTensor)
da = batch.unstack()
da = batch.to_doc_list()

for doc in da:
assert tnp.allclose(doc.img.tensor.tensor, tf.zeros((3, 224, 224)))


@pytest.mark.tensorflow
def test_unstack_nested_DocArray(nested_batch):
batch = nested_batch.unstack()
batch = nested_batch.to_doc_list()
for i in range(len(batch)):
assert isinstance(batch[i].img, DocList)
for doc in batch[i].img:
Expand All @@ -173,7 +173,7 @@ class Image(BaseDoc):

da = DocList[Image]([Image(tensor=tf.zeros((3, 224, 224))) for _ in range(10)])

da = da.stack()
da = da.to_doc_vec()

assert len(da) == 10

Expand Down Expand Up @@ -285,6 +285,6 @@ class MyDoc(BaseDoc):
)
assert da[0].tensor.tensor.dtype == tf.int32

da = da.stack()
da = da.to_doc_vec()
assert da[0].tensor.tensor.dtype == tf.int32
assert da.tensor.tensor.dtype == tf.int32
6 changes: 3 additions & 3 deletions tests/units/array/stack/test_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Image(BaseDoc):

batch = DocList[Image]([Image(tensor=torch.zeros(3, 224, 224)) for _ in range(10)])

return batch.stack()
return batch.to_doc_vec()


@pytest.mark.proto
Expand All @@ -29,7 +29,7 @@ class MyDoc(BaseDoc):

da = DocList[MyDoc]([MyDoc(tensor=np.zeros((3, 224, 224))) for _ in range(10)])

da = da.stack()
da = da.to_doc_vec()

da.from_protobuf(da.to_protobuf())

Expand All @@ -41,7 +41,7 @@ class CustomDocument(BaseDoc):

da = DocList[CustomDocument](
[CustomDocument(image=np.zeros((3, 224, 224))) for _ in range(10)]
).stack()
).to_doc_vec()

da2 = DocVec.from_protobuf(da.to_protobuf())

Expand Down
2 changes: 1 addition & 1 deletion tests/units/array/test_batching.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class MyDoc(BaseDoc):
]
)
if stack:
da = da.stack()
da = da.to_doc_vec()

batches = list(da._batch(batch_size=batch_size, shuffle=shuffle))
assert len(batches) == n_batches
Expand Down
Loading