docarray · samsja · Apr 14, 2023 · Apr 14, 2023 · Apr 14, 2023 · Apr 14, 2023
diff --git a/README.md b/README.md
@@ -242,10 +242,10 @@ dl.insert(
 And you can seamlessly switch between `DocVec` and `DocList`:
 
 ```python
-vec_2 = dl.stack()
+vec_2 = dl.to_doc_vec()
 assert isinstance(vec_2, DocVec)
 
-dl_2 = vec_2.unstack()
+dl_2 = vec_2.to_doc_list()
 assert isinstance(dl_2, DocList)
 ```
 

diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py
@@ -251,13 +251,13 @@ def _set_data_column(
         for doc, value in zip(self, values):
             setattr(doc, field, value)
 
-    def stack(
+    def to_doc_vec(
         self,
         tensor_type: Type['AbstractTensor'] = NdArray,
     ) -> 'DocVec':
         """
         Convert the `DocList` into a `DocVec`. `Self` cannot be used
-        afterwards
+        afterward
         :param tensor_type: Tensor Class used to wrap the doc_vec tensors. This is useful
         if the BaseDoc has some undefined tensor type like AnyTensor or Union of NdArray and TorchTensor
         :return: A `DocVec` of the same document type as self

diff --git a/docarray/array/doc_vec/doc_vec.py b/docarray/array/doc_vec/doc_vec.py
@@ -160,7 +160,7 @@ def __init__(
                         cast(AbstractTensor, tensor_columns[field_name])[i] = val
 
                 elif issubclass(field_type, BaseDoc):
-                    doc_columns[field_name] = getattr(docs, field_name).stack(
+                    doc_columns[field_name] = getattr(docs, field_name).to_doc_vec(
                         tensor_type=self.tensor_type
                     )
 
@@ -169,7 +169,7 @@ def __init__(
                     for doc in docs:
                         docs_nested = getattr(doc, field_name)
                         if isinstance(docs_nested, DocList):
-                            docs_nested = docs_nested.stack(
+                            docs_nested = docs_nested.to_doc_vec(
                                 tensor_type=self.tensor_type
                             )
                         docs_list.append(docs_nested)
@@ -213,7 +213,7 @@ def validate(
         if isinstance(value, cls):
             return value
         elif isinstance(value, DocList.__class_getitem__(cls.doc_type)):
-            return cast(T, value.stack())
+            return cast(T, value.to_doc_vec())
         elif isinstance(value, Sequence):
             return cls(value)
         elif isinstance(value, Iterable):
@@ -328,7 +328,7 @@ def _set_data_and_columns(
                     f'this DocVec schema : {self.doc_type}'
                 )
             processed_value = cast(
-                T, value.stack(tensor_type=self.tensor_type)
+                T, value.to_doc_vec(tensor_type=self.tensor_type)
             )  # we need to copy data here
 
         elif isinstance(value, DocVec):
@@ -474,7 +474,7 @@ def to_protobuf(self) -> 'DocVecProto':
             any_columns=any_columns_proto,
         )
 
-    def unstack(self: T) -> DocList[T_doc]:
+    def to_doc_list(self: T) -> DocList[T_doc]:
         """Convert DocVec into a DocList.
 
         Note this destroys the arguments and returns a new DocList
@@ -486,10 +486,10 @@ def unstack(self: T) -> DocList[T_doc]:
         unstacked_any_column = self._storage.any_columns
 
         for field, doc_col in self._storage.doc_columns.items():
-            unstacked_doc_column[field] = doc_col.unstack()
+            unstacked_doc_column[field] = doc_col.to_doc_list()
 
         for field, da_col in self._storage.docs_vec_columns.items():
-            unstacked_da_column[field] = [docs.unstack() for docs in da_col]
+            unstacked_da_column[field] = [docs.to_doc_list() for docs in da_col]
 
         for field, tensor_col in list(self._storage.tensor_columns.items()):
             # list is needed here otherwise we cannot delete the column

diff --git a/docarray/index/backends/hnswlib.py b/docarray/index/backends/hnswlib.py
@@ -48,10 +48,10 @@
     if tf is not None:
         from docarray.typing import TensorFlowTensor
 
-HNSWLIB_PY_VEC_TYPES = [list, tuple, np.ndarray, AbstractTensor]
+HNSWLIB_PY_VEC_TYPES: List[Any] = [list, tuple, np.ndarray, AbstractTensor]
 
 if torch is not None:
-    HNSWLIB_PY_VEC_TYPES.append(torch.Tensor)
+    HNSWLIB_PY_VEC_TYPES.append(torch.Tensor)  # type: ignore
 
 if tf is not None:
     HNSWLIB_PY_VEC_TYPES.append(tf.Tensor)

diff --git a/docs/how_to/multimodal_training_and_serving.md b/docs/how_to/multimodal_training_and_serving.md
@@ -366,7 +366,7 @@ async def embed_text(doc: Text) -> Text:
     with torch.autocast(device_type="cuda", dtype=torch.float16):
         with torch.inference_mode():
             text_preprocess(doc)
-            da = DocList[Text]([doc], tensor_type=TorchTensor).stack()
+            da = DocList[Text]([doc], tensor_type=TorchTensor).to_doc_vec()
             da.to(DEVICE)
             doc.embedding = text_encoder(da)[0].to('cpu')
     return doc

diff --git a/tests/integrations/array/test_torch_train.py b/tests/integrations/array/test_torch_train.py
@@ -16,7 +16,7 @@ class Mmdoc(BaseDoc):
     batch = DocList[Mmdoc](Mmdoc(text=f'hello{i}') for i in range(N))
     batch.tensor = torch.zeros(N, 3, 224, 224)
 
-    batch = batch.stack()
+    batch = batch.to_doc_vec()
 
     class Model(torch.nn.Module):
         def __init__(self):

diff --git a/tests/units/array/stack/test_array_stacked.py b/tests/units/array/stack/test_array_stacked.py
@@ -42,7 +42,7 @@ class MMdoc(BaseDoc):
         ]
     )
 
-    return batch.stack()
+    return batch.to_doc_vec()
 
 
 def test_create_from_list_docs():
@@ -79,7 +79,7 @@ class ImageDoc(BaseDoc):
         [ImageDoc(tensor=torch.zeros(3, 224, 224)) for _ in range(10)]
     )
 
-    batch = batch.stack()
+    batch = batch.to_doc_vec()
     batch.tensor = torch.ones(10, 3, 224, 224)
 
     assert (batch.tensor == torch.ones(10, 3, 224, 224)).all()
@@ -96,7 +96,7 @@ class ImageDoc(BaseDoc):
         [ImageDoc(tensor=np.zeros((3, 224, 224))) for _ in range(10)]
     )
 
-    batch = batch.stack()
+    batch = batch.to_doc_vec()
     batch.tensor = np.ones((10, 3, 224, 224))
 
     assert (batch.tensor == np.ones((10, 3, 224, 224))).all()
@@ -120,7 +120,7 @@ class ImageDoc(BaseDoc):
         [ImageDoc(tensor=np.zeros((3, 224, 224))) for _ in range(10)]
     )
 
-    batch = batch.stack()
+    batch = batch.to_doc_vec()
 
     assert (
         batch._storage.tensor_columns['tensor'] == np.zeros((10, 3, 224, 224))
@@ -156,7 +156,7 @@ class MMdoc(BaseDoc):
         [MMdoc(img=ImageDoc(tensor=torch.zeros(3, 224, 224))) for _ in range(10)]
     )
 
-    batch = batch.stack()
+    batch = batch.to_doc_vec()
 
     assert (
         batch._storage.doc_columns['img']._storage.tensor_columns['tensor']
@@ -192,8 +192,8 @@ class ImageDoc(BaseDoc):
         [ImageDoc(tensor=torch.zeros(3, 224, 224)) for _ in range(10)]
     )
 
-    batch = batch.stack()
-    da = batch.unstack()
+    batch = batch.to_doc_vec()
+    da = batch.to_doc_list()
 
     for doc in da:
         assert (doc.tensor == torch.zeros(3, 224, 224)).all()
@@ -210,16 +210,16 @@ class MMdoc(BaseDoc):
         [MMdoc(img=ImageDoc(tensor=torch.zeros(3, 224, 224))) for _ in range(10)]
     )
 
-    batch = batch.stack()
+    batch = batch.to_doc_vec()
 
-    da = batch.unstack()
+    da = batch.to_doc_list()
 
     for doc in da:
         assert (doc.img.tensor == torch.zeros(3, 224, 224)).all()
 
 
 def test_unstack_nested_DocArray(nested_batch):
-    batch = nested_batch.unstack()
+    batch = nested_batch.to_doc_list()
     for i in range(len(batch)):
         assert isinstance(batch[i].img, DocList)
         for doc in batch[i].img:
@@ -234,7 +234,7 @@ class ImageDoc(BaseDoc):
         [ImageDoc(tensor=torch.zeros(3, 224, 224)) for _ in range(10)]
     )
 
-    da = da.stack()
+    da = da.to_doc_vec()
 
     assert len(da) == 10
 
@@ -252,7 +252,7 @@ class ImageDoc(BaseDoc):
 
     # union fields aren't actually doc_vec
     # just checking that there is no error
-    batch.stack()
+    batch.to_doc_vec()
 
 
 @pytest.mark.parametrize(
@@ -402,7 +402,7 @@ class MyDoc(BaseDoc):
     )
     assert da[0].tensor.dtype == torch.int32
 
-    da = da.stack()
+    da = da.to_doc_vec()
     assert da[0].tensor.dtype == torch.int32
     assert da.tensor.dtype == torch.int32
 
@@ -416,7 +416,7 @@ class MyDoc(BaseDoc):
     )
     assert da[0].tensor.dtype == np.int32
 
-    da = da.stack()
+    da = da.to_doc_vec()
     assert da[0].tensor.dtype == np.int32
     assert da.tensor.dtype == np.int32
 
@@ -436,7 +436,7 @@ class MyDoc(BaseDoc):
     assert all(doc.scalar.ndim == 0 for doc in da)
     assert all(doc.scalar == 2.0 for doc in da)
 
-    stacked_da = da.stack()
+    stacked_da = da.to_doc_vec()
     assert type(stacked_da.scalar) == NdArray
 
     assert all(type(doc.scalar) == NdArray for doc in stacked_da)
@@ -457,7 +457,7 @@ class MyDoc(BaseDoc):
     )
     assert all(doc.scalar.ndim == 0 for doc in da)
     assert all(doc.scalar == 2.0 for doc in da)
-    stacked_da = da.stack(tensor_type=TorchTensor)
+    stacked_da = da.to_doc_vec(tensor_type=TorchTensor)
     assert type(stacked_da.scalar) == TorchTensor
 
     assert all(type(doc.scalar) == TorchTensor for doc in stacked_da)
@@ -475,7 +475,7 @@ class MyDoc(BaseDoc):
     da = DocList[MyDoc]([MyDoc() for _ in range(3)])
     assert all(doc.scalar is None for doc in da)
     assert all(doc.scalar == doc.scalar for doc in da)
-    stacked_da = da.stack()
+    stacked_da = da.to_doc_vec()
     assert type(stacked_da.scalar) == NdArray
 
     assert all(type(doc.scalar) == NdArray for doc in stacked_da)  # TODO fail here
@@ -494,7 +494,7 @@ class MyDoc(BaseDoc):
     da = DocList[MyDoc]([MyDoc() for _ in range(3)])
     assert all(doc.scalar is None for doc in da)
     assert all(doc.scalar == doc.scalar for doc in da)
-    stacked_da = da.stack(tensor_type=TorchTensor)
+    stacked_da = da.to_doc_vec(tensor_type=TorchTensor)
     assert type(stacked_da.scalar) == TorchTensor
 
     assert all(type(doc.scalar) == TorchTensor for doc in stacked_da)

diff --git a/tests/units/array/stack/test_array_stacked_tf.py b/tests/units/array/stack/test_array_stacked_tf.py
@@ -24,7 +24,7 @@ class Image(BaseDoc):
 
     batch = DocList[Image]([Image(tensor=tf.zeros((3, 224, 224))) for _ in range(10)])
 
-    return batch.stack()
+    return batch.to_doc_vec()
 
 
 @pytest.fixture()
@@ -108,7 +108,7 @@ class MMdoc(BaseDoc):
 
     batch = DocList[MMdoc](
         [MMdoc(img=Image(tensor=tf.zeros((3, 224, 224)))) for _ in range(10)]
-    ).stack()
+    ).to_doc_vec()
 
     assert tnp.allclose(
         batch._storage.doc_columns['img']._storage.tensor_columns['tensor'].tensor,
@@ -133,7 +133,7 @@ def test_stack_nested_DocArray(nested_batch):
 
 @pytest.mark.tensorflow
 def test_convert_to_da(batch):
-    da = batch.unstack()
+    da = batch.to_doc_list()
 
     for doc in da:
         assert tnp.allclose(doc.tensor.tensor, tf.zeros((3, 224, 224)))
@@ -151,15 +151,15 @@ class MMdoc(BaseDoc):
         [MMdoc(img=Image(tensor=tf.zeros((3, 224, 224)))) for _ in range(10)]
     )
     assert isinstance(batch.img._storage.tensor_columns['tensor'], TensorFlowTensor)
-    da = batch.unstack()
+    da = batch.to_doc_list()
 
     for doc in da:
         assert tnp.allclose(doc.img.tensor.tensor, tf.zeros((3, 224, 224)))
 
 
 @pytest.mark.tensorflow
 def test_unstack_nested_DocArray(nested_batch):
-    batch = nested_batch.unstack()
+    batch = nested_batch.to_doc_list()
     for i in range(len(batch)):
         assert isinstance(batch[i].img, DocList)
         for doc in batch[i].img:
@@ -173,7 +173,7 @@ class Image(BaseDoc):
 
     da = DocList[Image]([Image(tensor=tf.zeros((3, 224, 224))) for _ in range(10)])
 
-    da = da.stack()
+    da = da.to_doc_vec()
 
     assert len(da) == 10
 
@@ -285,6 +285,6 @@ class MyDoc(BaseDoc):
     )
     assert da[0].tensor.tensor.dtype == tf.int32
 
-    da = da.stack()
+    da = da.to_doc_vec()
     assert da[0].tensor.tensor.dtype == tf.int32
     assert da.tensor.tensor.dtype == tf.int32
diff --git a/tests/units/array/stack/test_proto.py b/tests/units/array/stack/test_proto.py
@@ -14,7 +14,7 @@ class Image(BaseDoc):
 
     batch = DocList[Image]([Image(tensor=torch.zeros(3, 224, 224)) for _ in range(10)])
 
-    return batch.stack()
+    return batch.to_doc_vec()
 
 
 @pytest.mark.proto
@@ -29,7 +29,7 @@ class MyDoc(BaseDoc):
 
     da = DocList[MyDoc]([MyDoc(tensor=np.zeros((3, 224, 224))) for _ in range(10)])
 
-    da = da.stack()
+    da = da.to_doc_vec()
 
     da.from_protobuf(da.to_protobuf())
 
@@ -41,7 +41,7 @@ class CustomDocument(BaseDoc):
 
     da = DocList[CustomDocument](
         [CustomDocument(image=np.zeros((3, 224, 224))) for _ in range(10)]
-    ).stack()
+    ).to_doc_vec()
 
     da2 = DocVec.from_protobuf(da.to_protobuf())
 

diff --git a/tests/units/array/test_batching.py b/tests/units/array/test_batching.py
@@ -24,7 +24,7 @@ class MyDoc(BaseDoc):
         ]
     )
     if stack:
-        da = da.stack()
+        da = da.to_doc_vec()
 
     batches = list(da._batch(batch_size=batch_size, shuffle=shuffle))
     assert len(batches) == n_batches