From aa348b1e1d066e8ea07bb66e6180c98117473a7b Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Thu, 23 Dec 2021 09:10:54 +0100 Subject: [PATCH 1/3] style: reformat code --- docarray/array/mixins/getattr.py | 4 +-- docarray/array/mixins/io/csv.py | 4 +-- docarray/array/mixins/io/json.py | 14 +++++++++-- docarray/array/mixins/traverse.py | 2 +- docarray/document/mixins/__init__.py | 2 +- docarray/document/mixins/_property.py | 36 +++++++++++++-------------- docarray/document/mixins/plot.py | 8 +++--- docarray/document/mixins/porting.py | 2 +- setup.py | 2 +- 9 files changed, 41 insertions(+), 33 deletions(-) diff --git a/docarray/array/mixins/getattr.py b/docarray/array/mixins/getattr.py index 84aa41391e2..3e177a9d4ac 100644 --- a/docarray/array/mixins/getattr.py +++ b/docarray/array/mixins/getattr.py @@ -7,7 +7,7 @@ class GetAttributeMixin: """Helpers that provide attributes getter in bulk """ - def get_attributes(self, *fields: str) -> Union[List, List[List]]: + def get_attributes(self, *fields: str) -> List: """Return all nonempty values of the fields from all docs this array contains :param fields: Variable length argument with the name of the fields to extract @@ -24,7 +24,7 @@ def get_attributes(self, *fields: str) -> Union[List, List[List]]: def get_attributes_with_docs( self, *fields: str, - ) -> Tuple[Union[List, List[List]], 'DocumentArray']: + ) -> Tuple[List, 'DocumentArray']: """Return all nonempty values of the fields together with their nonempty docs :param fields: Variable length argument with the name of the fields to extract diff --git a/docarray/array/mixins/io/csv.py b/docarray/array/mixins/io/csv.py index 571ba329879..c3406223b46 100644 --- a/docarray/array/mixins/io/csv.py +++ b/docarray/array/mixins/io/csv.py @@ -101,6 +101,4 @@ def load_csv( from ....document.generators import from_csv - da = cls() - da.extend(from_csv(file, field_resolver=field_resolver)) - return da + return cls(from_csv(file, field_resolver=field_resolver)) diff --git a/docarray/array/mixins/io/json.py b/docarray/array/mixins/io/json.py index 63af24eb877..adbe16d1d6b 100644 --- a/docarray/array/mixins/io/json.py +++ b/docarray/array/mixins/io/json.py @@ -50,7 +50,17 @@ def load_json(cls: Type['T'], file: Union[str, TextIO]) -> 'T': with file_ctx as fp: return cls(constructor(v) for v in fp) - def to_list(self) -> List: + @classmethod + def from_json(cls: Type['T'], file: Union[str, TextIO]) -> 'T': + return cls.load_json(file) + + @classmethod + def from_list_safe(cls: Type['T'], values: List) -> 'T': + from .... import Document + + return cls(Document.from_dict(v) for v in values) + + def to_list_safe(self) -> List: """Convert the object into a Python list. .. note:: @@ -65,4 +75,4 @@ def to_json(self) -> str: :return: a Python list """ - return json.dumps(self.to_list()) + return json.dumps(self.to_list_safe()) diff --git a/docarray/array/mixins/traverse.py b/docarray/array/mixins/traverse.py index 9a2c6f0db18..426bbe06111 100644 --- a/docarray/array/mixins/traverse.py +++ b/docarray/array/mixins/traverse.py @@ -96,7 +96,7 @@ def traverse_flat( self, traversal_paths: str, filter_fn: Optional[Callable[['Document'], bool]] = None, - ) -> Union['DocumentArray', Iterable['Document']]: + ) -> 'DocumentArray': """ Returns a single flattened :class:``TraversableSequence`` with all Documents, that are reached via the ``traversal_paths``. diff --git a/docarray/document/mixins/__init__.py b/docarray/document/mixins/__init__.py index e6e50317489..03690c28175 100644 --- a/docarray/document/mixins/__init__.py +++ b/docarray/document/mixins/__init__.py @@ -7,12 +7,12 @@ from .image import ImageDataMixin from .mesh import MeshDataMixin from .plot import PlotMixin +from .porting import PortingMixin from .property import PropertyMixin from .protobuf import ProtobufMixin from .sugar import SingletonSugarMixin from .text import TextDataMixin from .video import VideoDataMixin -from .porting import PortingMixin class AllMixins( diff --git a/docarray/document/mixins/_property.py b/docarray/document/mixins/_property.py index 6c2ed719792..fc95334bed6 100644 --- a/docarray/document/mixins/_property.py +++ b/docarray/document/mixins/_property.py @@ -19,6 +19,15 @@ def id(self) -> str: def id(self, value: str): self._data.id = value + @property + def parent_id(self) -> Optional[str]: + self._data._set_default_value_if_none('parent_id') + return self._data.parent_id + + @parent_id.setter + def parent_id(self, value: str): + self._data.parent_id = value + @property def granularity(self) -> Optional[int]: self._data._set_default_value_if_none('granularity') @@ -37,15 +46,6 @@ def adjacency(self) -> Optional[int]: def adjacency(self, value: int): self._data.adjacency = value - @property - def parent_id(self) -> Optional[str]: - self._data._set_default_value_if_none('parent_id') - return self._data.parent_id - - @parent_id.setter - def parent_id(self, value: str): - self._data.parent_id = value - @property def buffer(self) -> Optional[bytes]: self._data._set_default_value_if_none('buffer') @@ -64,6 +64,15 @@ def blob(self) -> Optional['ArrayType']: def blob(self, value: 'ArrayType'): self._data.blob = value + @property + def mime_type(self) -> Optional[str]: + self._data._set_default_value_if_none('mime_type') + return self._data.mime_type + + @mime_type.setter + def mime_type(self, value: str): + self._data.mime_type = value + @property def text(self) -> Optional[str]: self._data._set_default_value_if_none('text') @@ -100,15 +109,6 @@ def uri(self) -> Optional[str]: def uri(self, value: str): self._data.uri = value - @property - def mime_type(self) -> Optional[str]: - self._data._set_default_value_if_none('mime_type') - return self._data.mime_type - - @mime_type.setter - def mime_type(self, value: str): - self._data.mime_type = value - @property def tags(self) -> Optional[Dict[str, 'StructValueType']]: self._data._set_default_value_if_none('tags') diff --git a/docarray/document/mixins/plot.py b/docarray/document/mixins/plot.py index b85f785dd2b..5473ab79edb 100644 --- a/docarray/document/mixins/plot.py +++ b/docarray/document/mixins/plot.py @@ -54,10 +54,10 @@ def _mermaid_to_url(self, img_type: str) -> str: """ mermaid_str = ( """ - %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#FFC666'}}}%% - classDiagram - - """ + %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#FFC666'}}}%% + classDiagram + + """ + self.__mermaid_str__() ) diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index 1fa5f79b7fa..c704acebbbf 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -1,5 +1,5 @@ import pickle -from typing import Union, Optional, TYPE_CHECKING, Type, Dict +from typing import Optional, TYPE_CHECKING, Type, Dict from ...helper import compress_bytes, decompress_bytes diff --git a/setup.py b/setup.py index 4884b15ffec..733d86cfd9c 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ long_description_content_type='text/markdown', zip_safe=False, setup_requires=['setuptools>=18.0', 'wheel'], - install_requires=['protobuf>=3.13.0', 'numpy', 'lz4'], + install_requires=['numpy'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', From 3b5aabcba098efd655f240d3ab90a8504a06f150 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Thu, 23 Dec 2021 09:32:46 +0100 Subject: [PATCH 2/3] style: reformat code --- .github/requirements-cicd.txt | 1 + .github/workflows/cd.yml | 3 +- .github/workflows/ci.yml | 3 +- setup.py | 14 +++++++++ tests/unit/array/test_from_to_bytes.py | 43 +++++++++++++++++++++++++- tests/unit/array/test_ravel_unravel.py | 1 - 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/.github/requirements-cicd.txt b/.github/requirements-cicd.txt index 344b71e407b..49704affe6b 100644 --- a/.github/requirements-cicd.txt +++ b/.github/requirements-cicd.txt @@ -12,3 +12,4 @@ matplotlib rich Pillow lz4 +fastapi \ No newline at end of file diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index a1eb6ba9e6e..4993e825cea 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -42,8 +42,7 @@ jobs: python -m pip install wheel pip install -r .github/requirements-test.txt pip install -r .github/requirements-cicd.txt - pip install --no-cache-dir . - export JINA_LOG_LEVEL="ERROR" + pip install --no-cache-dir ".[full]" - name: Test id: test run: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7486472dbf..f51d7b2d23f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -113,8 +113,7 @@ jobs: python -m pip install wheel pip install -r .github/requirements-test.txt pip install -r .github/requirements-cicd.txt - pip install --no-cache-dir . - export JINA_LOG_LEVEL="ERROR" + pip install --no-cache-dir ".[full]" - name: Test id: test run: | diff --git a/setup.py b/setup.py index 733d86cfd9c..dac8b1cc22b 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,20 @@ zip_safe=False, setup_requires=['setuptools>=18.0', 'wheel'], install_requires=['numpy'], + extras_require={ + 'full': [ + 'protobuf>=3.13.0', + 'lz4', + 'requests', + 'matplotlib', + 'Pillow', + 'rich', + 'trimesh', + 'scipy', + 'av', + 'fastapi', + ] + }, classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', diff --git a/tests/unit/array/test_from_to_bytes.py b/tests/unit/array/test_from_to_bytes.py index 71af2c52aa3..610869b0b5d 100644 --- a/tests/unit/array/test_from_to_bytes.py +++ b/tests/unit/array/test_from_to_bytes.py @@ -1,20 +1,56 @@ +import numpy as np import pytest +import tensorflow as tf +import torch +from scipy.sparse import csr_matrix, coo_matrix, bsr_matrix, csc_matrix from docarray import DocumentArray +from docarray.math.ndarray import to_numpy_array from tests import random_docs +def get_ndarrays_for_ravel(): + a = np.random.random([100, 3]) + a[a > 0.5] = 0 + return [ + (a, False), + (torch.tensor(a), False), + (tf.constant(a), False), + (torch.tensor(a).to_sparse(), True), + # (tf.sparse.from_dense(a), True), + (csr_matrix(a), True), + (bsr_matrix(a), True), + (coo_matrix(a), True), + (csc_matrix(a), True), + ] + + +@pytest.mark.parametrize('ndarray_val, is_sparse', get_ndarrays_for_ravel()) @pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)]) @pytest.mark.parametrize( 'protocol', ['protobuf', 'protobuf-once', 'pickle', 'pickle-once'] ) @pytest.mark.parametrize('compress', ['lz4', 'bz2', 'lzma', 'zlib', 'gzip', None]) -def test_to_from_bytes(target_da, protocol, compress): +def test_to_from_bytes(target_da, protocol, compress, ndarray_val, is_sparse): bstr = target_da.to_bytes(protocol=protocol, compress=compress) print(protocol, compress, len(bstr)) da2 = DocumentArray.from_bytes(bstr, protocol=protocol, compress=compress) assert len(da2) == len(target_da) + target_da.embeddings = ndarray_val + target_da.blobs = ndarray_val + bstr = target_da.to_bytes(protocol=protocol, compress=compress) + print(protocol, compress, len(bstr)) + da2 = DocumentArray.from_bytes(bstr, protocol=protocol, compress=compress) + assert len(da2) == len(target_da) + + np.testing.assert_almost_equal( + to_numpy_array(target_da.embeddings), to_numpy_array(da2.embeddings) + ) + np.testing.assert_almost_equal( + to_numpy_array(target_da.blobs), to_numpy_array(da2.blobs) + ) + @pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)]) @pytest.mark.parametrize( @@ -37,3 +73,8 @@ def test_save_bytes(target_da, protocol, compress, tmpfile): @pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)]) def test_from_to_protobuf(target_da): DocumentArray.from_protobuf(target_da.to_protobuf()) + + +@pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)]) +def test_from_to_safe_list(target_da): + DocumentArray.from_list_safe(target_da.to_list_safe()) diff --git a/tests/unit/array/test_ravel_unravel.py b/tests/unit/array/test_ravel_unravel.py index a4041d03bbf..6ba872294f7 100644 --- a/tests/unit/array/test_ravel_unravel.py +++ b/tests/unit/array/test_ravel_unravel.py @@ -32,7 +32,6 @@ def get_ndarrays_for_ravel(): def test_ravel_embeddings_blobs(ndarray_val, attr, is_sparse): da = DocumentArray.empty(10) setattr(da, attr, ndarray_val) - ndav = getattr(da, attr) # test read/getter From 71919083f3247231610391ff59c90b6d15999575 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Thu, 23 Dec 2021 09:45:31 +0100 Subject: [PATCH 3/3] style: reformat code --- docarray/array/mixins/io/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/array/mixins/io/dataframe.py b/docarray/array/mixins/io/dataframe.py index 65d2fd7637e..61a899c0409 100644 --- a/docarray/array/mixins/io/dataframe.py +++ b/docarray/array/mixins/io/dataframe.py @@ -21,7 +21,7 @@ def to_dataframe(self, **kwargs) -> 'DataFrame': """ from pandas import DataFrame - return DataFrame.from_dict(self.to_list(), **kwargs) + return DataFrame.from_dict(self.to_list_safe(), **kwargs) @classmethod def from_dataframe(cls: Type['T'], df: 'DataFrame') -> 'T':