From 26aae10dd52054aa1bd66109f4fc3392c4984e45 Mon Sep 17 00:00:00 2001 From: samsja <55492238+samsja@users.noreply.github.com> Date: Wed, 16 Nov 2022 10:09:31 +0100 Subject: [PATCH] refactor(v2): renamed variable and files (#796) * refactor(v2): renamed variable and files Signed-off-by: Sami Jaghouar * refactor(v2): renamed test Signed-off-by: Sami Jaghouar Signed-off-by: Sami Jaghouar feat: add read from proto --- docarray/document/mixins/proto.py | 28 +++++++++---------- docarray/predefined_document/text.py | 2 +- docarray/typing/__init__.py | 11 ++++++-- docarray/typing/id.py | 19 +++++++++---- docarray/typing/{ => tensor}/embedding.py | 0 docarray/typing/tensor/tensor.py | 2 +- docarray/typing/tensor/torch_tensor.py | 2 +- docarray/typing/url/any_url.py | 14 ++++++++++ tests/integrations/typing/test_anyurl.py | 12 ++++++++ tests/integrations/typing/test_embedding.py | 15 ++++++++++ tests/integrations/typing/test_id.py | 12 ++++++++ tests/integrations/typing/test_image_url.py | 12 ++++++++ .../typing/{tensor.py => test_tensor.py} | 0 .../{torch_tensor.py => test_torch_tensor.py} | 2 +- .../document/proto/test_proto_based_object.py | 2 +- tests/units/typing/tensor/__init__.py | 0 .../typing/{ => tensor}/test_embedding.py | 0 .../units/typing/{ => tensor}/test_tensor.py | 0 18 files changed, 105 insertions(+), 28 deletions(-) rename docarray/typing/{ => tensor}/embedding.py (100%) create mode 100644 tests/integrations/typing/test_anyurl.py create mode 100644 tests/integrations/typing/test_embedding.py create mode 100644 tests/integrations/typing/test_id.py create mode 100644 tests/integrations/typing/test_image_url.py rename tests/integrations/typing/{tensor.py => test_tensor.py} (100%) rename tests/integrations/typing/{torch_tensor.py => test_torch_tensor.py} (92%) create mode 100644 tests/units/typing/tensor/__init__.py rename tests/units/typing/{ => tensor}/test_embedding.py (100%) rename tests/units/typing/{ => tensor}/test_tensor.py (100%) diff --git 
a/docarray/document/mixins/proto.py b/docarray/document/mixins/proto.py index aaa3e6157dc..86a5ed40881 100644 --- a/docarray/document/mixins/proto.py +++ b/docarray/document/mixins/proto.py @@ -1,7 +1,5 @@ from typing import Any, Dict, Type, TypeVar -from pydantic.tools import parse_obj_as - from docarray.document.abstract_document import AbstractDocument from docarray.document.base_node import BaseNode from docarray.proto import DocumentProto, NodeProto @@ -14,7 +12,6 @@ class ProtoMixin(AbstractDocument, BaseNode): @classmethod def from_protobuf(cls: Type[T], pb_msg: 'DocumentProto') -> T: """create a Document from a protobuf message""" - from docarray import DocumentArray fields: Dict[str, Any] = {} @@ -25,18 +22,18 @@ def from_protobuf(cls: Type[T], pb_msg: 'DocumentProto') -> T: # this if else statement need to be refactored it is too long # the check should be delegated to the type level - if content_type == 'tensor': - fields[field] = Tensor._read_from_proto(value.tensor) - elif content_type == 'torch_tensor': - fields[field] = TorchTensor._read_from_proto(value.torch_tensor) - elif content_type == 'embedding': - fields[field] = Embedding._read_from_proto(value.embedding) - elif content_type == 'any_url': - fields[field] = parse_obj_as(AnyUrl, value.any_url) - elif content_type == 'image_url': - fields[field] = parse_obj_as(ImageUrl, value.image_url) - elif content_type == 'id': - fields[field] = parse_obj_as(ID, value.id) + content_type_dict = dict( + tensor=Tensor, + torch_tensor=TorchTensor, + embedding=Embedding, + any_url=AnyUrl, + image_url=ImageUrl, + id=ID, + ) + if content_type in content_type_dict: + fields[field] = content_type_dict[content_type].from_protobuf( + getattr(value, content_type) + ) elif content_type == 'text': fields[field] = value.text elif content_type == 'nested': @@ -44,6 +41,7 @@ def from_protobuf(cls: Type[T], pb_msg: 'DocumentProto') -> T: value.nested ) # we get to the parent class elif content_type == 'chunks': + from 
docarray import DocumentArray fields[field] = DocumentArray.from_protobuf( value.chunks diff --git a/docarray/predefined_document/text.py b/docarray/predefined_document/text.py index 3ef2cd61d12..f02586190f1 100644 --- a/docarray/predefined_document/text.py +++ b/docarray/predefined_document/text.py @@ -1,7 +1,7 @@ from typing import Optional from docarray.document import BaseDocument -from docarray.typing.embedding import Embedding, Tensor +from docarray.typing.tensor.embedding import Embedding, Tensor class Text(BaseDocument): diff --git a/docarray/typing/__init__.py b/docarray/typing/__init__.py index 7c018343e8a..5c94706d9b9 100644 --- a/docarray/typing/__init__.py +++ b/docarray/typing/__init__.py @@ -1,6 +1,13 @@ -from docarray.typing.embedding import Embedding from docarray.typing.id import ID from docarray.typing.tensor import Tensor, TorchTensor +from docarray.typing.tensor.embedding import Embedding from docarray.typing.url import AnyUrl, ImageUrl -__all__ = ['Tensor', 'Embedding', 'ImageUrl', 'AnyUrl', 'ID', 'TorchTensor'] +__all__ = [ + 'TorchTensor', + 'Tensor', + 'Embedding', + 'ImageUrl', + 'AnyUrl', + 'ID', +] diff --git a/docarray/typing/id.py b/docarray/typing/id.py index a6b9d82c3fc..07f93942bef 100644 --- a/docarray/typing/id.py +++ b/docarray/typing/id.py @@ -1,14 +1,12 @@ -from typing import TYPE_CHECKING, Optional, Type, TypeVar, Union +from typing import Optional, Type, TypeVar, Union from uuid import UUID +from pydantic import BaseConfig, parse_obj_as +from pydantic.fields import ModelField + from docarray.document.base_node import BaseNode from docarray.proto import NodeProto -if TYPE_CHECKING: - from pydantic import BaseConfig - from pydantic.fields import ModelField - - T = TypeVar('T', bound='ID') @@ -43,3 +41,12 @@ def _to_node_protobuf(self) -> NodeProto: :return: the nested item protobuf message """ return NodeProto(id=self) + + @classmethod + def from_protobuf(cls: Type[T], pb_msg: 'str') -> T: + """ + read ID from a proto msg 
+ :param pb_msg: + :return: a string + """ + return parse_obj_as(cls, pb_msg) diff --git a/docarray/typing/embedding.py b/docarray/typing/tensor/embedding.py similarity index 100% rename from docarray/typing/embedding.py rename to docarray/typing/tensor/embedding.py diff --git a/docarray/typing/tensor/tensor.py b/docarray/typing/tensor/tensor.py index c031207cdaa..bb37c39e52a 100644 --- a/docarray/typing/tensor/tensor.py +++ b/docarray/typing/tensor/tensor.py @@ -52,7 +52,7 @@ def _to_node_protobuf(self: T, field: str = 'tensor') -> NodeProto: return NodeProto(**{field: nd_proto}) @classmethod - def _read_from_proto(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T': + def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T': """ read ndarray from a proto msg :param pb_msg: diff --git a/docarray/typing/tensor/torch_tensor.py b/docarray/typing/tensor/torch_tensor.py index 4812d52ee38..6bef83605cf 100644 --- a/docarray/typing/tensor/torch_tensor.py +++ b/docarray/typing/tensor/torch_tensor.py @@ -82,7 +82,7 @@ def _to_node_protobuf(self: T, field: str = 'torch_tensor') -> NodeProto: return NodeProto(**{field: nd_proto}) @classmethod - def _read_from_proto(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T': + def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T': """ read ndarray from a proto msg :param pb_msg: diff --git a/docarray/typing/url/any_url.py b/docarray/typing/url/any_url.py index 5bec625d476..b5cf67c3901 100644 --- a/docarray/typing/url/any_url.py +++ b/docarray/typing/url/any_url.py @@ -1,8 +1,13 @@ +from typing import Type, TypeVar + from pydantic import AnyUrl as BaseAnyUrl +from pydantic import parse_obj_as from docarray.document.base_node import BaseNode from docarray.proto import NodeProto +T = TypeVar('T', bound='AnyUrl') + class AnyUrl(BaseAnyUrl, BaseNode): def _to_node_protobuf(self) -> NodeProto: @@ -13,3 +18,12 @@ def _to_node_protobuf(self) -> NodeProto: :return: the nested item protobuf message """ return NodeProto(any_url=str(self)) + + 
@classmethod + def from_protobuf(cls: Type[T], pb_msg: 'str') -> T: + """ + read url from a proto msg + :param pb_msg: + :return: url + """ + return parse_obj_as(cls, pb_msg) diff --git a/tests/integrations/typing/test_anyurl.py b/tests/integrations/typing/test_anyurl.py new file mode 100644 index 00000000000..aa1a56a21f7 --- /dev/null +++ b/tests/integrations/typing/test_anyurl.py @@ -0,0 +1,12 @@ +from docarray import Document +from docarray.typing import AnyUrl + + +def test_set_any_url(): + class MyDocument(Document): + any_url: AnyUrl + + d = MyDocument(any_url="https://jina.ai") + + assert isinstance(d.any_url, AnyUrl) + assert d.any_url == "https://jina.ai" diff --git a/tests/integrations/typing/test_embedding.py b/tests/integrations/typing/test_embedding.py new file mode 100644 index 00000000000..dbb5de3cca4 --- /dev/null +++ b/tests/integrations/typing/test_embedding.py @@ -0,0 +1,15 @@ +import numpy as np + +from docarray import Document +from docarray.typing import Embedding + + +def test_set_embedding(): + class MyDocument(Document): + embedding: Embedding + + d = MyDocument(embedding=np.zeros((3, 224, 224))) + + assert isinstance(d.embedding, Embedding) + assert isinstance(d.embedding, np.ndarray) + assert (d.embedding == np.zeros((3, 224, 224))).all() diff --git a/tests/integrations/typing/test_id.py b/tests/integrations/typing/test_id.py new file mode 100644 index 00000000000..42b4d8b07f6 --- /dev/null +++ b/tests/integrations/typing/test_id.py @@ -0,0 +1,12 @@ +from docarray import Document +from docarray.typing import ID + + +def test_set_id(): + class MyDocument(Document): + id: ID + + d = MyDocument(id="123") + + assert isinstance(d.id, ID) + assert d.id == "123" diff --git a/tests/integrations/typing/test_image_url.py b/tests/integrations/typing/test_image_url.py new file mode 100644 index 00000000000..d8dae1ef311 --- /dev/null +++ b/tests/integrations/typing/test_image_url.py @@ -0,0 +1,12 @@ +from docarray import Document +from docarray.typing 
import ImageUrl + + +def test_set_image_url(): + class MyDocument(Document): + image_url: ImageUrl + + d = MyDocument(image_url="https://jina.ai/img.png") + + assert isinstance(d.image_url, ImageUrl) + assert d.image_url == "https://jina.ai/img.png" diff --git a/tests/integrations/typing/tensor.py b/tests/integrations/typing/test_tensor.py similarity index 100% rename from tests/integrations/typing/tensor.py rename to tests/integrations/typing/test_tensor.py diff --git a/tests/integrations/typing/torch_tensor.py b/tests/integrations/typing/test_torch_tensor.py similarity index 92% rename from tests/integrations/typing/torch_tensor.py rename to tests/integrations/typing/test_torch_tensor.py index 0974693234a..8c41ca62ad0 100644 --- a/tests/integrations/typing/torch_tensor.py +++ b/tests/integrations/typing/test_torch_tensor.py @@ -4,7 +4,7 @@ from docarray.typing import TorchTensor -def test_set_tensor(): +def test_set_torch_tensor(): class MyDocument(Document): tensor: TorchTensor diff --git a/tests/units/document/proto/test_proto_based_object.py b/tests/units/document/proto/test_proto_based_object.py index ac6f38949b2..d4e3d47f504 100644 --- a/tests/units/document/proto/test_proto_based_object.py +++ b/tests/units/document/proto/test_proto_based_object.py @@ -18,7 +18,7 @@ def test_ndarray(): original_tensor = np.zeros((3, 224, 224)) Tensor._flush_tensor_to_proto(nd_proto, value=original_tensor) nested_item = NodeProto(tensor=nd_proto) - tensor = Tensor._read_from_proto(nested_item.tensor) + tensor = Tensor.from_protobuf(nested_item.tensor) assert (tensor == original_tensor).all() diff --git a/tests/units/typing/tensor/__init__.py b/tests/units/typing/tensor/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/units/typing/test_embedding.py b/tests/units/typing/tensor/test_embedding.py similarity index 100% rename from tests/units/typing/test_embedding.py rename to tests/units/typing/tensor/test_embedding.py diff --git 
a/tests/units/typing/test_tensor.py b/tests/units/typing/tensor/test_tensor.py similarity index 100% rename from tests/units/typing/test_tensor.py rename to tests/units/typing/tensor/test_tensor.py