Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
941c0f9
feat: add tensor type for ndarray
JohannesMessner Nov 11, 2022
4a0f7bf
fix: fix mypy typing
JohannesMessner Nov 11, 2022
f451044
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-rewri…
JohannesMessner Nov 15, 2022
04583d4
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-rewri…
JohannesMessner Nov 16, 2022
1d9eaaf
feat: torch tensor type
JohannesMessner Nov 16, 2022
e820675
fix: protobuf for pytorch type
JohannesMessner Nov 16, 2022
4b449aa
ci: install all extras in the ci
JohannesMessner Nov 16, 2022
633b701
refactor: make nice looking
JohannesMessner Nov 16, 2022
492659e
docs: update docarray/typing/tensor/torch_tensor.py
JohannesMessner Nov 16, 2022
a5c1a31
refactor: code style
JohannesMessner Nov 16, 2022
8df13c1
fix: black and mypy
JohannesMessner Nov 16, 2022
28f1ed0
fix: suppress mypy import error
JohannesMessner Nov 16, 2022
4df03ef
ci: fix ci install
JohannesMessner Nov 16, 2022
81f7810
feat: add new type for image urls
JohannesMessner Nov 17, 2022
d14d4af
feat: add new type for image urls
JohannesMessner Nov 17, 2022
370b7fa
Merge branch 'feat-rewrite-v2' into feat-image-url
JohannesMessner Nov 18, 2022
3b6d5b9
test: add real existing url to test
JohannesMessner Nov 18, 2022
ba8adf0
test: test output of image buffer loading
JohannesMessner Nov 18, 2022
75a440c
feat: add validation for image url
JohannesMessner Nov 18, 2022
5b90769
feat: specify image axis permutation
JohannesMessner Nov 18, 2022
289ee15
docs: add docstrings
JohannesMessner Nov 18, 2022
f8a8129
docs: make strings uniform
JohannesMessner Nov 18, 2022
4512286
test: pass valid url as imageurl
JohannesMessner Nov 18, 2022
e4027d6
feat: add texturi type
JohannesMessner Nov 20, 2022
6124175
Merge branch 'feat-rewrite-v2' into feat-text-url
JohannesMessner Nov 21, 2022
f1cd043
Merge branch 'feat-rewrite-v2' into feat-text-url
JohannesMessner Nov 21, 2022
4ccecaf
Merge branch 'feat-rewrite-v2' into feat-text-url
JohannesMessner Nov 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion docarray/document/mixins/proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@
from docarray.document.abstract_document import AbstractDocument
from docarray.document.base_node import BaseNode
from docarray.proto import DocumentProto, NodeProto
from docarray.typing import ID, AnyUrl, Embedding, ImageUrl, Tensor, TorchTensor
from docarray.typing import (
ID,
AnyUrl,
Embedding,
ImageUrl,
Tensor,
TextUrl,
TorchTensor,
)

T = TypeVar('T', bound='ProtoMixin')

Expand All @@ -27,6 +35,7 @@ def from_protobuf(cls: Type[T], pb_msg: 'DocumentProto') -> T:
torch_tensor=TorchTensor,
embedding=Embedding,
any_url=AnyUrl,
text_url=TextUrl,
image_url=ImageUrl,
id=ID,
)
Expand Down
6 changes: 4 additions & 2 deletions docarray/proto/docarray.proto
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,11 @@ message NodeProto {

string image_url = 8;

string id = 9;
string text_url = 9;

NdArrayProto torch_tensor = 10;
string id = 10;

NdArrayProto torch_tensor = 11;

}

Expand Down
38 changes: 20 additions & 18 deletions docarray/proto/pb2/docarray_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion docarray/typing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from docarray.typing.id import ID
from docarray.typing.tensor import Tensor, TorchTensor
from docarray.typing.tensor.embedding import Embedding
from docarray.typing.url import AnyUrl, ImageUrl
from docarray.typing.url import AnyUrl, ImageUrl, TextUrl

__all__ = [
'TorchTensor',
'Tensor',
'Embedding',
'ImageUrl',
'TextUrl',
'AnyUrl',
'ID',
]
3 changes: 2 additions & 1 deletion docarray/typing/url/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from docarray.typing.url.any_url import AnyUrl
from docarray.typing.url.image_url import ImageUrl
from docarray.typing.url.text_url import TextUrl

__all__ = ['ImageUrl', 'AnyUrl']
__all__ = ['ImageUrl', 'AnyUrl', 'TextUrl']
91 changes: 91 additions & 0 deletions docarray/typing/url/text_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from typing import Optional

from docarray.proto import NodeProto
from docarray.typing.url.any_url import AnyUrl
from docarray.typing.url.helper import _uri_to_blob


class TextUrl(AnyUrl):
    """
    URL to a text file.
    Can be a remote (web) URL, or a local file path.
    """

    def _to_node_protobuf(self) -> NodeProto:
        """Convert this URL into a NodeProto protobuf message. This function should
        be called when the URL is nested into a Document that needs to be
        converted into a protobuf

        :return: the nested item protobuf message, with the URL string stored
            in its ``text_url`` field
        """
        return NodeProto(text_url=str(self))

    def load_to_bytes(self, timeout: Optional[float] = None) -> bytes:
        """
        Load the text file into a bytes object.

        EXAMPLE USAGE

        .. code-block:: python

            from docarray import Document
            from docarray.typing import TextUrl


            class MyDoc(Document):
                remote_url: TextUrl
                local_url: TextUrl


            doc = MyDoc(
                remote_url='https://de.wikipedia.org/wiki/Brixen',
                local_url='home/username/my_file.txt',
            )

            remote_txt_bytes = doc.remote_url.load_to_bytes()
            local_txt_bytes = doc.local_url.load_to_bytes()

        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: the text file content as bytes
        """
        return _uri_to_blob(self, timeout=timeout)

    def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
        """
        Load the text file into a string.

        EXAMPLE USAGE

        .. code-block:: python

            from docarray import Document
            from docarray.typing import TextUrl


            class MyDoc(Document):
                remote_url: TextUrl
                local_url: TextUrl


            doc = MyDoc(
                remote_url='https://de.wikipedia.org/wiki/Brixen',
                local_url='home/username/my_file.txt',
            )

            remote_txt = doc.remote_url.load()
            print(remote_txt)
            # prints: ```<!DOCTYPE html>\n<html class="client-nojs" ... > ...```

            local_txt = doc.local_url.load()
            print(local_txt)
            # prints content of my_file.txt


        :param charset: name of the encoding used to decode the loaded bytes;
            it is passed unchanged to ``bytes.decode``
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: the text file content
        :raises UnicodeDecodeError: if the content is not valid in ``charset``
        :raises LookupError: if ``charset`` is not a known encoding
        """
        # Reuse load_to_bytes so there is a single code path that fetches the
        # content; only the decoding step is specific to this method.
        return self.load_to_bytes(timeout=timeout).decode(charset)
4 changes: 3 additions & 1 deletion tests/integrations/typing/test_typing_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from docarray import Document
from docarray.document import AnyDocument
from docarray.typing import AnyUrl, Embedding, ImageUrl, Tensor, TorchTensor
from docarray.typing import AnyUrl, Embedding, ImageUrl, Tensor, TextUrl, TorchTensor


def test_proto_all_types():
Expand All @@ -13,13 +13,15 @@ class Mymmdoc(Document):
embedding: Embedding
any_url: AnyUrl
image_url: ImageUrl
text_url: TextUrl

doc = Mymmdoc(
tensor=np.zeros((3, 224, 224)),
torch_tensor=torch.zeros((3, 224, 224)),
embedding=np.zeros((100, 1)),
any_url='http://jina.ai',
image_url='http://jina.ai/bla.jpg',
text_url='http://jina.ai',
)

new_doc = AnyDocument.from_protobuf(doc.to_protobuf())
Expand Down
Loading