diff --git a/docarray/typing/bytes/audio_bytes.py b/docarray/typing/bytes/audio_bytes.py index 4a0283c8761..3dab08230a7 100644 --- a/docarray/typing/bytes/audio_bytes.py +++ b/docarray/typing/bytes/audio_bytes.py @@ -7,6 +7,7 @@ from docarray.typing.abstract_type import AbstractType from docarray.typing.proto_register import _register_proto +from docarray.typing.tensor.audio import AudioNdArray from docarray.utils._internal.misc import import_library if TYPE_CHECKING: @@ -43,51 +44,48 @@ def _to_node_protobuf(self: T) -> 'NodeProto': return NodeProto(blob=self, type=self._proto_type_name) - def load(self) -> Tuple[np.ndarray, int]: + def load(self) -> Tuple[AudioNdArray, int]: """ - Load the Audio from the bytes into a numpy.ndarray Audio tensor + Load the Audio from the AudioBytes into an AudioNdArray --- ```python from typing import Optional from docarray import BaseDoc - from docarray.typing import AudioUrl, NdArray, AudioBytes - - import numpy as np + from docarray.typing import AudioBytes, AudioNdArray, AudioUrl class MyAudio(BaseDoc): url: AudioUrl - tensor: Optional[NdArray] - bytes: Optional[AudioBytes] + tensor: Optional[AudioNdArray] + bytes_: Optional[AudioBytes] frame_rate: Optional[float] doc = MyAudio(url='https://www.kozco.com/tech/piano2.wav') - doc.bytes = doc.url.load_bytes() - doc.tensor, doc.frame_rate = doc.bytes.load() + doc.bytes_ = doc.url.load_bytes() + doc.tensor, doc.frame_rate = doc.bytes_.load() # Note this is equivalent to do doc.tensor, doc.frame_rate = doc.url.load() - assert isinstance(doc.tensor, np.ndarray) + assert isinstance(doc.tensor, AudioNdArray) ``` --- - :return: np.ndarray representing the Audio as RGB values + :return: tuple of an AudioNdArray representing the audio bytes content, + and an integer representing the frame rate. """ - if TYPE_CHECKING: - import pydub - else: - pydub = import_library('pydub', raise_error=True) + pydub = import_library('pydub', raise_error=True) # noqa: F841 + from pydub import AudioSegment - segment = pydub.AudioSegment.from_file(io.BytesIO(self)) + segment = AudioSegment.from_file(io.BytesIO(self)) # Convert to float32 using NumPy samples = np.array(segment.get_array_of_samples()) # Normalise float32 array so that values are between -1.0 and +1.0 samples_norm = samples / 2 ** (segment.sample_width * 8 - 1) - return samples_norm, segment.frame_rate + return parse_obj_as(AudioNdArray, samples_norm), segment.frame_rate diff --git a/docarray/typing/bytes/image_bytes.py b/docarray/typing/bytes/image_bytes.py index e5d1d400d99..b3b1261f6b2 100644 --- a/docarray/typing/bytes/image_bytes.py +++ b/docarray/typing/bytes/image_bytes.py @@ -7,6 +7,7 @@ from docarray.typing.abstract_type import AbstractType from docarray.typing.proto_register import _register_proto +from docarray.typing.tensor.image import ImageNdArray from docarray.utils._internal.misc import import_library if TYPE_CHECKING: @@ -81,16 +82,15 @@ def load( width: Optional[int] = None, height: Optional[int] = None, axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'), - ) -> np.ndarray: + ) -> ImageNdArray: """ - Load the image from the bytes into a numpy.ndarray image tensor + Load the image from the ImageBytes into an ImageNdArray --- ```python from docarray import BaseDoc - from docarray.typing import ImageUrl - import numpy as np + from docarray.typing import ImageNdArray, ImageUrl class MyDoc(BaseDoc): @@ -103,7 +103,7 @@ class MyDoc(BaseDoc): ) img_tensor = doc.img_url.load() - assert isinstance(img_tensor, np.ndarray) + assert isinstance(img_tensor, ImageNdArray) img_tensor = doc.img_url.load(height=224, width=224) assert img_tensor.shape == (224, 224, 3) @@ -119,10 +119,10 @@ class MyDoc(BaseDoc): :param height: height of the image tensor. :param axis_layout: ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel - :return: np.ndarray representing the image as RGB values + :return: ImageNdArray representing the image as RGB values """ - raw_img = self.load_pil() + if width or height: new_width = width or raw_img.width new_height = height or raw_img.height @@ -132,7 +132,8 @@ class MyDoc(BaseDoc): except Exception: tensor = np.array(raw_img) - return self._move_channel_axis(tensor, axis_layout=axis_layout) + img = self._move_channel_axis(tensor, axis_layout=axis_layout) + return parse_obj_as(ImageNdArray, img) @staticmethod def _move_channel_axis( diff --git a/docarray/typing/bytes/video_bytes.py b/docarray/typing/bytes/video_bytes.py index bac2733b8db..1f4e22e4fd1 100644 --- a/docarray/typing/bytes/video_bytes.py +++ b/docarray/typing/bytes/video_bytes.py @@ -60,8 +60,7 @@ def load(self, **kwargs) -> VideoLoadResult: ```python from docarray import BaseDoc - from docarray.typing import VideoUrl - import numpy as np + from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl class MyDoc(BaseDoc): @@ -73,9 +72,9 @@ class MyDoc(BaseDoc): ) video, audio, key_frame_indices = doc.video_url.load() - assert isinstance(video, np.ndarray) - assert isinstance(audio, np.ndarray) - assert isinstance(key_frame_indices, np.ndarray) + assert isinstance(video, VideoNdArray) + assert isinstance(audio, AudioNdArray) + assert isinstance(key_frame_indices, NdArray) ``` --- diff --git a/docarray/typing/tensor/image/abstract_image_tensor.py b/docarray/typing/tensor/image/abstract_image_tensor.py index 7b033a4ee33..0d65f72ae9a 100644 --- a/docarray/typing/tensor/image/abstract_image_tensor.py +++ b/docarray/typing/tensor/image/abstract_image_tensor.py @@ -1,7 +1,6 @@ import io import warnings from abc import ABC -from typing import TYPE_CHECKING from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.utils._internal.misc import import_library, is_notebook @@ -15,11 +14,8 @@ def to_bytes(self, format: str = 'PNG') -> bytes: :param format: the image format use to store the image, can be 'PNG' , 'JPG' ... :return: bytes """ - if TYPE_CHECKING: - from PIL import Image as PILImage - else: - PIL = import_library('PIL', raise_error=True) # noqa: F841 - from PIL import Image as PILImage + PIL = import_library('PIL', raise_error=True) # noqa: F841 + from PIL import Image as PILImage if format == 'jpg': format = 'jpeg' # unify it to ISO standard @@ -40,11 +36,8 @@ def display(self) -> None: Display image data from tensor in notebook. """ if is_notebook(): - if TYPE_CHECKING: - from PIL import Image as PILImage - else: - PIL = import_library('PIL', raise_error=True) # noqa: F841 - from PIL import Image as PILImage + PIL = import_library('PIL', raise_error=True) # noqa: F841 + from PIL import Image as PILImage np_array = self.get_comp_backend().to_numpy(self) img = PILImage.fromarray(np_array) diff --git a/docarray/typing/url/any_url.py b/docarray/typing/url/any_url.py index 4aacf250d89..e414a5572e2 100644 --- a/docarray/typing/url/any_url.py +++ b/docarray/typing/url/any_url.py @@ -142,7 +142,6 @@ def from_protobuf(cls: Type[T], pb_msg: 'str') -> T: def load_bytes(self, timeout: Optional[float] = None) -> bytes: """Convert url to bytes. This will either load or download the file and save it into a bytes object. - :param uri: the URI of Document. Can be a local file path or a (remote) URL :param timeout: timeout for urlopen. Only relevant if URI is not local :return: bytes. """ diff --git a/docarray/typing/url/audio_url.py b/docarray/typing/url/audio_url.py index bab24b518cf..43cdf37c36d 100644 --- a/docarray/typing/url/audio_url.py +++ b/docarray/typing/url/audio_url.py @@ -1,8 +1,7 @@ import warnings -from typing import TYPE_CHECKING, Any, Tuple, Type, TypeVar, Union - -import numpy as np +from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, TypeVar, Union +from docarray.typing import AudioNdArray from docarray.typing.bytes.audio_bytes import AudioBytes from docarray.typing.proto_register import _register_proto from docarray.typing.url.any_url import AnyUrl @@ -44,19 +43,17 @@ def validate( raise ValueError('Audio URL must have a valid extension') return cls(str(url), scheme=None) - def load(self: T) -> Tuple[np.ndarray, int]: + def load(self: T) -> Tuple[AudioNdArray, int]: """ - Load the data from the url into an AudioNdArray. - + Load the data from the url into an AudioNdArray and the frame rate. --- ```python from typing import Optional - from docarray import BaseDoc - import numpy as np - from docarray.typing import AudioUrl, AudioNdArray + from docarray import BaseDoc + from docarray.typing import AudioNdArray, AudioUrl class MyDoc(BaseDoc): @@ -66,16 +63,29 @@ class MyDoc(BaseDoc): doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav') doc.audio_tensor, _ = doc.audio_url.load() - assert isinstance(doc.audio_tensor, np.ndarray) + assert isinstance(doc.audio_tensor, AudioNdArray) ``` --- - :return: AudioNdArray representing the audio file content. + :return: tuple of an AudioNdArray representing the Audio file content, + and an integer representing the frame rate. + """ - bytes_ = AudioBytes(self.load_bytes()) + bytes_ = self.load_bytes() return bytes_.load() + def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes: + """ + Convert url to AudioBytes. This will either load or download the file and save + it into an AudioBytes object. + + :param timeout: timeout for urlopen. Only relevant if url is not local + :return: AudioBytes object + """ + bytes_ = super().load_bytes(timeout=timeout) + return AudioBytes(bytes_) + def display(self): """ Play the audio sound from url in notebook. diff --git a/docarray/typing/url/image_url.py b/docarray/typing/url/image_url.py index 7fa0f58af80..b5b221f939e 100644 --- a/docarray/typing/url/image_url.py +++ b/docarray/typing/url/image_url.py @@ -1,9 +1,9 @@ import warnings from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, TypeVar, Union -import numpy as np - +from docarray.typing import ImageBytes from docarray.typing.proto_register import _register_proto +from docarray.typing.tensor.image import ImageNdArray from docarray.typing.url.any_url import AnyUrl from docarray.utils._internal.misc import is_notebook @@ -76,16 +76,15 @@ def load( height: Optional[int] = None, axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'), timeout: Optional[float] = None, - ) -> np.ndarray: + ) -> ImageNdArray: """ - Load the data from the url into a numpy.ndarray image tensor + Load the data from the url into an ImageNdArray --- ```python from docarray import BaseDoc - from docarray.typing import ImageUrl - import numpy as np + from docarray.typing import ImageUrl, ImageNdArray class MyDoc(BaseDoc): @@ -98,7 +97,7 @@ class MyDoc(BaseDoc): ) img_tensor = doc.img_url.load() - assert isinstance(img_tensor, np.ndarray) + assert isinstance(img_tensor, ImageNdArray) img_tensor = doc.img_url.load(height=224, width=224) assert img_tensor.shape == (224, 224, 3) @@ -116,13 +115,24 @@ class MyDoc(BaseDoc): 'H' = height, 'W' = width, 'C' = color channel :param timeout: timeout (sec) for urlopen network request. Only relevant if URL is not local - :return: np.ndarray representing the image as RGB values + :return: ImageNdArray representing the image as RGB values """ from docarray.typing.bytes.image_bytes import ImageBytes buffer = ImageBytes(self.load_bytes(timeout=timeout)) return buffer.load(width, height, axis_layout) + def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes: + """ + Convert url to ImageBytes. This will either load or download the file and save + it into an ImageBytes object. + + :param timeout: timeout for urlopen. Only relevant if url is not local + :return: ImageBytes object + """ + bytes_ = super().load_bytes(timeout=timeout) + return ImageBytes(bytes_) + def display(self) -> None: """ Display image data from url in notebook. diff --git a/docarray/typing/url/url_3d/mesh_url.py b/docarray/typing/url/url_3d/mesh_url.py index 2f235b6fb01..788bc4f02ac 100644 --- a/docarray/typing/url/url_3d/mesh_url.py +++ b/docarray/typing/url/url_3d/mesh_url.py @@ -33,7 +33,6 @@ def load( ```python from docarray import BaseDoc - import numpy as np from docarray.typing import Mesh3DUrl, NdArray diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index e6b35642f5e..dfbc8114063 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -1,7 +1,7 @@ import warnings -from typing import TYPE_CHECKING, Any, Type, TypeVar, Union +from typing import TYPE_CHECKING, Any, Optional, Type, TypeVar, Union -from docarray.typing.bytes.video_bytes import VideoLoadResult +from docarray.typing.bytes.video_bytes import VideoBytes, VideoLoadResult from docarray.typing.proto_register import _register_proto from docarray.typing.url.any_url import AnyUrl from docarray.utils._internal.misc import is_notebook @@ -99,11 +99,20 @@ class MyDoc(BaseDoc): :return: AudioNdArray representing the audio content, VideoNdArray representing the images of the video, NdArray of the key frame indices. """ - from docarray.typing.bytes.video_bytes import VideoBytes - - buffer = VideoBytes(self.load_bytes(**kwargs)) + buffer = self.load_bytes(**kwargs) return buffer.load() + def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes: + """ + Convert url to VideoBytes. This will either load or download the file and save + it into an VideoBytes object. + + :param timeout: timeout for urlopen. Only relevant if url is not local + :return: VideoBytes object + """ + bytes_ = super().load_bytes(timeout=timeout) + return VideoBytes(bytes_) + def display(self): """ Play video from url in notebook. diff --git a/tests/integrations/document/test_proto.py b/tests/integrations/document/test_proto.py index 02b9aae3f00..2f656e6b4b4 100644 --- a/tests/integrations/document/test_proto.py +++ b/tests/integrations/document/test_proto.py @@ -122,6 +122,7 @@ class MyDoc(BaseDoc): assert doc.bytes_ == b'hello' assert doc.img_bytes == b'img' + assert isinstance(doc.img_bytes, ImageBytes) @pytest.mark.tensorflow diff --git a/tests/integrations/predefined_document/test_image.py b/tests/integrations/predefined_document/test_image.py index f77e945d714..b9fa418e29c 100644 --- a/tests/integrations/predefined_document/test_image.py +++ b/tests/integrations/predefined_document/test_image.py @@ -5,6 +5,7 @@ from docarray import BaseDoc from docarray.documents import ImageDoc +from docarray.typing import ImageBytes from docarray.utils._internal.misc import is_tf_available tf_available = is_tf_available() @@ -71,6 +72,7 @@ def test_byte(): img = ImageDoc(url=REMOTE_JPG) img.bytes_ = img.url.load_bytes() + assert isinstance(img.bytes_, ImageBytes) @pytest.mark.slow diff --git a/tests/units/typing/url/test_audio_url.py b/tests/units/typing/url/test_audio_url.py index 9168302aada..8b8757868d2 100644 --- a/tests/units/typing/url/test_audio_url.py +++ b/tests/units/typing/url/test_audio_url.py @@ -7,7 +7,7 @@ from docarray import BaseDoc from docarray.base_doc.io.json import orjson_dumps -from docarray.typing import AudioTorchTensor, AudioUrl +from docarray.typing import AudioBytes, AudioTorchTensor, AudioUrl from docarray.utils._internal.misc import is_tf_available from tests import TOYDATA_DIR @@ -130,3 +130,11 @@ def test_proto_audio_url(file_url): uri = parse_obj_as(AudioUrl, file_url) proto = uri._to_node_protobuf() assert 'audio_url' in str(proto) + + +def test_load_bytes(): + uri = parse_obj_as(AudioUrl, REMOTE_AUDIO_FILE) + audio_bytes = uri.load_bytes() + assert isinstance(audio_bytes, bytes) + assert isinstance(audio_bytes, AudioBytes) + assert len(audio_bytes) > 0 diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py index e0dba53b6fe..f08d951bbec 100644 --- a/tests/units/typing/url/test_video_url.py +++ b/tests/units/typing/url/test_video_url.py @@ -10,6 +10,7 @@ from docarray.typing import ( AudioNdArray, NdArray, + VideoBytes, VideoNdArray, VideoTorchTensor, VideoUrl, @@ -158,4 +159,5 @@ def test_load_bytes(): uri = parse_obj_as(VideoUrl, file_url) video_bytes = uri.load_bytes() assert isinstance(video_bytes, bytes) + assert isinstance(video_bytes, VideoBytes) assert len(video_bytes) > 0