diff --git a/docarray/documents/audio.py b/docarray/documents/audio.py index 9617c913e0..30c07f9599 100644 --- a/docarray/documents/audio.py +++ b/docarray/documents/audio.py @@ -1,7 +1,6 @@ -from typing import TYPE_CHECKING, Any, Optional, Type, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Type, TypeVar, Union import numpy as np - from pydantic import Field from docarray.base_doc import BaseDoc @@ -10,6 +9,10 @@ from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.typing.tensor.audio.audio_tensor import AudioTensor from docarray.utils._internal.misc import import_library +from docarray.utils._internal.pydantic import is_pydantic_v2 + +if is_pydantic_v2: + from pydantic import model_validator if TYPE_CHECKING: import tensorflow as tf # type: ignore @@ -121,17 +124,30 @@ class MultiModalDoc(BaseDoc): ) @classmethod - def validate( - cls: Type[T], - value: Union[str, AbstractTensor, Any], - ) -> T: + def _validate(cls, value) -> Dict[str, Any]: if isinstance(value, str): - value = cls(url=value) + value = dict(url=value) elif isinstance(value, (AbstractTensor, np.ndarray)) or ( torch is not None and isinstance(value, torch.Tensor) or (tf is not None and isinstance(value, tf.Tensor)) ): - value = cls(tensor=value) + value = dict(tensor=value) + + return value + + if is_pydantic_v2: + + @model_validator(mode='before') + @classmethod + def validate_model_before(cls, value): + return cls._validate(value) + + else: - return super().validate(value) + @classmethod + def validate( + cls: Type[T], + value: Union[str, AbstractTensor, Any], + ) -> T: + return super().validate(cls._validate(value)) diff --git a/docarray/documents/image.py b/docarray/documents/image.py index 41ae5a47c4..e263e1df5c 100644 --- a/docarray/documents/image.py +++ b/docarray/documents/image.py @@ -1,7 +1,6 @@ -from typing import TYPE_CHECKING, Any, Optional, Type, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Type, TypeVar, Union import numpy as np - from pydantic import Field from docarray.base_doc import BaseDoc @@ -9,7 +8,10 @@ from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.typing.tensor.image.image_tensor import ImageTensor from docarray.utils._internal.misc import import_library +from docarray.utils._internal.pydantic import is_pydantic_v2 +if is_pydantic_v2: + from pydantic import model_validator if TYPE_CHECKING: import tensorflow as tf # type: ignore @@ -115,19 +117,32 @@ class MultiModalDoc(BaseDoc): ) @classmethod - def validate( - cls: Type[T], - value: Union[str, AbstractTensor, Any], - ) -> T: + def _validate(cls, value) -> Dict[str, Any]: if isinstance(value, str): - value = cls(url=value) + value = dict(url=value) elif ( isinstance(value, (AbstractTensor, np.ndarray)) or (torch is not None and isinstance(value, torch.Tensor)) or (tf is not None and isinstance(value, tf.Tensor)) ): - value = cls(tensor=value) + value = dict(tensor=value) elif isinstance(value, bytes): - value = cls(byte=value) + value = dict(byte=value) + + return value + + if is_pydantic_v2: + + @model_validator(mode='before') + @classmethod + def validate_model_before(cls, value): + return cls._validate(value) + + else: - return super().validate(value) + @classmethod + def validate( + cls: Type[T], + value: Union[str, AbstractTensor, Any], + ) -> T: + return super().validate(cls._validate(value)) diff --git a/docarray/documents/mesh/mesh_3d.py b/docarray/documents/mesh/mesh_3d.py index 5eabc112ea..ac2f057509 100644 --- a/docarray/documents/mesh/mesh_3d.py +++ b/docarray/documents/mesh/mesh_3d.py @@ -6,7 +6,10 @@ from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces from docarray.typing.tensor.embedding import AnyEmbedding from docarray.typing.url.url_3d.mesh_url import Mesh3DUrl +from docarray.utils._internal.pydantic import is_pydantic_v2 +if is_pydantic_v2: + from pydantic import model_validator T = TypeVar('T', bound='Mesh3D') @@ -125,11 +128,22 @@ class MultiModalDoc(BaseDoc): default=None, ) - @classmethod - def validate( - cls: Type[T], - value: Union[str, Any], - ) -> T: - if isinstance(value, str): - value = cls(url=value) - return super().validate(value) + if is_pydantic_v2: + + @model_validator(mode='before') + @classmethod + def validate_model_before(cls, value): + if isinstance(value, str): + return {'url': value} + return value + + else: + + @classmethod + def validate( + cls: Type[T], + value: Union[str, Any], + ) -> T: + if isinstance(value, str): + value = cls(url=value) + return super().validate(value) diff --git a/docarray/documents/point_cloud/point_cloud_3d.py b/docarray/documents/point_cloud/point_cloud_3d.py index d911cf82b3..1b337c7578 100644 --- a/docarray/documents/point_cloud/point_cloud_3d.py +++ b/docarray/documents/point_cloud/point_cloud_3d.py @@ -1,7 +1,6 @@ from typing import TYPE_CHECKING, Any, Optional, Type, TypeVar, Union import numpy as np - from pydantic import Field from docarray.base_doc import BaseDoc @@ -9,6 +8,10 @@ from docarray.typing import AnyEmbedding, PointCloud3DUrl from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.utils._internal.misc import import_library +from docarray.utils._internal.pydantic import is_pydantic_v2 + +if is_pydantic_v2: + from pydantic import model_validator if TYPE_CHECKING: import tensorflow as tf # type: ignore @@ -130,17 +133,30 @@ class MultiModalDoc(BaseDoc): ) @classmethod - def validate( - cls: Type[T], - value: Union[str, AbstractTensor, Any], - ) -> T: + def _validate(self, value: Union[str, AbstractTensor, Any]) -> Any: if isinstance(value, str): - value = cls(url=value) + value = {'url': value} elif isinstance(value, (AbstractTensor, np.ndarray)) or ( torch is not None and isinstance(value, torch.Tensor) or (tf is not None and isinstance(value, tf.Tensor)) ): - value = cls(tensors=PointsAndColors(points=value)) + value = {'tensors': PointsAndColors(points=value)} + + return value + + if is_pydantic_v2: + + @model_validator(mode='before') + @classmethod + def validate_model_before(cls, value): + return cls._validate(value) + + else: - return super().validate(value) + @classmethod + def validate( + cls: Type[T], + value: Union[str, AbstractTensor, Any], + ) -> T: + return super().validate(cls._validate(value)) diff --git a/docarray/documents/text.py b/docarray/documents/text.py index 4df291bb18..a504ce7589 100644 --- a/docarray/documents/text.py +++ b/docarray/documents/text.py @@ -5,6 +5,10 @@ from docarray.base_doc import BaseDoc from docarray.typing import TextUrl from docarray.typing.tensor.embedding import AnyEmbedding +from docarray.utils._internal.pydantic import is_pydantic_v2 + +if is_pydantic_v2: + from pydantic import model_validator T = TypeVar('T', bound='TextDoc') @@ -129,14 +133,26 @@ def __init__(self, text: Optional[str] = None, **kwargs): kwargs['text'] = text super().__init__(**kwargs) - @classmethod - def validate( - cls: Type[T], - value: Union[str, Any], - ) -> T: - if isinstance(value, str): - value = cls(text=value) - return super().validate(value) + if is_pydantic_v2: + + @model_validator(mode='before') + @classmethod + def validate_model_before(cls, values): + if isinstance(values, str): + return {'text': values} + else: + return values + + else: + + @classmethod + def validate( + cls: Type[T], + value: Union[str, Any], + ) -> T: + if isinstance(value, str): + value = cls(text=value) + return super().validate(value) def __eq__(self, other: Any) -> bool: if isinstance(other, str): diff --git a/docarray/documents/video.py b/docarray/documents/video.py index ed76c64ec5..ddb5529550 100644 --- a/docarray/documents/video.py +++ b/docarray/documents/video.py @@ -1,7 +1,6 @@ -from typing import TYPE_CHECKING, Any, Optional, Type, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Type, TypeVar, Union import numpy as np - from pydantic import Field from docarray.base_doc import BaseDoc @@ -11,6 +10,10 @@ from docarray.typing.tensor.video.video_tensor import VideoTensor from docarray.typing.url.video_url import VideoUrl from docarray.utils._internal.misc import import_library +from docarray.utils._internal.pydantic import is_pydantic_v2 + +if is_pydantic_v2: + from pydantic import model_validator if TYPE_CHECKING: import tensorflow as tf # type: ignore @@ -131,17 +134,30 @@ class MultiModalDoc(BaseDoc): ) @classmethod - def validate( - cls: Type[T], - value: Union[str, AbstractTensor, Any], - ) -> T: + def _validate(cls, value) -> Dict[str, Any]: if isinstance(value, str): - value = cls(url=value) + value = dict(url=value) elif isinstance(value, (AbstractTensor, np.ndarray)) or ( torch is not None and isinstance(value, torch.Tensor) or (tf is not None and isinstance(value, tf.Tensor)) ): - value = cls(tensor=value) + value = dict(tensor=value) + + return value + + if is_pydantic_v2: + + @model_validator(mode='before') + @classmethod + def validate_model_before(cls, value): + return cls._validate(value) + + else: - return super().validate(value) + @classmethod + def validate( + cls: Type[T], + value: Union[str, AbstractTensor, Any], + ) -> T: + return super().validate(cls._validate(value)) diff --git a/docarray/typing/tensor/ndarray.py b/docarray/typing/tensor/ndarray.py index 08edaf2a79..6ea94dc697 100644 --- a/docarray/typing/tensor/ndarray.py +++ b/docarray/typing/tensor/ndarray.py @@ -142,7 +142,6 @@ def _docarray_validate( return cls._docarray_from_native(arr) except Exception: pass # handled below - breakpoint() raise ValueError(f'Expected a numpy.ndarray compatible type, got {type(value)}') @classmethod diff --git a/tests/integrations/predefined_document/test_audio.py b/tests/integrations/predefined_document/test_audio.py index e8a063946a..52efb11705 100644 --- a/tests/integrations/predefined_document/test_audio.py +++ b/tests/integrations/predefined_document/test_audio.py @@ -11,7 +11,6 @@ from docarray.typing import AudioUrl from docarray.typing.tensor.audio import AudioNdArray, AudioTorchTensor from docarray.utils._internal.misc import is_tf_available -from docarray.utils._internal.pydantic import is_pydantic_v2 from tests import TOYDATA_DIR tf_available = is_tf_available() @@ -184,32 +183,27 @@ class MyAudio(AudioDoc): # Validating predefined docs against url or tensor is not yet working with pydantic v28 -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_audio_np(): audio = parse_obj_as(AudioDoc, np.zeros((10, 10, 3))) assert (audio.tensor == np.zeros((10, 10, 3))).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_audio_torch(): audio = parse_obj_as(AudioDoc, torch.zeros(10, 10, 3)) assert (audio.tensor == torch.zeros(10, 10, 3)).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") @pytest.mark.tensorflow def test_audio_tensorflow(): audio = parse_obj_as(AudioDoc, tf.zeros((10, 10, 3))) assert tnp.allclose(audio.tensor.tensor, tf.zeros((10, 10, 3))) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_audio_bytes(): audio = parse_obj_as(AudioDoc, torch.zeros(10, 10, 3)) audio.bytes_ = audio.tensor.to_bytes() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_audio_shortcut_doc(): class MyDoc(BaseDoc): audio: AudioDoc diff --git a/tests/integrations/predefined_document/test_image.py b/tests/integrations/predefined_document/test_image.py index 2897e0f2f1..45f6869629 100644 --- a/tests/integrations/predefined_document/test_image.py +++ b/tests/integrations/predefined_document/test_image.py @@ -7,7 +7,6 @@ from docarray.documents import ImageDoc from docarray.typing import ImageBytes from docarray.utils._internal.misc import is_tf_available -from docarray.utils._internal.pydantic import is_pydantic_v2 tf_available = is_tf_available() if tf_available: @@ -19,6 +18,8 @@ 'Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg' ) +pytestmark = [pytest.mark.image] + @pytest.mark.slow @pytest.mark.internet @@ -30,19 +31,16 @@ def test_image(): assert isinstance(image.tensor, np.ndarray) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_image_str(): image = parse_obj_as(ImageDoc, 'http://myurl.jpg') assert image.url == 'http://myurl.jpg' -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_image_np(): image = parse_obj_as(ImageDoc, np.zeros((10, 10, 3))) assert (image.tensor == np.zeros((10, 10, 3))).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_image_torch(): image = parse_obj_as(ImageDoc, torch.zeros(10, 10, 3)) assert (image.tensor == torch.zeros(10, 10, 3)).all() @@ -54,7 +52,6 @@ def test_image_tensorflow(): assert tnp.allclose(image.tensor.tensor, tf.zeros((10, 10, 3))) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_image_shortcut_doc(): class MyDoc(BaseDoc): image: ImageDoc diff --git a/tests/integrations/predefined_document/test_mesh.py b/tests/integrations/predefined_document/test_mesh.py index 3cd537b923..a4e5976546 100644 --- a/tests/integrations/predefined_document/test_mesh.py +++ b/tests/integrations/predefined_document/test_mesh.py @@ -4,12 +4,13 @@ from docarray.base_doc.doc import BaseDoc from docarray.documents import Mesh3D -from docarray.utils._internal.pydantic import is_pydantic_v2 from tests import TOYDATA_DIR LOCAL_OBJ_FILE = str(TOYDATA_DIR / 'tetrahedron.obj') REMOTE_OBJ_FILE = 'https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj' +pytestmark = [pytest.mark.mesh] + @pytest.mark.slow @pytest.mark.internet @@ -23,13 +24,11 @@ def test_mesh(file_url: str): assert isinstance(mesh.tensors.faces, np.ndarray) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_str_init(): t = parse_obj_as(Mesh3D, 'http://hello.ply') assert t.url == 'http://hello.ply' -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_doc(): class MyDoc(BaseDoc): mesh1: Mesh3D diff --git a/tests/integrations/predefined_document/test_point_cloud.py b/tests/integrations/predefined_document/test_point_cloud.py index c036f46938..b8a75914f2 100644 --- a/tests/integrations/predefined_document/test_point_cloud.py +++ b/tests/integrations/predefined_document/test_point_cloud.py @@ -6,7 +6,6 @@ from docarray import BaseDoc from docarray.documents import PointCloud3D from docarray.utils._internal.misc import is_tf_available -from docarray.utils._internal.pydantic import is_pydantic_v2 from tests import TOYDATA_DIR tf_available = is_tf_available() @@ -30,26 +29,22 @@ def test_point_cloud(file_url): assert isinstance(point_cloud.tensors.points, np.ndarray) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_point_cloud_np(): pc = parse_obj_as(PointCloud3D, np.zeros((10, 3))) assert (pc.tensors.points == np.zeros((10, 3))).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_point_cloud_torch(): pc = parse_obj_as(PointCloud3D, torch.zeros(10, 3)) assert (pc.tensors.points == torch.zeros(10, 3)).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") @pytest.mark.tensorflow def test_point_cloud_tensorflow(): pc = parse_obj_as(PointCloud3D, tf.zeros((10, 3))) assert tnp.allclose(pc.tensors.points.tensor, tf.zeros((10, 3))) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_point_cloud_shortcut_doc(): class MyDoc(BaseDoc): pc: PointCloud3D @@ -66,7 +61,6 @@ class MyDoc(BaseDoc): assert (doc.pc3.tensors.points == torch.zeros(10, 3)).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") @pytest.mark.tensorflow def test_point_cloud_shortcut_doc_tf(): class MyDoc(BaseDoc): diff --git a/tests/integrations/predefined_document/test_text.py b/tests/integrations/predefined_document/test_text.py index 5b89844ca3..da5d31092f 100644 --- a/tests/integrations/predefined_document/test_text.py +++ b/tests/integrations/predefined_document/test_text.py @@ -1,24 +1,19 @@ -import pytest from pydantic import parse_obj_as from docarray import BaseDoc from docarray.documents import TextDoc -from docarray.utils._internal.pydantic import is_pydantic_v2 -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_simple_init(): t = TextDoc(text='hello') assert t.text == 'hello' -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_str_init(): t = parse_obj_as(TextDoc, 'hello') assert t.text == 'hello' -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_doc(): class MyDoc(BaseDoc): text1: TextDoc diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index 90b340f4f7..6aecdb10e7 100644 --- a/tests/integrations/predefined_document/test_video.py +++ b/tests/integrations/predefined_document/test_video.py @@ -4,10 +4,9 @@ from pydantic import parse_obj_as from docarray import BaseDoc -from docarray.documents import VideoDoc, AudioDoc +from docarray.documents import AudioDoc, VideoDoc from docarray.typing import AudioNdArray, NdArray, VideoNdArray from docarray.utils._internal.misc import is_tf_available -from docarray.utils._internal.pydantic import is_pydantic_v2 from tests import TOYDATA_DIR tf_available = is_tf_available() @@ -20,6 +19,9 @@ REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 +pytestmark = [pytest.mark.video] + + @pytest.mark.slow @pytest.mark.internet @pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) @@ -35,26 +37,22 @@ def test_video(file_url): assert isinstance(vid.key_frame_indices, NdArray) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_video_np(): video = parse_obj_as(VideoDoc, np.zeros((10, 10, 3))) assert (video.tensor == np.zeros((10, 10, 3))).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_video_torch(): video = parse_obj_as(VideoDoc, torch.zeros(10, 10, 3)) assert (video.tensor == torch.zeros(10, 10, 3)).all() -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") @pytest.mark.tensorflow def test_video_tensorflow(): video = parse_obj_as(VideoDoc, tf.zeros((10, 10, 3))) assert tnp.allclose(video.tensor.tensor, tf.zeros((10, 10, 3))) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_video_shortcut_doc(): class MyDoc(BaseDoc): video: VideoDoc diff --git a/tests/units/array/test_array_from_to_csv.py b/tests/units/array/test_array_from_to_csv.py index 4cbf9a657f..07d353ffc0 100644 --- a/tests/units/array/test_array_from_to_csv.py +++ b/tests/units/array/test_array_from_to_csv.py @@ -5,7 +5,6 @@ from docarray import BaseDoc, DocList, DocVec from docarray.documents import ImageDoc -from docarray.utils._internal.pydantic import is_pydantic_v2 from tests import TOYDATA_DIR @@ -44,7 +43,6 @@ def test_to_from_csv(tmpdir, nested_doc_cls): assert doc1 == doc2 -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_from_csv_nested(nested_doc_cls): da = DocList[nested_doc_cls].from_csv( file_path=str(TOYDATA_DIR / 'docs_nested.csv')