From 829c128afa52bf8d4170f928b5eb8d07329f16ff Mon Sep 17 00:00:00 2001 From: samsja Date: Fri, 29 Sep 2023 15:22:22 +0200 Subject: [PATCH 1/4] fix: fix readme for pydantic v2 Signed-off-by: samsja --- README.md | 9 +++++---- tests/documentation/test_docs.py | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 96f2d5faa1..79202079e0 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,8 @@ class MyDocument(BaseDoc): image_url: ImageUrl # could also be VideoUrl, AudioUrl, etc. image_tensor: Optional[ TorchTensor[1704, 2272, 3] - ] # could also be NdArray or TensorflowTensor - embedding: Optional[TorchTensor] + ] = None # could also be NdArray or TensorflowTensor + embedding: Optional[TorchTensor] = None ``` So not only can you define the types of your data, you can even **specify the shape of your tensors!** @@ -643,8 +643,8 @@ import tensorflow as tf class Podcast(BaseDoc): - audio_tensor: Optional[AudioTensorFlowTensor] - embedding: Optional[AudioTensorFlowTensor] + audio_tensor: Optional[AudioTensorFlowTensor] = None + embedding: Optional[AudioTensorFlowTensor] = None class MyPodcastModel(tf.keras.Model): @@ -713,6 +713,7 @@ async def create_item(doc: InputDoc) -> OutputDoc: ) return doc + input_doc = InputDoc(text='', img=ImageDoc(tensor=np.random.random((3, 224, 224)))) async with AsyncClient(app=app, base_url="http://test") as ac: diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index df1ae1a282..f5940a3acc 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -70,7 +70,6 @@ def test_files_good(fpath): check_md_file(fpath=fpath, memory=True, keyword_ignore=['pickle', 'jac']) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") def test_readme(): check_md_file( fpath='README.md', From fa70c550395ca1ef6d9c7e22143d216440ab5639 Mon Sep 17 00:00:00 2001 From: samsja Date: Mon, 2 Oct 2023 09:56:08 +0200 Subject: [PATCH 2/4] fix: fix readme for pydantic v2 Signed-off-by: samsja --- docs/data_types/3d_mesh/3d_mesh.md | 20 +++++++++---------- docs/data_types/audio/audio.md | 12 +++++------ docs/data_types/image/image.md | 10 +++++----- docs/data_types/text/text.md | 8 ++++---- docs/data_types/video/video.md | 12 +++++------ docs/how_to/add_doc_index.md | 2 +- .../how_to/multimodal_training_and_serving.md | 9 +++++---- docs/user_guide/representing/array.md | 4 ++-- tests/documentation/test_docs.py | 2 -- 9 files changed, 39 insertions(+), 40 deletions(-) diff --git a/docs/data_types/3d_mesh/3d_mesh.md b/docs/data_types/3d_mesh/3d_mesh.md index 4895b0b38e..4727f12cb7 100644 --- a/docs/data_types/3d_mesh/3d_mesh.md +++ b/docs/data_types/3d_mesh/3d_mesh.md @@ -42,7 +42,7 @@ from docarray.typing import Mesh3DUrl class MyMesh3D(BaseDoc): mesh_url: Mesh3DUrl - tensors: Optional[VerticesAndFaces] + tensors: Optional[VerticesAndFaces] = None doc = MyMesh3D(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj") @@ -1355,7 +1355,7 @@ from docarray.typing import PointCloud3DUrl class MyPointCloud(BaseDoc): url: PointCloud3DUrl - tensors: Optional[PointsAndColors] + tensors: Optional[PointsAndColors] = None doc = MyPointCloud(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj") @@ -2655,20 +2655,20 @@ The [`Mesh3D`][docarray.documents.mesh.Mesh3D] class provides a [`Mesh3DUrl`][do ``` { .python } class Mesh3D(BaseDoc): - url: Optional[Mesh3DUrl] - tensors: Optional[VerticesAndFaces] - embedding: Optional[AnyEmbedding] - bytes_: Optional[bytes] + url: Optional[Mesh3DUrl] = None + tensors: Optional[VerticesAndFaces] = None + embedding: Optional[AnyEmbedding] = None + bytes_: Optional[bytes] = None ``` ### `PointCloud3D` ``` { .python } class PointCloud3D(BaseDoc): - url: Optional[PointCloud3DUrl] - tensors: Optional[PointsAndColors] - embedding: Optional[AnyEmbedding] - bytes_: Optional[bytes] + url: Optional[PointCloud3DUrl] = None + tensors: Optional[PointsAndColors] = None + embedding: Optional[AnyEmbedding] = None + bytes_: Optional[bytes] = None ``` You can use them directly, extend or compose them: diff --git a/docs/data_types/audio/audio.md b/docs/data_types/audio/audio.md index ea12b0a5e3..a676adb4a0 100644 --- a/docs/data_types/audio/audio.md +++ b/docs/data_types/audio/audio.md @@ -187,11 +187,11 @@ To get started and play around with your audio data, DocArray provides a predefi ``` { .python } class AudioDoc(BaseDoc): - url: Optional[AudioUrl] - tensor: Optional[AudioTensor] - embedding: Optional[AnyEmbedding] - bytes_: Optional[AudioBytes] - frame_rate: Optional[int] + url: Optional[AudioUrl] = None + tensor: Optional[AudioTensor] = None + embedding: Optional[AnyEmbedding] = None + bytes_: Optional[AudioBytes] = None + frame_rate: Optional[int] = None ``` You can use this class directly or extend it to your preference: @@ -203,7 +203,7 @@ from typing import Optional # extend AudioDoc class MyAudio(AudioDoc): - name: Optional[str] + name: Optional[str] = None audio = MyAudio( diff --git a/docs/data_types/image/image.md b/docs/data_types/image/image.md index 27c7a5bfe0..e5aa9cbaf5 100644 --- a/docs/data_types/image/image.md +++ b/docs/data_types/image/image.md @@ -171,10 +171,10 @@ To get started and play around with the image modality, DocArray provides a pred ``` { .python } class ImageDoc(BaseDoc): - url: Optional[ImageUrl] - tensor: Optional[ImageTensor] - embedding: Optional[AnyEmbedding] - bytes_: Optional[ImageBytes] + url: Optional[ImageUrl] = None + tensor: Optional[ImageTensor] = None + embedding: Optional[AnyEmbedding] = None + bytes_: Optional[ImageBytes] = None ``` You can use this class directly or extend it to your preference: @@ -188,7 +188,7 @@ from typing import Optional # extending ImageDoc class MyImage(ImageDoc): image_title: str - second_embedding: Optional[AnyEmbedding] + second_embedding: Optional[AnyEmbedding] = None image = MyImage( diff --git a/docs/data_types/text/text.md b/docs/data_types/text/text.md index 2b1ec16384..5f3493c341 100644 --- a/docs/data_types/text/text.md +++ b/docs/data_types/text/text.md @@ -101,8 +101,8 @@ To get started and play around with your text data, DocArray provides a predefin ``` { .python } class TextDoc(BaseDoc): - text: Optional[str] - url: Optional[TextUrl] - embedding: Optional[AnyEmbedding] - bytes_: Optional[bytes] + text: Optional[str] = None + url: Optional[TextUrl] = None + embedding: Optional[AnyEmbedding] = None + bytes_: Optional[bytes] = None ``` diff --git a/docs/data_types/video/video.md b/docs/data_types/video/video.md index f619af9108..83b081c0cf 100644 --- a/docs/data_types/video/video.md +++ b/docs/data_types/video/video.md @@ -188,12 +188,12 @@ To get started and play around with your video data, DocArray provides a predefi ``` { .python } class VideoDoc(BaseDoc): - url: Optional[VideoUrl] + url: Optional[VideoUrl] = None audio: Optional[AudioDoc] = AudioDoc() - tensor: Optional[VideoTensor] - key_frame_indices: Optional[AnyTensor] - embedding: Optional[AnyEmbedding] - bytes_: Optional[bytes] + tensor: Optional[VideoTensor] = None + key_frame_indices: Optional[AnyTensor] = None + embedding: Optional[AnyEmbedding] = None + bytes_: Optional[bytes] = None ``` You can use this class directly or extend it to your preference: @@ -206,7 +206,7 @@ from docarray.documents import VideoDoc # extend it class MyVideo(VideoDoc): - name: Optional[str] + name: Optional[str] = None video = MyVideo( diff --git a/docs/how_to/add_doc_index.md b/docs/how_to/add_doc_index.md index facadaefb0..5ab3e3bbcc 100644 --- a/docs/how_to/add_doc_index.md +++ b/docs/how_to/add_doc_index.md @@ -187,7 +187,7 @@ The values of `self._column_infos` are `_ColumnInfo` dataclasses, which have the class _ColumnInfo: docarray_type: Type db_type: Any - n_dim: Optional[int] + n_dim: Optional[int] = None config: Dict[str, Any] ``` diff --git a/docs/how_to/multimodal_training_and_serving.md b/docs/how_to/multimodal_training_and_serving.md index eff3b2bc39..4353598cd8 100644 --- a/docs/how_to/multimodal_training_and_serving.md +++ b/docs/how_to/multimodal_training_and_serving.md @@ -101,7 +101,7 @@ class Tokens(BaseDoc): ```python class Text(BaseText): - tokens: Optional[Tokens] + tokens: Optional[Tokens] = None ``` Notice the [`TorchTensor`][docarray.typing.TorchTensor] type. It is a thin wrapper around `torch.Tensor` that can be used like any other Torch tensor, @@ -119,9 +119,9 @@ supported ML framework): ```python class ImageDoc(BaseDoc): - url: Optional[ImageUrl] - tensor: Optional[TorchTesor] - embedding: Optional[TorchTensor] + url: Optional[ImageUrl] = None + tensor: Optional[TorchTesor] = None + embedding: Optional[TorchTensor] = None ``` Actually, the `BaseText` above also already includes `tensor`, `url` and `embedding` fields, so we can use those on our @@ -141,6 +141,7 @@ This will be unnecessary once [this issue](https://github.com/docarray/docarray/ ```python from docarray import DocVec + DocVec[Tokens] DocVec[TextDoc] DocVec[ImageDoc] diff --git a/docs/user_guide/representing/array.md b/docs/user_guide/representing/array.md index 1d37a73b8a..5ea4a4bead 100644 --- a/docs/user_guide/representing/array.md +++ b/docs/user_guide/representing/array.md @@ -454,7 +454,7 @@ Both [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarr class MyDoc(BaseDoc): - nested_doc: Optional[BaseDoc] + nested_doc: Optional[BaseDoc] = None ``` Using nested optional fields differs slightly between DocList and DocVes, so watch out. But in a nutshell: @@ -484,7 +484,7 @@ class ImageDoc(BaseDoc): class ArticleDoc(BaseDoc): - image: Optional[ImageDoc] + image: Optional[ImageDoc] = None title: str ``` diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index f5940a3acc..51a618a3aa 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -4,7 +4,6 @@ from mktestdocs import grab_code_blocks from mktestdocs.__main__ import _executors, check_raw_string -from docarray.utils._internal.pydantic import is_pydantic_v2 from tests.index.elastic.fixture import start_storage_v8 # noqa: F401 file_to_skip = ['fastAPI', 'jina', 'index', 'first_steps.md'] @@ -64,7 +63,6 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): files_to_check.remove(file) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") @pytest.mark.parametrize('fpath', files_to_check, ids=str) def test_files_good(fpath): check_md_file(fpath=fpath, memory=True, keyword_ignore=['pickle', 'jac']) From 221a85c170a23fa473f175b5117d03d6104855de Mon Sep 17 00:00:00 2001 From: samsja Date: Mon, 2 Oct 2023 10:02:04 +0200 Subject: [PATCH 3/4] fix: fix docstring tests Signed-off-by: samsja --- docarray/documents/audio.py | 2 +- docarray/documents/image.py | 2 +- docarray/documents/mesh/mesh_3d.py | 2 +- docarray/documents/point_cloud/point_cloud_3d.py | 2 +- docarray/documents/text.py | 2 +- docarray/documents/video.py | 2 +- tests/documentation/test_docstring.py | 2 -- 7 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docarray/documents/audio.py b/docarray/documents/audio.py index 30c07f9599..5571a6e42f 100644 --- a/docarray/documents/audio.py +++ b/docarray/documents/audio.py @@ -60,7 +60,7 @@ class AudioDoc(BaseDoc): # extend it class MyAudio(AudioDoc): - name: Optional[TextDoc] + name: Optional[TextDoc] = None audio = MyAudio( diff --git a/docarray/documents/image.py b/docarray/documents/image.py index e263e1df5c..1b98a235f2 100644 --- a/docarray/documents/image.py +++ b/docarray/documents/image.py @@ -58,7 +58,7 @@ class ImageDoc(BaseDoc): # extend it class MyImage(ImageDoc): - second_embedding: Optional[AnyEmbedding] + second_embedding: Optional[AnyEmbedding] = None image = MyImage( diff --git a/docarray/documents/mesh/mesh_3d.py b/docarray/documents/mesh/mesh_3d.py index ac2f057509..e9ff863b2c 100644 --- a/docarray/documents/mesh/mesh_3d.py +++ b/docarray/documents/mesh/mesh_3d.py @@ -60,7 +60,7 @@ class Mesh3D(BaseDoc): # extend it class MyMesh3D(Mesh3D): - name: Optional[str] + name: Optional[str] = None mesh = MyMesh3D(url='https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj') diff --git a/docarray/documents/point_cloud/point_cloud_3d.py b/docarray/documents/point_cloud/point_cloud_3d.py index 1b337c7578..cd3ad2f268 100644 --- a/docarray/documents/point_cloud/point_cloud_3d.py +++ b/docarray/documents/point_cloud/point_cloud_3d.py @@ -62,7 +62,7 @@ class PointCloud3D(BaseDoc): # extend it class MyPointCloud3D(PointCloud3D): - second_embedding: Optional[AnyEmbedding] + second_embedding: Optional[AnyEmbedding] = None pc = MyPointCloud3D(url='https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj') diff --git a/docarray/documents/text.py b/docarray/documents/text.py index a504ce7589..101b3c3d3f 100644 --- a/docarray/documents/text.py +++ b/docarray/documents/text.py @@ -54,7 +54,7 @@ class TextDoc(BaseDoc): # extend it class MyText(TextDoc): - second_embedding: Optional[AnyEmbedding] + second_embedding: Optional[AnyEmbedding] = None txt_doc = MyText(url='https://www.gutenberg.org/files/1065/1065-0.txt') diff --git a/docarray/documents/video.py b/docarray/documents/video.py index ddb5529550..23965d26da 100644 --- a/docarray/documents/video.py +++ b/docarray/documents/video.py @@ -66,7 +66,7 @@ class VideoDoc(BaseDoc): # extend it class MyVideo(VideoDoc): - name: Optional[TextDoc] + name: Optional[TextDoc] = None video = MyVideo( diff --git a/tests/documentation/test_docstring.py b/tests/documentation/test_docstring.py index 71cc1bb8cb..9bb6e01aeb 100644 --- a/tests/documentation/test_docstring.py +++ b/tests/documentation/test_docstring.py @@ -16,7 +16,6 @@ import docarray.store import docarray.typing from docarray.utils import filter, find, map -from docarray.utils._internal.pydantic import is_pydantic_v2 SUB_MODULE_TO_CHECK = [ docarray, @@ -54,7 +53,6 @@ def get_obj_to_check(lib): members.extend(get_codeblock_members(obj)) -@pytest.mark.skipif(is_pydantic_v2, reason="Not working with pydantic v2 for now") @pytest.mark.parametrize("obj", members, ids=lambda d: d.__qualname__) def test_member(obj): check_docstring(obj) From 37ce03b625fb2c7ab67ce639380f838fa102ea92 Mon Sep 17 00:00:00 2001 From: samsja Date: Mon, 2 Oct 2023 10:09:50 +0200 Subject: [PATCH 4/4] fix: fix docstring tests Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 2 +- docarray/typing/bytes/audio_bytes.py | 6 +++--- docarray/typing/tensor/audio/audio_ndarray.py | 6 +++--- docarray/typing/tensor/audio/audio_torch_tensor.py | 6 +++--- docarray/typing/tensor/image/image_ndarray.py | 6 +++--- docarray/typing/tensor/image/image_torch_tensor.py | 6 +++--- docarray/typing/tensor/video/video_ndarray.py | 4 ++-- docarray/typing/tensor/video/video_torch_tensor.py | 4 ++-- docarray/typing/url/audio_url.py | 2 +- docarray/typing/url/video_url.py | 6 +++--- 10 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index f4707bc1d9..c21cf93413 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -68,7 +68,7 @@ class DocList( class Image(BaseDoc): - tensor: Optional[NdArray[100]] + tensor: Optional[NdArray[100]] = None url: ImageUrl diff --git a/docarray/typing/bytes/audio_bytes.py b/docarray/typing/bytes/audio_bytes.py index 8db4c8549e..4747231be2 100644 --- a/docarray/typing/bytes/audio_bytes.py +++ b/docarray/typing/bytes/audio_bytes.py @@ -33,9 +33,9 @@ def load(self) -> Tuple[AudioNdArray, int]: class MyAudio(BaseDoc): url: AudioUrl - tensor: Optional[AudioNdArray] - bytes_: Optional[AudioBytes] - frame_rate: Optional[float] + tensor: Optional[AudioNdArray] = None + bytes_: Optional[AudioBytes] = None + frame_rate: Optional[float] = None doc = MyAudio(url='https://www.kozco.com/tech/piano2.wav') diff --git a/docarray/typing/tensor/audio/audio_ndarray.py b/docarray/typing/tensor/audio/audio_ndarray.py index 3b15c0bc93..3b5c79aad1 100644 --- a/docarray/typing/tensor/audio/audio_ndarray.py +++ b/docarray/typing/tensor/audio/audio_ndarray.py @@ -22,9 +22,9 @@ class AudioNdArray(AbstractAudioTensor, NdArray): class MyAudioDoc(BaseDoc): title: str - audio_tensor: Optional[AudioNdArray] - url: Optional[AudioUrl] - bytes_: Optional[AudioBytes] + audio_tensor: Optional[AudioNdArray] = None + url: Optional[AudioUrl] = None + bytes_: Optional[AudioBytes] = None # from tensor diff --git a/docarray/typing/tensor/audio/audio_torch_tensor.py b/docarray/typing/tensor/audio/audio_torch_tensor.py index 974ddff120..06b6c649e4 100644 --- a/docarray/typing/tensor/audio/audio_torch_tensor.py +++ b/docarray/typing/tensor/audio/audio_torch_tensor.py @@ -22,9 +22,9 @@ class AudioTorchTensor(AbstractAudioTensor, TorchTensor, metaclass=metaTorchAndN class MyAudioDoc(BaseDoc): title: str - audio_tensor: Optional[AudioTorchTensor] - url: Optional[AudioUrl] - bytes_: Optional[AudioBytes] + audio_tensor: Optional[AudioTorchTensor] = None + url: Optional[AudioUrl] = None + bytes_: Optional[AudioBytes] = None doc_1 = MyAudioDoc( diff --git a/docarray/typing/tensor/image/image_ndarray.py b/docarray/typing/tensor/image/image_ndarray.py index 1ff3a14eaa..b5e588961b 100644 --- a/docarray/typing/tensor/image/image_ndarray.py +++ b/docarray/typing/tensor/image/image_ndarray.py @@ -25,9 +25,9 @@ class ImageNdArray(AbstractImageTensor, NdArray): class MyImageDoc(BaseDoc): title: str - tensor: Optional[ImageNdArray] - url: Optional[ImageUrl] - bytes: Optional[ImageBytes] + tensor: Optional[ImageNdArray] = None + url: Optional[ImageUrl] = None + bytes: Optional[ImageBytes] = None # from url diff --git a/docarray/typing/tensor/image/image_torch_tensor.py b/docarray/typing/tensor/image/image_torch_tensor.py index 103a936d70..7f2c3afc0d 100644 --- a/docarray/typing/tensor/image/image_torch_tensor.py +++ b/docarray/typing/tensor/image/image_torch_tensor.py @@ -28,9 +28,9 @@ class ImageTorchTensor(AbstractImageTensor, TorchTensor, metaclass=metaTorchAndN class MyImageDoc(BaseDoc): title: str - tensor: Optional[ImageTorchTensor] - url: Optional[ImageUrl] - bytes: Optional[ImageBytes] + tensor: Optional[ImageTorchTensor] = None + url: Optional[ImageUrl] = None + bytes: Optional[ImageBytes] = None doc = MyImageDoc( diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index db2c27c6ab..30129e40f9 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -29,8 +29,8 @@ class VideoNdArray(NdArray, VideoTensorMixin): class MyVideoDoc(BaseDoc): title: str - url: Optional[VideoUrl] - video_tensor: Optional[VideoNdArray] + url: Optional[VideoUrl] = None + video_tensor: Optional[VideoNdArray] = None doc_1 = MyVideoDoc( diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py index 574e37fe37..a1e1a73e33 100644 --- a/docarray/typing/tensor/video/video_torch_tensor.py +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -28,8 +28,8 @@ class VideoTorchTensor(TorchTensor, VideoTensorMixin, metaclass=metaTorchAndNode class MyVideoDoc(BaseDoc): title: str - url: Optional[VideoUrl] - video_tensor: Optional[VideoTorchTensor] + url: Optional[VideoUrl] = None + video_tensor: Optional[VideoTorchTensor] = None doc_1 = MyVideoDoc( diff --git a/docarray/typing/url/audio_url.py b/docarray/typing/url/audio_url.py index bd71a68b82..e693838841 100644 --- a/docarray/typing/url/audio_url.py +++ b/docarray/typing/url/audio_url.py @@ -46,7 +46,7 @@ def load(self: T) -> Tuple[AudioNdArray, int]: class MyDoc(BaseDoc): audio_url: AudioUrl - audio_tensor: Optional[AudioNdArray] + audio_tensor: Optional[AudioNdArray] = None doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav') diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index e4a623e53a..385e4946f8 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -48,9 +48,9 @@ def load(self: T, **kwargs) -> VideoLoadResult: class MyDoc(BaseDoc): video_url: VideoUrl - video: Optional[VideoNdArray] - audio: Optional[AudioNdArray] - key_frame_indices: Optional[NdArray] + video: Optional[VideoNdArray] = None + audio: Optional[AudioNdArray] = None + key_frame_indices: Optional[NdArray] = None doc = MyDoc(