From 72b4c9a9cb3c2b69d22da9a9c8df44db19bcce28 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 17 Apr 2023 10:35:47 +0200 Subject: [PATCH 1/4] docs: adjust file paths to main branch Signed-off-by: anna-charlotte --- docarray/documents/audio.py | 6 +++--- docarray/documents/image.py | 6 +++--- docarray/documents/text.py | 2 +- docarray/documents/video.py | 6 +++--- docarray/typing/bytes/video_bytes.py | 2 +- docarray/typing/tensor/video/video_ndarray.py | 2 +- docarray/typing/tensor/video/video_tensorflow_tensor.py | 2 +- docarray/typing/tensor/video/video_torch_tensor.py | 2 +- docarray/typing/url/video_url.py | 4 ++-- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docarray/documents/audio.py b/docarray/documents/audio.py index ee189b5c867..d2585d20b98 100644 --- a/docarray/documents/audio.py +++ b/docarray/documents/audio.py @@ -39,7 +39,7 @@ class AudioDoc(BaseDoc): # use it directly audio = AudioDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/hello.wav?raw=true' ) audio.tensor, audio.frame_rate = audio.url.load() # model = MyEmbeddingModel() @@ -59,7 +59,7 @@ class MyAudio(AudioDoc): audio = MyAudio( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/hello.wav?raw=true' ) audio.name = TextDoc(text='my first audio') audio.tensor, audio.frame_rate = audio.url.load() @@ -82,7 +82,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( audio=AudioDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/hello.wav?raw=true' ), text=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/image.py b/docarray/documents/image.py index 12e32e3cb19..c0f3ef9ede0 100644 --- a/docarray/documents/image.py +++ b/docarray/documents/image.py @@ -36,7 +36,7 @@ class ImageDoc(BaseDoc): # use it directly image = ImageDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' ) image.tensor = image.url.load() # model = MyEmbeddingModel() @@ -57,7 +57,7 @@ class MyImage(ImageDoc): image = MyImage( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' ) image.tensor = image.url.load() # model = MyEmbeddingModel() @@ -80,7 +80,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( image=ImageDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' ), text=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/text.py b/docarray/documents/text.py index 91d2be7045a..8602c537173 100644 --- a/docarray/documents/text.py +++ b/docarray/documents/text.py @@ -73,7 +73,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( image_doc=ImageDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' ), text_doc=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/video.py b/docarray/documents/video.py index 9ead5cc4ffc..23773465264 100644 --- a/docarray/documents/video.py +++ b/docarray/documents/video.py @@ -41,7 +41,7 @@ class VideoDoc(BaseDoc): # use it directly vid = VideoDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' ) vid.tensor, vid.audio.tensor, vid.key_frame_indices = vid.url.load() # model = MyEmbeddingModel() @@ -62,7 +62,7 @@ class MyVideo(VideoDoc): video = MyVideo( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' ) video.name = TextDoc(text='my first video') video.tensor = video.url.load().video @@ -85,7 +85,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( video=VideoDoc( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' ), text=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/typing/bytes/video_bytes.py b/docarray/typing/bytes/video_bytes.py index 008eeb8b0fd..30fee75453f 100644 --- a/docarray/typing/bytes/video_bytes.py +++ b/docarray/typing/bytes/video_bytes.py @@ -69,7 +69,7 @@ class MyDoc(BaseDoc): doc = MyDoc( - video_url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + video_url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' ) video, audio, key_frame_indices = doc.video_url.load() diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index 4077cb6b161..b4f4d02a783 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -44,7 +44,7 @@ class MyVideoDoc(BaseDoc): doc_2 = MyVideoDoc( title='my_second_video_doc', - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true', + url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', ) doc_2.video_tensor = parse_obj_as(VideoNdArray, doc_2.url.load().video) diff --git a/docarray/typing/tensor/video/video_tensorflow_tensor.py b/docarray/typing/tensor/video/video_tensorflow_tensor.py index 9a652c2e4ca..0a4d485e977 100644 --- a/docarray/typing/tensor/video/video_tensorflow_tensor.py +++ b/docarray/typing/tensor/video/video_tensorflow_tensor.py @@ -46,7 +46,7 @@ class MyVideoDoc(BaseDoc): doc_2 = MyVideoDoc( title='my_second_video_doc', - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true', + url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', ) doc_2.video_tensor = doc_2.url.load().video diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py index bdef42d2fc2..4c94b3890e6 100644 --- a/docarray/typing/tensor/video/video_torch_tensor.py +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -44,7 +44,7 @@ class MyVideoDoc(BaseDoc): doc_2 = MyVideoDoc( title='my_second_video_doc', - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true', + url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', ) doc_2.video_tensor = doc_2.url.load().video diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index 8c5f0e6d995..b1ae3df2e1a 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -41,7 +41,7 @@ class MyDoc(BaseDoc): doc = MyDoc( - video_url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + video_url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' ) doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load() @@ -64,7 +64,7 @@ class MyDoc(BaseDoc): url = parse_obj_as( VideoUrl, - 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true', + 'https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', ) key_frame_indices = url.load().key_frame_indices assert isinstance(key_frame_indices, NdArray) From 0dea11176dd3485debfbc69b35770c9d2cbbba83 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 17 Apr 2023 11:04:24 +0200 Subject: [PATCH 2/4] fix: paths Signed-off-by: anna-charlotte --- docarray/documents/audio.py | 6 +++--- docarray/documents/image.py | 6 +++--- docarray/documents/text.py | 2 +- docarray/documents/video.py | 6 +++--- docarray/typing/bytes/video_bytes.py | 2 +- docarray/typing/tensor/video/video_ndarray.py | 2 +- docarray/typing/tensor/video/video_tensorflow_tensor.py | 2 +- docarray/typing/tensor/video/video_torch_tensor.py | 2 +- docarray/typing/url/video_url.py | 4 ++-- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docarray/documents/audio.py b/docarray/documents/audio.py index d2585d20b98..fd746a2dfe5 100644 --- a/docarray/documents/audio.py +++ b/docarray/documents/audio.py @@ -39,7 +39,7 @@ class AudioDoc(BaseDoc): # use it directly audio = AudioDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.wav?raw=true' ) audio.tensor, audio.frame_rate = audio.url.load() # model = MyEmbeddingModel() @@ -59,7 +59,7 @@ class MyAudio(AudioDoc): audio = MyAudio( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.wav?raw=true' ) audio.name = TextDoc(text='my first audio') audio.tensor, audio.frame_rate = audio.url.load() @@ -82,7 +82,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( audio=AudioDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.wav?raw=true' ), text=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/image.py b/docarray/documents/image.py index c0f3ef9ede0..e0072b622ab 100644 --- a/docarray/documents/image.py +++ b/docarray/documents/image.py @@ -36,7 +36,7 @@ class ImageDoc(BaseDoc): # use it directly image = ImageDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true' ) image.tensor = image.url.load() # model = MyEmbeddingModel() @@ -57,7 +57,7 @@ class MyImage(ImageDoc): image = MyImage( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true' ) image.tensor = image.url.load() # model = MyEmbeddingModel() @@ -80,7 +80,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( image=ImageDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true' ), text=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/text.py b/docarray/documents/text.py index 8602c537173..c6e6645f4e1 100644 --- a/docarray/documents/text.py +++ b/docarray/documents/text.py @@ -73,7 +73,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( image_doc=ImageDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true' ), text_doc=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/video.py b/docarray/documents/video.py index 23773465264..fad4a0e843a 100644 --- a/docarray/documents/video.py +++ b/docarray/documents/video.py @@ -41,7 +41,7 @@ class VideoDoc(BaseDoc): # use it directly vid = VideoDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) vid.tensor, vid.audio.tensor, vid.key_frame_indices = vid.url.load() # model = MyEmbeddingModel() @@ -62,7 +62,7 @@ class MyVideo(VideoDoc): video = MyVideo( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) video.name = TextDoc(text='my first video') video.tensor = video.url.load().video @@ -85,7 +85,7 @@ class MultiModalDoc(BaseDoc): mmdoc = MultiModalDoc( video=VideoDoc( - url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ), text=TextDoc(text='hello world, how are you doing?'), ) diff --git a/docarray/typing/bytes/video_bytes.py b/docarray/typing/bytes/video_bytes.py index 30fee75453f..720326fdbc1 100644 --- a/docarray/typing/bytes/video_bytes.py +++ b/docarray/typing/bytes/video_bytes.py @@ -69,7 +69,7 @@ class MyDoc(BaseDoc): doc = MyDoc( - video_url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' + video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) video, audio, key_frame_indices = doc.video_url.load() diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index b4f4d02a783..5b11e75bd94 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -44,7 +44,7 @@ class MyVideoDoc(BaseDoc): doc_2 = MyVideoDoc( title='my_second_video_doc', - url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true', ) doc_2.video_tensor = parse_obj_as(VideoNdArray, doc_2.url.load().video) diff --git a/docarray/typing/tensor/video/video_tensorflow_tensor.py b/docarray/typing/tensor/video/video_tensorflow_tensor.py index 0a4d485e977..d98794f8aa3 100644 --- a/docarray/typing/tensor/video/video_tensorflow_tensor.py +++ b/docarray/typing/tensor/video/video_tensorflow_tensor.py @@ -46,7 +46,7 @@ class MyVideoDoc(BaseDoc): doc_2 = MyVideoDoc( title='my_second_video_doc', - url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true', ) doc_2.video_tensor = doc_2.url.load().video diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py index 4c94b3890e6..dd4c5a5dcd3 100644 --- a/docarray/typing/tensor/video/video_torch_tensor.py +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -44,7 +44,7 @@ class MyVideoDoc(BaseDoc): doc_2 = MyVideoDoc( title='my_second_video_doc', - url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true', ) doc_2.video_tensor = doc_2.url.load().video diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index b1ae3df2e1a..5bd7b1be0b9 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -41,7 +41,7 @@ class MyDoc(BaseDoc): doc = MyDoc( - video_url='https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true' + video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load() @@ -64,7 +64,7 @@ class MyDoc(BaseDoc): url = parse_obj_as( VideoUrl, - 'https://github.com/docarray/docarray/tree/main/tests/toydata/mov_bbb.mp4?raw=true', + 'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true', ) key_frame_indices = url.load().key_frame_indices assert isinstance(key_frame_indices, NdArray) From 06c094705a4c02d5527871f274eaa030985a9ba3 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 17 Apr 2023 11:14:15 +0200 Subject: [PATCH 3/4] fix: paths in docs Signed-off-by: anna-charlotte --- docs/data_types/audio/audio.md | 8 ++-- docs/data_types/image/image.md | 8 ++-- docs/data_types/multimodal/multimodal.md | 8 ++-- docs/data_types/table/table.md | 6 +-- docs/data_types/video/video.md | 8 ++-- .../how_to/multimodal_training_and_serving.md | 46 +++++++++---------- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/docs/data_types/audio/audio.md b/docs/data_types/audio/audio.md index cbe84c9067e..73e3f5b798b 100644 --- a/docs/data_types/audio/audio.md +++ b/docs/data_types/audio/audio.md @@ -42,7 +42,7 @@ class MyAudio(BaseDoc): doc = MyAudio( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.mp3?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.mp3?raw=true' ) ``` @@ -95,7 +95,7 @@ class MyAudio(BaseDoc): doc = MyAudio( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.mp3?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.mp3?raw=true' ) doc.tf_tensor, _ = doc.url.load() @@ -123,7 +123,7 @@ class MyAudio(BaseDoc): doc = MyAudio( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.mp3?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.mp3?raw=true' ) doc.bytes_ = doc.url.load_bytes() # type(doc.bytes_) = AudioBytes @@ -203,7 +203,7 @@ class MyAudio(AudioDoc): audio = MyAudio( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.mp3?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/hello.mp3?raw=true' ) audio.name = 'My first audio doc!' audio.tensor, audio.frame_rate = audio.url.load() diff --git a/docs/data_types/image/image.md b/docs/data_types/image/image.md index 1efc7b7be56..892542e3a45 100644 --- a/docs/data_types/image/image.md +++ b/docs/data_types/image/image.md @@ -31,7 +31,7 @@ class MyImage(BaseDoc): img = MyImage( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true' ) ``` @@ -66,7 +66,7 @@ class MyImage(BaseDoc): torch_tensor: ImageTorchTensor = None -img = MyImage(url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true') +img = MyImage(url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true') img.tf_tensor = img.url.load() img.torch_tensor = img.url.load() @@ -80,7 +80,7 @@ You can also load the URL content as a [`PIL.Image.Image`](https://pillow.readth from PIL.Image import Image as PILImage img = MyImage( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true' ) pil_img = img.url.load_pil() @@ -141,7 +141,7 @@ class MyImage(BaseDoc): tensor: ImageTensor = None -img = MyImage(url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/image-data/apple.png?raw=true') +img = MyImage(url='https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true') img.bytes_ = img.url.load_bytes() # type(img.bytes_) = ImageBytes img.tensor = img.bytes_.load() # type(img.tensor) = ImageNdarray diff --git a/docs/data_types/multimodal/multimodal.md b/docs/data_types/multimodal/multimodal.md index 35f7ff2dba4..b40dd32c4ac 100644 --- a/docs/data_types/multimodal/multimodal.md +++ b/docs/data_types/multimodal/multimodal.md @@ -37,7 +37,7 @@ data. ```python page = Page( main_text='Hello world', - img_url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/docs/assets/favicon.png?raw=true', + img_url='https://github.com/docarray/docarray/blob/main/docs/assets/favicon.png?raw=true', img_description='This is the image of an apple', ) page.img_tensor = page.img_url.load() @@ -75,7 +75,7 @@ print(page.img_tensor) Output ``` { .text .no-copy } Hello world - https://github.com/docarray/docarray/blob/feat-rewrite-v2/docs/assets/favicon.png?raw=true + https://github.com/docarray/docarray/blob/main/docs/assets/favicon.png?raw=true This is DocArray ImageTorchTensor([[[0, 0, 0], [0, 0, 0], @@ -119,14 +119,14 @@ You can instantiate this more complex `Newspaper` object in the same way as befo ```python cover_page = Page( main_text='DocArray Daily', - img_url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/docs/assets/favicon.png', + img_url='https://github.com/docarray/docarray/blob/main/docs/assets/favicon.png', ) pages = DocList[Page]( [ Page( main_text='Hello world', - img_url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/docs/assets/favicon.png', + img_url='https://github.com/docarray/docarray/blob/main/docs/assets/favicon.png', img_description='This is the image of an apple', ), Page(main_text='Second page'), diff --git a/docs/data_types/table/table.md b/docs/data_types/table/table.md index 202f84208ae..701db376f19 100644 --- a/docs/data_types/table/table.md +++ b/docs/data_types/table/table.md @@ -32,7 +32,7 @@ from docarray import DocList docs = DocList[Book].from_csv( - file_path='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/books.csv?raw=true' + file_path='https://github.com/docarray/docarray/blob/main/tests/toydata/books.csv?raw=true' ) docs.summary() ``` @@ -136,7 +136,7 @@ Title2 author2 1234 ```python docs = DocList[Book].from_csv( - file_path='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/books.tsv?raw=true', + file_path='https://github.com/docarray/docarray/blob/main/tests/toydata/books.tsv?raw=true', dialect='excel-tab', ) for doc in docs: @@ -194,7 +194,7 @@ class SemicolonSeparator(csv.Dialect): Finally, you can load your data by setting the `dialect` parameter in [`.from_csv()`][docarray.array.doc_list.io.IOMixinArray.from_csv] to an instance of your `SemicolonSeparator`. ```python docs = DocList[Book].from_csv( - file_path='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/books_semicolon_sep.csv?raw=true', + file_path='https://github.com/docarray/docarray/blob/main/tests/toydata/books_semicolon_sep.csv?raw=true', dialect=SemicolonSeparator(), ) for doc in docs: diff --git a/docs/data_types/video/video.md b/docs/data_types/video/video.md index f09dce2e978..f7c55765c22 100644 --- a/docs/data_types/video/video.md +++ b/docs/data_types/video/video.md @@ -35,7 +35,7 @@ class MyVideo(BaseDoc): doc = MyVideo( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) ``` @@ -88,7 +88,7 @@ class MyVideo(BaseDoc): doc = MyVideo( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) doc.tf_tensor = doc.url.load().video @@ -116,7 +116,7 @@ class MyVideo(BaseDoc): doc = MyVideo( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) doc.bytes_ = doc.url.load_bytes() @@ -216,7 +216,7 @@ class MyVideo(VideoDoc): video = MyVideo( - url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' ) video.name = 'My first video doc!' video.tensor = video.url.load().video diff --git a/docs/how_to/multimodal_training_and_serving.md b/docs/how_to/multimodal_training_and_serving.md index b89b852297f..628d13f5355 100644 --- a/docs/how_to/multimodal_training_and_serving.md +++ b/docs/how_to/multimodal_training_and_serving.md @@ -12,9 +12,9 @@ jupyter: name: python3 --- -# Multimodal deep learning with DocList +# Multimodal deep learning with DocArray -DocList is a library for representing, sending, and storing multi-modal data that can be used for a variety of different +DocArray is a library for representing, sending, and storing multi-modal data that can be used for a variety of different use cases. Here we will focus on a workflow familiar to many ML Engineers: Building and training a model, and then serving it to @@ -22,10 +22,10 @@ users. This notebook contains two parts: -1. **Representing**: We will use DocList to represent multi-modal data while **building and training a PyTorch model**. -We will see how DocList can help to organize and group your modalities and tensors and make clear what methods expect as inputs and return as outputs. +1. **Representing**: We will use DocArray to represent multi-modal data while **building and training a PyTorch model**. +We will see how DocArray can help to organize and group your modalities and tensors and make clear what methods expect as inputs and return as outputs. 2. **Sending**: We will take the model that we built and trained in part 1, and **serve it using FastAPI**. -We will see how DocList narrows the gap between model development and model deployment, and how the same data models can be +We will see how DocArray narrows the gap between model development and model deployment, and how the same data models can be reused in both contexts. That part will be very short, but that's the point! So without further ado, let's dive into it! @@ -39,11 +39,11 @@ We train the CLIP-like model on the [flickr8k](https://www.kaggle.com/datasets/a To run this notebook you need to download and unzip the data into the same folder as the notebook. Note that in this notebook by no means we aim at reproduce any CLIP results (our dataset is way too small anyways), -but we rather want to show how DocList datastructures help researchers and practitioners to write beautiful and +but we rather want to show how DocArray datastructures help researchers and practitioners to write beautiful and pythonic multi-modal PyTorch code. ```python tags=[] -#!pip install "git+https://github.com/DocList/DocList@feat-rewrite-v2#egg=DocList[torch,image]" +#!pip install "docarray[torch,image]" #!pip install torchvision #!pip install transformers #!pip install fastapi @@ -56,7 +56,7 @@ from typing import Callable, Dict, List, Optional ``` ```python -import DocList +import docarray import torch ``` @@ -74,23 +74,23 @@ DEVICE = "cuda:0" # change to your favourite device ## Create the Documents for handling the Muti-Modal data -The first thing we are trying to achieve when using DocList is to clearly model our data so that we never get confused +The first thing we are trying to achieve when using DocArray is to clearly model our data so that we never get confused about which tensors are supposed to represent what. -To do that we are using a concept that is at the core of DocList. The `Document`, a collection of multi-modal data. +To do that we are using a concept that is at the core of DocArray. The `Document`, a collection of multi-modal data. The `BaseDoc` class allows users to define their own (nested, multi-modal) Document schema to represent any kind of complex data. Let's start by defining a few Documents to handle the different modalities that we will use during our training: ```python -from DocList import BaseDoc, DocList -from DocList.typing import TorchTensor, ImageUrl +from docarray import BaseDoc, DocList +from docarray.typing import TorchTensor, ImageUrl ``` Let's first create a Document for our Text modality. It will contain a number of `Tokens`, which we also define: ```python -from DocList.documents import TextDoc as BaseText +from docarray.documents import TextDoc as BaseText class Tokens(BaseDoc): @@ -106,10 +106,10 @@ Notice the `TorchTensor` type. It is a thin wrapper around `torch.Tensor` that c but also enables additional features. One such feature is shape parametrization (`TorchTensor[48]`), which lets you hint and even enforce the desired shape of any tensor! -To represent our image data, we use the `Image` Document that is included in DocList: +To represent our image data, we use the `Image` Document that is included in DocArray: ```python -from DocList.documents import ImageDoc +from docarray.documents import ImageDoc ``` Under the hood, an `Image` looks something like this (with the only main difference that it can take tensors from any @@ -136,9 +136,9 @@ class PairTextImage(BaseDoc): ## Create the Dataset -In this section we will create a multi-modal pytorch dataset around the Flick8k dataset using DocList. +In this section we will create a multi-modal pytorch dataset around the Flick8k dataset using DocArray. -We will use DocList data loading functionality to load the data and use Torchvision and Transformers to preprocess the data before feeding it to our deep learning model: +We will use DocArray data loading functionality to load the data and use Torchvision and Transformers to preprocess the data before feeding it to our deep learning model: ```python from torch.utils.data import DataLoader, Dataset @@ -201,7 +201,7 @@ preprocessing = {"image": VisionPreprocess(), "text": TextPreprocess()} ``` ```python -from DocList.data import MultiModalDataset +from docarray.data import MultiModalDataset dataset = MultiModalDataset[PairTextImage](da=da, preprocessing=preprocessing) loader = DataLoader( @@ -214,7 +214,7 @@ loader = DataLoader( ) ``` -## Create the Pytorch model that works on DocList +## Create the Pytorch model that works on DocArray In this section we create two encoders, one per modality (Text and Image). These encoders are normal PyTorch `nn.Module`s. @@ -266,7 +266,7 @@ vision_encoder = VisionEncoder().to(DEVICE) text_encoder = TextEncoder().to(DEVICE) ``` -As you can see, DocList helps us to clearly convey what data is expected as input and output for each method, all through Python type hints. +As you can see, DocArray helps us to clearly convey what data is expected as input and output for each method, all through Python type hints. ## Train the model in a contrastive way between Text and Image (CLIP) @@ -337,12 +337,12 @@ Let's use our beloved [FastAPI](https://fastapi.tiangolo.com/) for that! FastAPI is powerful because it allows you to define your Rest API data schema in pure Python. -And DocList is fully compatible with FastAPI and Pydantic, which means that as long as you have a function that takes a Document as input, +And DocArray is fully compatible with FastAPI and Pydantic, which means that as long as you have a function that takes a Document as input, FastAPI will be able to automatically translate it into a fully fledged API with documentation, openAPI specification and more: ```python from fastapi import FastAPI -from DocList.base_doc import DocumentResponse +from docarray.base_doc import DocumentResponse ``` ```python @@ -400,4 +400,4 @@ doc_resp = Text.parse_raw(response.content.decode()) doc_resp.embedding.shape ``` -And we're done! You have trained and served a mulit-modal ML model, with zero headache and a lot of DocList! +And we're done! You have trained and served a mulit-modal ML model, with zero headache and a lot of DocArray! From 266fd9442de1f9a4435793f30d2d77a7201c1705 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 17 Apr 2023 11:18:56 +0200 Subject: [PATCH 4/4] fix: paths Signed-off-by: anna-charlotte --- docs/data_types/audio/audio.md | 2 +- docs/data_types/multimodal/multimodal.md | 6 +++--- tests/integrations/predefined_document/test_video.py | 2 +- tests/units/array/test_array_from_to_csv.py | 2 +- tests/units/typing/url/test_audio_url.py | 2 +- tests/units/typing/url/test_video_url.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/data_types/audio/audio.md b/docs/data_types/audio/audio.md index 73e3f5b798b..2a73d22f2aa 100644 --- a/docs/data_types/audio/audio.md +++ b/docs/data_types/audio/audio.md @@ -64,7 +64,7 @@ doc.summary() ╭──────────────────────┬───────────────────────────────────────────────────────╮ │ Attribute │ Value │ ├──────────────────────┼───────────────────────────────────────────────────────┤ - │ url: AudioUrl │ https://github.com/docarray/docarray/blob/feat-rew │ + │ url: AudioUrl │ https://github.com/docarray/docarray/blob/main/tes │ │ │ ... (length: 90) │ │ tensor: AudioNdArray │ AudioNdArray of shape (30833,), dtype: float64 │ │ frame_rate: int │ 44100 │ diff --git a/docs/data_types/multimodal/multimodal.md b/docs/data_types/multimodal/multimodal.md index b40dd32c4ac..57a2b1af56f 100644 --- a/docs/data_types/multimodal/multimodal.md +++ b/docs/data_types/multimodal/multimodal.md @@ -52,7 +52,7 @@ page.summary() │ Attribute │ Value │ ├──────────────────────────────┼───────────────────────────────────────────────┤ │ main_text: str │ Hello world │ - │ img_url: ImageUrl │ https://github.com/docarray/docarray/blob/fe… │ + │ img_url: ImageUrl │ https://github.com/docarray/docarray/blob/ma… │ │ │ ... (length: 90) │ │ img_description: str │ This is DocArray │ │ img_tensor: ImageTorchTensor │ ImageTorchTensor of shape (320, 320, 3), │ @@ -158,7 +158,7 @@ docarray_daily.summary() │ │ Attribute │ Value │ │ ├───────────────────┼──────────────────────────────────────────────────┤ │ │ main_text: str │ DocArray Daily │ - │ │ img_url: ImageUrl │ https://github.com/docarray/docarray/blob/feat-… │ + │ │ img_url: ImageUrl │ https://github.com/docarray/docarray/blob/main/… │ │ │ │ ... (length: 81) │ │ ╰───────────────────┴──────────────────────────────────────────────────╯ └── 💠 pages: DocList[Page] @@ -167,7 +167,7 @@ docarray_daily.summary() │ │ Attribute │ Value │ │ ├──────────────────────┼───────────────────────────────────────────────┤ │ │ main_text: str │ Hello world │ - │ │ img_url: ImageUrl │ https://github.com/docarray/docarray/blob/fe… │ + │ │ img_url: ImageUrl │ https://github.com/docarray/docarray/blob/ma… │ │ │ │ ... (length: 81) │ │ │ img_description: str │ DocArray logoooo │ │ ╰──────────────────────┴───────────────────────────────────────────────╯ diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index 606dad6a6c1..ae1ccf4a992 100644 --- a/tests/integrations/predefined_document/test_video.py +++ b/tests/integrations/predefined_document/test_video.py @@ -16,7 +16,7 @@ LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') -REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 +REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 @pytest.mark.slow diff --git a/tests/units/array/test_array_from_to_csv.py b/tests/units/array/test_array_from_to_csv.py index 9d52bb3a22c..7b56d6bd35b 100644 --- a/tests/units/array/test_array_from_to_csv.py +++ b/tests/units/array/test_array_from_to_csv.py @@ -100,7 +100,7 @@ def test_from_csv_with_wrong_schema_raise_exception(nested_doc): def test_from_remote_csv_file(): - remote_url = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/books.csv?raw=true' + remote_url = 'https://github.com/docarray/docarray/blob/main/tests/toydata/books.csv?raw=true' class Book(BaseDoc): title: str diff --git a/tests/units/typing/url/test_audio_url.py b/tests/units/typing/url/test_audio_url.py index 1c7bdfa5d15..2e6b46bcabf 100644 --- a/tests/units/typing/url/test_audio_url.py +++ b/tests/units/typing/url/test_audio_url.py @@ -21,7 +21,7 @@ str(TOYDATA_DIR / 'hello.wav'), str(TOYDATA_DIR / 'olleh.wav'), ] -REMOTE_AUDIO_FILE = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/olleh.wav?raw=true' # noqa: E501 +REMOTE_AUDIO_FILE = 'https://github.com/docarray/docarray/blob/main/tests/toydata/olleh.wav?raw=true' # noqa: E501 @pytest.mark.slow diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py index c876d8b11c3..726e66a0cb6 100644 --- a/tests/units/typing/url/test_video_url.py +++ b/tests/units/typing/url/test_video_url.py @@ -25,7 +25,7 @@ from docarray.typing.tensor.video import VideoTensorFlowTensor LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') -REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 +REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 @pytest.mark.slow