From e15a0ef6bfa53eb0ba02a420c09a490238caab54 Mon Sep 17 00:00:00 2001
From: Han Xiao
Date: Wed, 5 Jan 2022 12:52:15 +0100
Subject: [PATCH] feat(array): add summary method for array

---
 docarray/array/mixins/plot.py        | 69 ++++++++++++++++++++++++++++
 tests/unit/array/mixins/test_plot.py | 15 ++++++
 2 files changed, 84 insertions(+)

diff --git a/docarray/array/mixins/plot.py b/docarray/array/mixins/plot.py
index 30df5351075..9393b87fdae 100644
--- a/docarray/array/mixins/plot.py
+++ b/docarray/array/mixins/plot.py
@@ -4,6 +4,7 @@
 import tempfile
 import threading
 import warnings
+from collections import Counter
 from math import sqrt, ceil, floor
 from typing import Optional
 
@@ -13,6 +14,74 @@ class PlotMixin:
     """Helper functions for plotting the arrays.
     """
 
+    def summary(self):
+        """Print a summary of the array to the console.
+
+        Two tables are rendered with ``rich``: one for the array as a whole
+        (length, whether every Document has the same non-empty fields) and
+        one per attribute (value types, number of unique values, whether
+        any value is ``None``).
+        """
+        from rich.table import Table
+        from rich.console import Console
+        from rich import box
+
+        all_attrs = self.get_attributes('non_empty_fields')
+        attr_counter = Counter(all_attrs)
+
+        table = Table(box=box.SIMPLE, title='Documents Summary')
+        table.show_header = False
+        table.add_row('Length', str(len(self)))
+        is_homo = len(attr_counter) == 1
+        table.add_row('Homogenous Documents', str(is_homo))
+
+        if is_homo:
+            table.add_row('Common Attributes', str(list(attr_counter.items())[0][0]))
+        else:
+            for _a, _n in attr_counter.most_common():
+                if _n <= 1:
+                    _doc_text = f'{_n} Document has'
+                else:
+                    _doc_text = f'{_n} Documents have'
+                if len(_a) == 1:
+                    _text = f'{_doc_text} one attribute'
+                elif len(_a) == 0:
+                    _text = f'{_doc_text} no attribute'
+                else:
+                    _text = f'{_doc_text} attributes'
+                table.add_row(_text, str(_a))
+
+        attr_table = Table(box=box.SIMPLE, title='Attributes Summary')
+        attr_table.add_column('Attribute')
+        attr_table.add_column('Data type')
+        attr_table.add_column('#Unique values')
+        attr_table.add_column('Has empty value')
+
+        all_attrs_names = tuple(sorted(set(v for k in all_attrs for v in k)))
+        all_attrs_values = self.get_attributes(*all_attrs_names)
+        if len(all_attrs_names) == 1:
+            # wrap the result so that zip() below pairs the whole value
+            # list with the single attribute name
+            all_attrs_values = [all_attrs_values]
+        for _a, _a_name in zip(all_attrs_values, all_attrs_names):
+            try:
+                _unique_a = set(_a)
+            except TypeError:
+                # unhashable values (e.g. dict, ndarray) cannot be
+                # de-duplicated; count every value instead of crashing
+                _unique_a = _a
+            _set_type_a = set(type(_aa).__name__ for _aa in _a)
+            attr_table.add_row(
+                _a_name,
+                str(tuple(sorted(_set_type_a))),
+                str(len(_unique_a)),
+                # ``is None`` avoids calling ``==`` on array-like values
+                str(any(_aa is None for _aa in _a)),
+            )
+
+        console = Console()
+        console.print(table, attr_table)
+
     def plot_embeddings(
         self,
         title: str = 'MyDocumentArray',
diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py
index c11692bcd0a..bd99c6c0289 100644
--- a/tests/unit/array/mixins/test_plot.py
+++ b/tests/unit/array/mixins/test_plot.py
@@ -55,3 +55,18 @@ def test_plot_embeddings_same_path(tmpdir):
     with open(os.path.join(p1, 'config.json')) as fp:
         config = json.load(fp)
     assert len(config['embeddings']) == 2
+
+
+def test_summary_homo_hetero():
+    da = DocumentArray.empty(100)
+    da.get_attributes()
+    da.summary()
+
+    da[0].pop('id')
+    da.summary()
+
+
+def test_empty_get_attributes():
+    da = DocumentArray.empty(10)
+    da[0].pop('id')
+    print(da.get_attributes('id'))