From f23cfe5e84668eae79480db6c35723158d4915b0 Mon Sep 17 00:00:00 2001 From: Chris Turner Date: Wed, 3 Mar 2021 22:13:01 -0600 Subject: [PATCH 01/19] 03-Fetch_lang1.rst: explain DataFrame index as matching pkey --- docs-parts/queries/03-Fetch_lang1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs-parts/queries/03-Fetch_lang1.rst b/docs-parts/queries/03-Fetch_lang1.rst index 3e6f5e043..ad99488b3 100644 --- a/docs-parts/queries/03-Fetch_lang1.rst +++ b/docs-parts/queries/03-Fetch_lang1.rst @@ -47,7 +47,7 @@ For example: import pandas as pd frame = pd.DataFrame(tab.fetch()) -Calling ``fetch()`` with the argument ``format="frame"`` returns results as ``pandas.DataFrame`` objects with no need for conversion. +Calling ``fetch()`` with the argument ``format="frame"`` returns results as ``pandas.DataFrame`` objects indexed by the table's primary key attributes. .. code-block:: python From dd05f3c783a3570e8a5e5e63dde73655f7c515ac Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 12 Mar 2021 12:05:42 -0600 Subject: [PATCH 02/19] fix #876 in 0.12 --- datajoint/fetch.py | 2 +- datajoint/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/fetch.py b/datajoint/fetch.py index c2e6649ad..cee7fa3fa 100644 --- a/datajoint/fetch.py +++ b/datajoint/fetch.py @@ -242,7 +242,7 @@ def __call__(self, *attrs, squeeze=False, download_path='.'): for name in heading.names) else: # fetch some attributes, return as tuple attributes = [a for a in attrs if not is_key(a)] - result = self._expression.proj(*attributes).fetch(squeeze=squeeze, download_path=download_path) + result = self._expression.proj(*attributes).fetch(squeeze=squeeze, download_path=download_path, format="array") if len(result) != 1: raise DataJointError('fetch1 should only return one tuple. %d tuples were found' % len(result)) return_values = tuple( diff --git a/datajoint/version.py b/datajoint/version.py index cd2ca5888..bea7a65a6 100644 --- a/datajoint/version.py +++ b/datajoint/version.py @@ -1,3 +1,3 @@ -__version__ = "0.12.8" +__version__ = "0.12.9" assert len(__version__) <= 10 # The log table limits version to the 10 characters From d5dbd5bc096b0356721b45bacf7804342029146d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 12 Mar 2021 15:37:14 -0600 Subject: [PATCH 03/19] update change log for 0.12.9 --- CHANGELOG.md | 3 +++ docs-parts/intro/Releases_lang1.rst | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 797a2211c..3ee92a390 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## Release notes +### 0.12.9 -- Mar 12, 2021 +* Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. (#876) PR #880 + ### 0.12.8 -- Jan 12, 2021 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Load dependencies before querying dependencies. (#179) PR #833 diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 38ab9c0b2..8a9e51a27 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,3 +1,7 @@ +### 0.12.9 -- Mar 12, 2021 +-------------------------- +* Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. Issue #876 (PR #880) + 0.12.8 -- Jan 12, 2021 --------------------- * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 From dff22f32a754f3cf66b1a42ab637f1bb82003524 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 12 Mar 2021 15:47:30 -0600 Subject: [PATCH 04/19] minor change in changelog --- docs-parts/intro/Releases_lang1.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 8a9e51a27..1c3585110 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,5 +1,5 @@ -### 0.12.9 -- Mar 12, 2021 --------------------------- +0.12.9 -- Mar 12, 2021 +---------------------- * Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. Issue #876 (PR #880) 0.12.8 -- Jan 12, 2021 From 15412345b70548067deb3b972dbcc9b4e5e3c98a Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 00:18:16 -0500 Subject: [PATCH 05/19] Debug id. --- .github/workflows/development.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index b1ce66fcc..ed92d3ece 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -29,6 +29,7 @@ jobs: python-version: ${{matrix.py_ver}} - name: Install dependencies run: | + id python -m pip install --upgrade pip pip install flake8 - name: Run syntax tests From b69c2d98edef9b9969e8a9663c84d0f6ea4bb372 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 00:20:51 -0500 Subject: [PATCH 06/19] Update GID. --- .github/workflows/development.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index ed92d3ece..3e2a281f4 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -37,7 +37,7 @@ jobs: - name: Run primary tests env: UID: "1001" - GID: "116" + GID: "121" PY_VER: ${{matrix.py_ver}} MYSQL_VER: ${{matrix.mysql_ver}} ALPINE_VER: "3.10" From a17017face106384def92fef2a93f254fe137f37 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 00:36:17 -0500 Subject: [PATCH 07/19] Show missing lines in coverage. --- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 8e9b6dd17..df41fbcc8 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,3 +3,4 @@ branch = False source = datajoint [report] +show_missing = True \ No newline at end of file From cd24a347f2a177f2f57936ff3998e450faf110b9 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 00:48:20 -0500 Subject: [PATCH 08/19] Debug single test with diff coverage. --- LNX-docker-compose.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index c91f4c01e..27ecf68c9 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -80,8 +80,9 @@ services: pip install --user -r test_requirements.txt pip install --user . pip freeze | grep datajoint - nosetests -vsw tests --with-coverage --cover-package=datajoint - coveralls + nosetests -vs --tests=tests.test_relation:TestRelation.test_missing_definition --with-coverage --cover-package=datajoint.table + # nosetests -vsw tests --with-coverage --cover-package=datajoint + # coveralls # jupyter notebook # ports: # - "8888:8888" From 2a2914bb35dd12670c0241dd987e7b0c95d89ecc Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 10:46:57 -0500 Subject: [PATCH 09/19] Remove debug statements, bump version, fix None in DJErrors, add docs on operators, add placeholder for purge query cache method, add deprecation warning on _update. --- .github/workflows/development.yaml | 1 - LNX-docker-compose.yml | 7 ++--- datajoint/condition.py | 2 +- datajoint/connection.py | 10 ++++++- datajoint/expression.py | 44 ++++++++++++++++++++++++------ datajoint/table.py | 3 ++ datajoint/version.py | 2 +- local-docker-compose.yml | 3 +- 8 files changed, 55 insertions(+), 17 deletions(-) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index 3e2a281f4..9785580b3 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -29,7 +29,6 @@ jobs: python-version: ${{matrix.py_ver}} - name: Install dependencies run: | - id python -m pip install --upgrade pip pip install flake8 - name: Run syntax tests diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index 27ecf68c9..a674b1873 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -78,11 +78,10 @@ services: - | set -e pip install --user -r test_requirements.txt - pip install --user . + pip install -e . pip freeze | grep datajoint - nosetests -vs --tests=tests.test_relation:TestRelation.test_missing_definition --with-coverage --cover-package=datajoint.table - # nosetests -vsw tests --with-coverage --cover-package=datajoint - # coveralls + nosetests -vsw tests --with-coverage --cover-package=datajoint + coveralls # jupyter notebook # ports: # - "8888:8888" diff --git a/datajoint/condition.py b/datajoint/condition.py index 126ed9f69..510c14295 100644 --- a/datajoint/condition.py +++ b/datajoint/condition.py @@ -84,7 +84,7 @@ def prep_value(k, v): try: v = uuid.UUID(v) except (AttributeError, ValueError): - raise DataJointError('Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) from None + raise DataJointError('Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) return "X'%s'" % v.bytes.hex() if isinstance(v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)): return '"%s"' % v diff --git a/datajoint/connection.py b/datajoint/connection.py index 14e457d0b..b9aee1f5d 100644 --- a/datajoint/connection.py +++ b/datajoint/connection.py @@ -218,7 +218,7 @@ def connect(self): k == 'ssl' and self.conn_info['ssl_input'] is None)}) self._conn.autocommit(True) - def set_query_cache(self, query_cache): + def set_query_cache(self, query_cache=None): """ When query_cache is not None, the connection switches into the query caching mode, which entails: 1. Only SELECT queries are allowed. @@ -228,6 +228,14 @@ def set_query_cache(self, query_cache): """ self._query_cache = query_cache + def purge_query_cache(self, query_cache): + """ + Purges if query cache is available with the provided reference. + + :param query_cache: a string associated with the hash for query results + """ + pass # wip + def close(self): self._conn.close() diff --git a/datajoint/expression.py b/datajoint/expression.py index 9b7d10544..e0d1155fd 100644 --- a/datajoint/expression.py +++ b/datajoint/expression.py @@ -193,7 +193,7 @@ def restrict_in_place(self, restriction): def __and__(self, restriction): """ - Restriction operator + Restriction operator e.g. q1 & q2. :return: a restricted copy of the input argument See QueryExpression.restrict for more detail. """ @@ -201,7 +201,7 @@ def __and__(self, restriction): def __xor__(self, restriction): """ - Restriction operator ignoring compatibility check. + Permissive restriction operator ignoring compatibility check e.g. q1 ^ q2. """ if inspect.isclass(restriction) and issubclass(restriction, QueryExpression): restriction = restriction() @@ -211,22 +211,33 @@ def __xor__(self, restriction): def __sub__(self, restriction): """ - Inverted restriction + Inverted restriction e.g. q1 - q2. :return: a restricted copy of the input argument See QueryExpression.restrict for more detail. """ return self.restrict(Not(restriction)) def __neg__(self): + """ + Convert between restriction and inverted restriction e.g. -q1. + :return: target restriction + See QueryExpression.restrict for more detail. + """ if isinstance(self, Not): return self.restriction return Not(self) def __mul__(self, other): - """ join of query expressions `self` and `other` """ + """ + join of query expressions `self` and `other` e.g. q1 * q2. + """ return self.join(other) def __matmul__(self, other): + """ + Permissive join of query expressions `self` and `other` ignoring compatibility check + e.g. q1 @ q2. + """ if inspect.isclass(other) and issubclass(other, QueryExpression): other = other() # instantiate return self.join(other, semantic_check=False) @@ -271,7 +282,7 @@ def join(self, other, semantic_check=True, left=False): return result def __add__(self, other): - """union""" + """union e.g. q1 + q2.""" return Union.create(self, other) def proj(self, *attributes, **named_attributes): @@ -424,7 +435,7 @@ def tail(self, limit=25, **fetch_kwargs): return self.fetch(order_by="KEY DESC", limit=limit, **fetch_kwargs)[::-1] def __len__(self): - """ :return: number of elements in the result set """ + """:return: number of elements in the result set e.g. len(q1).""" return self.connection.query( 'SELECT count(DISTINCT {fields}) FROM {from_}{where}'.format( fields=self.heading.as_sql(self.primary_key, include_aliases=False), @@ -433,7 +444,8 @@ def __len__(self): def __bool__(self): """ - :return: True if the result is not empty. Equivalent to len(self) > 0 but often faster. + :return: True if the result is not empty. Equivalent to len(self) > 0 but often + faster e.g. bool(q1). """ return bool(self.connection.query( 'SELECT EXISTS(SELECT 1 FROM {from_}{where})'.format( @@ -442,7 +454,7 @@ def __bool__(self): def __contains__(self, item): """ - returns True if item is found in the . + returns True if a restriction results with any records e.g. restriction in q1. :param item: any restriction (item in query_expression) is equivalent to bool(query_expression & item) but may be executed more efficiently. @@ -450,11 +462,22 @@ def __contains__(self, item): return bool(self & item) # May be optimized e.g. using an EXISTS query def __iter__(self): + """ + returns an iterator-compatible QueryExpression object e.g. iter(q1). + + :param self: iterator-compatible QueryExpression object + """ self._iter_only_key = all(v.in_key for v in self.heading.attributes.values()) self._iter_keys = self.fetch('KEY') return self def __next__(self): + """ + returns the next record on an iterator-compatible QueryExpression object + e.g. next(q1). + + :param self: fetch1 record + """ try: key = self._iter_keys.pop(0) except AttributeError: @@ -490,6 +513,11 @@ def cursor(self, offset=0, limit=None, order_by=None, as_dict=False): return self.connection.query(sql, as_dict=as_dict) def __repr__(self): + """ + returns the string representation of a QueryExpression object e.g. str(q1). + + :param self: String version of query result + """ return super().__repr__() if config['loglevel'].lower() == 'debug' else self.preview() def preview(self, limit=None, width=None): diff --git a/datajoint/table.py b/datajoint/table.py index 53da2dc3c..e6345d2e4 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -568,6 +568,9 @@ def _update(self, attrname, value=None): >>> (v2p.Mice() & key)._update('mouse_dob', '2011-01-01') >>> (v2p.Mice() & key)._update( 'lens') # set the value to NULL """ + warnings.warn( + '`_update` is a deprecated function to be removed in datajoint 0.14. ' + 'Use `.update1` instead.') if len(self) != 1: raise DataJointError('Update is only allowed on one tuple at a time') if attrname not in self.heading: diff --git a/datajoint/version.py b/datajoint/version.py index 4ac209b43..a7571b6c4 100644 --- a/datajoint/version.py +++ b/datajoint/version.py @@ -1,3 +1,3 @@ -__version__ = "0.13.dev7" +__version__ = "0.13.0" assert len(__version__) <= 10 # The log table limits version to the 10 characters diff --git a/local-docker-compose.yml b/local-docker-compose.yml index ff0dda0e6..4b12e74e0 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -82,7 +82,8 @@ services: - -c - | set -e - pip install --user nose nose-cov coveralls flake8 ptvsd . + pip install --user nose nose-cov coveralls flake8 ptvsd + pip install -e . pip freeze | grep datajoint ## You may run the below tests once sh'ed into container i.e. docker exec -it datajoint-python_app_1 sh # nosetests -vsw tests; #run all tests From 9ebd091f8655befef067d291684c145a5b0a707e Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 10:53:47 -0500 Subject: [PATCH 10/19] Import warnings in table submodule. --- datajoint/table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datajoint/table.py b/datajoint/table.py index e6345d2e4..d79c07a75 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -7,6 +7,7 @@ import logging import uuid import re +import warnings from pathlib import Path from .settings import config from .declare import declare, alter From 57b6bac900f8fd04e656aeb2de5f37877564cfff Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 11:14:17 -0500 Subject: [PATCH 11/19] Update docs version reference. --- docs-parts/version_common.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs-parts/version_common.json b/docs-parts/version_common.json index c39fa12d1..2296f52e6 100644 --- a/docs-parts/version_common.json +++ b/docs-parts/version_common.json @@ -1,3 +1,3 @@ { - "comm_version": "v0.1" + "comm_version": "v0.2" } \ No newline at end of file From fad8f703fa9dec9b2bf0c9259e59a374efd62924 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 11:25:38 -0500 Subject: [PATCH 12/19] Add update doc. --- docs-parts/computation/01-autopopulate_lang1.rst | 2 +- docs-parts/computation/04-master-part_lang1.rst | 2 +- docs-parts/manipulation/1-Update_lang1.rst | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 docs-parts/manipulation/1-Update_lang1.rst diff --git a/docs-parts/computation/01-autopopulate_lang1.rst b/docs-parts/computation/01-autopopulate_lang1.rst index a1caecd6c..31cf621ab 100644 --- a/docs-parts/computation/01-autopopulate_lang1.rst +++ b/docs-parts/computation/01-autopopulate_lang1.rst @@ -11,7 +11,7 @@ """ def make(self, key): - img = (test.Image & key).fetch1['image'] + img = (test.Image & key).fetch1('image') key['filtered_image'] = myfilter(img) self.insert(key) diff --git a/docs-parts/computation/04-master-part_lang1.rst b/docs-parts/computation/04-master-part_lang1.rst index d76404ff1..3bda5abb9 100644 --- a/docs-parts/computation/04-master-part_lang1.rst +++ b/docs-parts/computation/04-master-part_lang1.rst @@ -21,7 +21,7 @@ The part is subclassed from ``dj.Part`` and does not need the ``@schema`` decora """ def make(self, key): - image = (Image & key).fetch1['image'] + image = (Image & key).fetch1('image') self.insert1(key) count = itertools.count() Segmentation.ROI.insert( diff --git a/docs-parts/manipulation/1-Update_lang1.rst b/docs-parts/manipulation/1-Update_lang1.rst new file mode 100644 index 000000000..bbd41f4f6 --- /dev/null +++ b/docs-parts/manipulation/1-Update_lang1.rst @@ -0,0 +1,8 @@ + +.. code-block:: python + + # update value in record with id=1 + table.update1({'id': 1, 'value': 3}) + + # reset value to default + table.update1({'id': 1, 'value': None}) From 9161f8a2ca62f91ced6ac75e785a75074347c242 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 14:23:31 -0500 Subject: [PATCH 13/19] Update lang-specific docs. --- docs-parts/manipulation/1-Update_lang1.rst | 10 ++++++++-- docs-parts/queries/12-Query-Caching_lang1.rst | 15 +++++++++++++++ docs-parts/queries/12-Query-Caching_lang2.rst | 7 +++++++ docs-parts/queries/12-Query-Caching_lang3.rst | 6 ++++++ .../queries/13-Transpiler-Design_lang1.md | 3 --- 5 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 docs-parts/queries/12-Query-Caching_lang1.rst create mode 100644 docs-parts/queries/12-Query-Caching_lang2.rst create mode 100644 docs-parts/queries/12-Query-Caching_lang3.rst rename dev_guide/transpiler_specs.md => docs-parts/queries/13-Transpiler-Design_lang1.md (97%) diff --git a/docs-parts/manipulation/1-Update_lang1.rst b/docs-parts/manipulation/1-Update_lang1.rst index bbd41f4f6..0960e2e22 100644 --- a/docs-parts/manipulation/1-Update_lang1.rst +++ b/docs-parts/manipulation/1-Update_lang1.rst @@ -1,8 +1,14 @@ .. code-block:: python - # update value in record with id=1 + # with record as a dict specifying the primary and + # secondary attribute values + table.update1(record) + + # update value in record with id as primary key table.update1({'id': 1, 'value': 3}) - # reset value to default + # reset value to default with id as primary key table.update1({'id': 1, 'value': None}) + ## OR + table.update1({'id': 1}) diff --git a/docs-parts/queries/12-Query-Caching_lang1.rst b/docs-parts/queries/12-Query-Caching_lang1.rst new file mode 100644 index 000000000..e1a7ff4dd --- /dev/null +++ b/docs-parts/queries/12-Query-Caching_lang1.rst @@ -0,0 +1,15 @@ + +.. code-block:: python + + # set the query cache path + dj.config['query_cache'] = os.path.expanduser('~/dj_query_cache') + + # access the currently active connection object + conn = dj.conn() + ## OR + conn = schema.connection + ## OR + conn = table.connection + + # activate query caching for a namespace called 'main' + conn.set_query_cache(query_cache='main') diff --git a/docs-parts/queries/12-Query-Caching_lang2.rst b/docs-parts/queries/12-Query-Caching_lang2.rst new file mode 100644 index 000000000..54661c6b7 --- /dev/null +++ b/docs-parts/queries/12-Query-Caching_lang2.rst @@ -0,0 +1,7 @@ + +.. code-block:: python + + # deactivate query caching + conn.set_query_cache(query_cache=None) + ## OR + conn.set_query_cache() diff --git a/docs-parts/queries/12-Query-Caching_lang3.rst b/docs-parts/queries/12-Query-Caching_lang3.rst new file mode 100644 index 000000000..11d86b146 --- /dev/null +++ b/docs-parts/queries/12-Query-Caching_lang3.rst @@ -0,0 +1,6 @@ + +.. code-block:: python + + # deactivate query caching + conn.purge_query_cache(query_cache='main') + diff --git a/dev_guide/transpiler_specs.md b/docs-parts/queries/13-Transpiler-Design_lang1.md similarity index 97% rename from dev_guide/transpiler_specs.md rename to docs-parts/queries/13-Transpiler-Design_lang1.md index c5c85ca4b..e08049772 100644 --- a/dev_guide/transpiler_specs.md +++ b/docs-parts/queries/13-Transpiler-Design_lang1.md @@ -1,6 +1,3 @@ -# Design specifications of the DataJoint-to-SQL Transpiler -This document contains information and reasoning that went into the design of the DataJoint-to-SQL transpiler for DataJoint for Python version 0.13. - MySQL appears to differ from standard SQL by the sequence of evaluating the clauses of the SELECT statement. ``` From 8a1b5cb1705f684e66047ba94ce86caf0ef9a136 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 14:46:35 -0500 Subject: [PATCH 14/19] Update transpiler doc headers. --- .../queries/13-Transpiler-Design_lang1.md | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs-parts/queries/13-Transpiler-Design_lang1.md b/docs-parts/queries/13-Transpiler-Design_lang1.md index e08049772..aa63b9601 100644 --- a/docs-parts/queries/13-Transpiler-Design_lang1.md +++ b/docs-parts/queries/13-Transpiler-Design_lang1.md @@ -11,7 +11,7 @@ Moving `SELECT` to an earlier phase allows the `GROUP BY` and `HAVING` clauses t The current implementation targets the MySQL implementation where table column aliases can be used in `HAVING`. If postgres or CockroachDB cannot be coerced to work this way, restrictions of aggregations will have to be updated accordingly. -## QueryExpression +### QueryExpression `QueryExpression` is the main object representing a distinct `SELECT` statement. It implements operators `&`, `*`, and `proj` — restriction, join, and projection. @@ -28,11 +28,11 @@ At least one element must be present in `support`. Multiple elements in `support From the user's perspective `QueryExpression` objects are immutable: once created they cannot be modified. All operators derive new objects. -### Alias attributes +#### Alias attributes `proj` can create an alias attribute by renaming an existing attribute or calculating a new attribute. Alias attributes are the primary reason why subqueries are sometimes required. -### Subqueries +#### Subqueries Projections, restrictions, and joins do not necessarily trigger new subqueries: the resulting `QueryExpression` object simply merges the properties of its inputs into self: `heading`, `restriction`, and `support`. The input object is treated as a subquery in the following cases: @@ -48,7 +48,7 @@ An error arises if A subquery is created by creating a new `QueryExpression` object (or a subclass object) with its `support` pointing to the input object. -### Join compatibility +#### Join compatibility The join is always natural (i.e. *equijoin* on the namesake attributes). **Before version 0.13:** As of version `0.12.*` and earlier, two query expressions were considered join-compatible if their namesake attributes were the primary key of at least one of the input expressions. This rule was easiest to implement but does not provide best semantics. @@ -60,17 +60,17 @@ The join is always natural (i.e. *equijoin* on the namesake attributes). The same join compatibility rules apply when restricting one query expression with another. -### Join mechanics +#### Join mechanics Any restriction applied to the inputs of a join can be applied to its output. Therefore, those inputs that are not turned into queries donate their supports, restrictions, and projections to the join itself. -## Table +### Table `Table` is a subclass of `QueryExpression` implementing table manipulation methods such as `insert`, `insert1`, `delete`, `update1`, and `drop`. The restriction operator `&` applied to a `Table` preserves its class identity so that the result remains of type `Table`. However, `proj` converts the result into a `QueryExpression` object. This may produce a base query that is not an instance of Table. -## Aggregation +### Aggregation `Aggregation` is a subclass of `QueryExpression`. Its main input is the *aggregating* query expression and it takes an additional second input — the *aggregated* query expression. @@ -88,7 +88,7 @@ With respect to the second input, the projection part of aggregation allows only All other rules for subqueries remain the same as for `QueryExpression` -## Union +### Union `Union` is a subclass of `QueryExpression`. A `Union` object results from the `+` operator on two `QueryExpression` objects. Its `support` property contains the list of expressions (at least two) to unify. @@ -98,16 +98,16 @@ The `Union` operator performs an OUTER JOIN of its inputs provided that the inpu Union treats all its inputs as subqueries except for unrestricted Union objects. -## Universal Sets `dj.U` +### Universal Sets `dj.U` `dj.U` is a special operand in query expressions that allows performing special operations. By itself, it can never form a query and is not a subclass of `QueryExpression`. Other query expressions are modified through participation in operations with `dj.U`. -### Aggegating by `dj.U` +#### Aggegating by `dj.U` -### Resttricting a `dj.U` object with a `QueryExpression` object +#### Resttricting a `dj.U` object with a `QueryExpression` object -### Joining a `dj.U` object +#### Joining a `dj.U` object -# Query "Backprojection" +### Query "Backprojection" Once a QueryExpression is used in a `fetch` operation or becomes a subquery in another query, it can project out all unnecessary attributes from its own inputs, recursively. This is implemented by the `finalize` method. This simplification produces much leaner queries resulting in improved query performance in version 0.13, especially on complex queries with blob data, compensating for MySQL's deficiencies in query optimization. From c5ac58ec1eac25733ed6e733490d5082c8fd79f3 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 15:21:58 -0500 Subject: [PATCH 15/19] Revert heading structure. --- .../queries/13-Transpiler-Design_lang1.md | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs-parts/queries/13-Transpiler-Design_lang1.md b/docs-parts/queries/13-Transpiler-Design_lang1.md index aa63b9601..e015646e1 100644 --- a/docs-parts/queries/13-Transpiler-Design_lang1.md +++ b/docs-parts/queries/13-Transpiler-Design_lang1.md @@ -11,7 +11,7 @@ Moving `SELECT` to an earlier phase allows the `GROUP BY` and `HAVING` clauses t The current implementation targets the MySQL implementation where table column aliases can be used in `HAVING`. If postgres or CockroachDB cannot be coerced to work this way, restrictions of aggregations will have to be updated accordingly. -### QueryExpression +## QueryExpression `QueryExpression` is the main object representing a distinct `SELECT` statement. It implements operators `&`, `*`, and `proj` — restriction, join, and projection. @@ -28,11 +28,11 @@ At least one element must be present in `support`. Multiple elements in `support From the user's perspective `QueryExpression` objects are immutable: once created they cannot be modified. All operators derive new objects. -#### Alias attributes +### Alias attributes `proj` can create an alias attribute by renaming an existing attribute or calculating a new attribute. Alias attributes are the primary reason why subqueries are sometimes required. -#### Subqueries +### Subqueries Projections, restrictions, and joins do not necessarily trigger new subqueries: the resulting `QueryExpression` object simply merges the properties of its inputs into self: `heading`, `restriction`, and `support`. The input object is treated as a subquery in the following cases: @@ -48,7 +48,7 @@ An error arises if A subquery is created by creating a new `QueryExpression` object (or a subclass object) with its `support` pointing to the input object. -#### Join compatibility +### Join compatibility The join is always natural (i.e. *equijoin* on the namesake attributes). **Before version 0.13:** As of version `0.12.*` and earlier, two query expressions were considered join-compatible if their namesake attributes were the primary key of at least one of the input expressions. This rule was easiest to implement but does not provide best semantics. @@ -60,17 +60,17 @@ The join is always natural (i.e. *equijoin* on the namesake attributes). The same join compatibility rules apply when restricting one query expression with another. -#### Join mechanics +### Join mechanics Any restriction applied to the inputs of a join can be applied to its output. Therefore, those inputs that are not turned into queries donate their supports, restrictions, and projections to the join itself. -### Table +## Table `Table` is a subclass of `QueryExpression` implementing table manipulation methods such as `insert`, `insert1`, `delete`, `update1`, and `drop`. The restriction operator `&` applied to a `Table` preserves its class identity so that the result remains of type `Table`. However, `proj` converts the result into a `QueryExpression` object. This may produce a base query that is not an instance of Table. -### Aggregation +## Aggregation `Aggregation` is a subclass of `QueryExpression`. Its main input is the *aggregating* query expression and it takes an additional second input — the *aggregated* query expression. @@ -88,7 +88,7 @@ With respect to the second input, the projection part of aggregation allows only All other rules for subqueries remain the same as for `QueryExpression` -### Union +## Union `Union` is a subclass of `QueryExpression`. A `Union` object results from the `+` operator on two `QueryExpression` objects. Its `support` property contains the list of expressions (at least two) to unify. @@ -98,16 +98,16 @@ The `Union` operator performs an OUTER JOIN of its inputs provided that the inpu Union treats all its inputs as subqueries except for unrestricted Union objects. -### Universal Sets `dj.U` +## Universal Sets `dj.U` `dj.U` is a special operand in query expressions that allows performing special operations. By itself, it can never form a query and is not a subclass of `QueryExpression`. Other query expressions are modified through participation in operations with `dj.U`. -#### Aggegating by `dj.U` +### Aggegating by `dj.U` -#### Resttricting a `dj.U` object with a `QueryExpression` object +### Resttricting a `dj.U` object with a `QueryExpression` object -#### Joining a `dj.U` object +### Joining a `dj.U` object -### Query "Backprojection" +## Query "Backprojection" Once a QueryExpression is used in a `fetch` operation or becomes a subquery in another query, it can project out all unnecessary attributes from its own inputs, recursively. This is implemented by the `finalize` method. This simplification produces much leaner queries resulting in improved query performance in version 0.13, especially on complex queries with blob data, compensating for MySQL's deficiencies in query optimization. From a943ad6d783d4e9c30bf96b84a851a1c5eb4b04f Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 17:07:17 -0500 Subject: [PATCH 16/19] Add purge utility for query caching and add test. --- datajoint/connection.py | 15 +++++---- docs-parts/queries/12-Query-Caching_lang3.rst | 4 +-- tests/test_fetch.py | 32 +++++++++++++++++++ 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/datajoint/connection.py b/datajoint/connection.py index b9aee1f5d..34c8a6fa5 100644 --- a/datajoint/connection.py +++ b/datajoint/connection.py @@ -228,13 +228,14 @@ def set_query_cache(self, query_cache=None): """ self._query_cache = query_cache - def purge_query_cache(self, query_cache): - """ - Purges if query cache is available with the provided reference. - - :param query_cache: a string associated with the hash for query results - """ - pass # wip + def purge_query_cache(self): + """ Purges all query cache. """ + if 'query_cache' in config and isinstance(config['query_cache'], str) and \ + pathlib.Path(config['query_cache']).is_dir(): + path_iter = pathlib.Path(config['query_cache']).glob('**/*') + for path in path_iter: + path.unlink() + self._query_cache = None def close(self): self._conn.close() diff --git a/docs-parts/queries/12-Query-Caching_lang3.rst b/docs-parts/queries/12-Query-Caching_lang3.rst index 11d86b146..34e3784cd 100644 --- a/docs-parts/queries/12-Query-Caching_lang3.rst +++ b/docs-parts/queries/12-Query-Caching_lang3.rst @@ -1,6 +1,6 @@ .. code-block:: python - # deactivate query caching - conn.purge_query_cache(query_cache='main') + # purged the cached queries + conn.purge_query_cache() diff --git a/tests/test_fetch.py b/tests/test_fetch.py index fd4adb417..8ebf1c87e 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -7,6 +7,7 @@ import warnings from . import schema import datajoint as dj +import os class TestFetch: @@ -254,3 +255,34 @@ def test_same_secondary_attribute(self): children = (schema.Child * schema.Parent().proj()).fetch()['name'] assert len(children) == 1 assert children[0] == 'Dan' + + def test_query_caching(self): + # initialize cache directory + os.mkdir(os.path.expanduser('~/dj_query_cache')) + + with dj.config(query_cache=os.path.expanduser('~/dj_query_cache')): + # insert sample data and load cache + schema.TTest3.insert([dict(key=100+i, value=200+i) for i in range(2)]) + dj.conn().set_query_cache(query_cache='main') + cached_res = schema.TTest3().fetch() + # attempt to insert while caching enabled + try: + schema.TTest3.insert([dict(key=200+i, value=400+i) for i in range(2)]) + assert False, 'Insert allowed which query caching enabled' + except dj.DataJointError: + dj.conn().set_query_cache() + # insert new data + schema.TTest3.insert([dict(key=600+i, value=800+i) for i in range(2)]) + # re-enable cache to access old results + dj.conn().set_query_cache(query_cache='main') + previous_cache = schema.TTest3().fetch() + # verify properly cached and how to refresh results + assert all([c == p for c, p in zip(cached_res, previous_cache)]) + dj.conn().set_query_cache() + uncached_res = schema.TTest3().fetch() + assert len(uncached_res) > len(cached_res) + # purge query cache + dj.conn().purge_query_cache() + + # reset cache directory state (will fail if purge was unsuccessful) + os.rmdir(os.path.expanduser('~/dj_query_cache')) From 1a35ad0c7284ca17a41237777ebe136d78331960 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 19:13:31 -0500 Subject: [PATCH 17/19] Fix connection access issue. --- datajoint/connection.py | 1 - docs-parts/queries/12-Query-Caching_lang1.rst | 10 ++++------ tests/test_fetch.py | 13 +++++++------ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/datajoint/connection.py b/datajoint/connection.py index 34c8a6fa5..9db3dcb77 100644 --- a/datajoint/connection.py +++ b/datajoint/connection.py @@ -235,7 +235,6 @@ def purge_query_cache(self): path_iter = pathlib.Path(config['query_cache']).glob('**/*') for path in path_iter: path.unlink() - self._query_cache = None def close(self): self._conn.close() diff --git a/docs-parts/queries/12-Query-Caching_lang1.rst b/docs-parts/queries/12-Query-Caching_lang1.rst index e1a7ff4dd..673eef85b 100644 --- a/docs-parts/queries/12-Query-Caching_lang1.rst +++ b/docs-parts/queries/12-Query-Caching_lang1.rst @@ -4,12 +4,10 @@ # set the query cache path dj.config['query_cache'] = os.path.expanduser('~/dj_query_cache') - # access the currently active connection object - conn = dj.conn() - ## OR - conn = schema.connection - ## OR - conn = table.connection + # access the active connection object for the tables + conn = dj.conn() # if queries co-located with tables + conn = module.schema.connection # if schema co-located with tables + conn = module.table.connection # most flexible # activate query caching for a namespace called 'main' conn.set_query_cache(query_cache='main') diff --git a/tests/test_fetch.py b/tests/test_fetch.py index 8ebf1c87e..cb1ba3a4f 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -261,28 +261,29 @@ def test_query_caching(self): os.mkdir(os.path.expanduser('~/dj_query_cache')) with dj.config(query_cache=os.path.expanduser('~/dj_query_cache')): + conn = schema.TTest3.connection # insert sample data and load cache schema.TTest3.insert([dict(key=100+i, value=200+i) for i in range(2)]) - dj.conn().set_query_cache(query_cache='main') + conn.set_query_cache(query_cache='main') cached_res = schema.TTest3().fetch() # attempt to insert while caching enabled try: schema.TTest3.insert([dict(key=200+i, value=400+i) for i in range(2)]) - assert False, 'Insert allowed which query caching enabled' + assert False, 'Insert allowed while query caching enabled' except dj.DataJointError: - dj.conn().set_query_cache() + conn.set_query_cache() # insert new data schema.TTest3.insert([dict(key=600+i, value=800+i) for i in range(2)]) # re-enable cache to access old results - dj.conn().set_query_cache(query_cache='main') + conn.set_query_cache(query_cache='main') previous_cache = schema.TTest3().fetch() # verify properly cached and how to refresh results assert all([c == p for c, p in zip(cached_res, previous_cache)]) - dj.conn().set_query_cache() + conn.set_query_cache() uncached_res = schema.TTest3().fetch() assert len(uncached_res) > len(cached_res) # purge query cache - dj.conn().purge_query_cache() + conn.purge_query_cache() # reset cache directory state (will fail if purge was unsuccessful) os.rmdir(os.path.expanduser('~/dj_query_cache')) From a75b6b4b1dfe54b1e0fab1fcf906120ff7ba1fec Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 19:34:43 -0500 Subject: [PATCH 18/19] Add basic tests for permissive join and restriction. --- LNX-docker-compose.yml | 2 +- tests/test_relational_operand.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index a674b1873..448ab6f02 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -1,4 +1,4 @@ -# docker-compose -f LNX-docker-compose.yml --env-file LNX.env up --build --exit-code-from app +# docker-compose -f LNX-docker-compose.yml --env-file LNX.env up --exit-code-from app --build version: '2.2' x-net: &net networks: diff --git a/tests/test_relational_operand.py b/tests/test_relational_operand.py index 43d3ee943..f37dafb31 100644 --- a/tests/test_relational_operand.py +++ b/tests/test_relational_operand.py @@ -8,7 +8,7 @@ import datajoint as dj from .schema_simple import A, B, D, E, F, L, DataA, DataB, TTestUpdate, IJ, JI, ReservedWord -from .schema import Experiment, TTest3, Trial, Ephys +from .schema import Experiment, TTest3, Trial, Ephys, Child, Parent def setup(): @@ -449,3 +449,13 @@ def test_reserved_words2(): rel = ReservedWord() rel.insert1({'key': 1, 'in': 'ouch', 'from': 'bummer', 'int': 3, 'select': 'major pain'}) (rel & 'key=1').fetch('in') # error because reserved word `key` is not in backquotes. See issue #249 + + @staticmethod + def test_permissive_join_basic(): + """Verify join compatibility check is skipped for join""" + Child @ Parent + + @staticmethod + def test_permissive_restriction_basic(): + """Verify join compatibility check is skipped for restriction""" + Child ^ Parent From 51f55ab44ade7ad18b0a884ff2917feec8595590 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 23 Mar 2021 19:43:16 -0500 Subject: [PATCH 19/19] Fix styling and update changelog. --- CHANGELOG.md | 3 +++ datajoint/condition.py | 3 ++- docs-parts/intro/Releases_lang1.rst | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffe293e22..6625d1d5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ * Bugfix - Regression error on joins with same attribute name (#857) PR #878 * Bugfix - Error when `fetch1('KEY')` when `dj.config['fetch_format']='frame'` set (#876) PR #880, #878 * Bugfix - Error when cascading deletes in tables with many, complex keys (#883, #886) PR #839 +* Add deprecation warning for `_update`. PR #889 +* Add `purge_query_cache` utility. PR #889 +* Add tests for query caching and permissive join and restriction. PR #889 * Drop support for Python 3.5 ### 0.12.9 -- Mar 12, 2021 diff --git a/datajoint/condition.py b/datajoint/condition.py index 510c14295..7d921be4f 100644 --- a/datajoint/condition.py +++ b/datajoint/condition.py @@ -84,7 +84,8 @@ def prep_value(k, v): try: v = uuid.UUID(v) except (AttributeError, ValueError): - raise DataJointError('Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) + raise DataJointError( + 'Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) return "X'%s'" % v.bytes.hex() if isinstance(v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)): return '"%s"' % v diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 07cd30821..3dc72f2ab 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -11,6 +11,9 @@ * Bugfix - Regression error on joins with same attribute name (#857) PR #878 * Bugfix - Error when `fetch1('KEY')` when `dj.config['fetch_format']='frame'` set (#876) PR #880, #878 * Bugfix - Error when cascading deletes in tables with many, complex keys (#883, #886) PR #839 +* Add deprecation warning for `_update`. PR #889 +* Add `purge_query_cache` utility. PR #889 +* Add tests for query caching and permissive join and restriction. PR #889 * Drop support for Python 3.5 0.12.9 -- Mar 12, 2021