diff --git a/.coveragerc b/.coveragerc index 8e9b6dd17..df41fbcc8 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,3 +3,4 @@ branch = False source = datajoint [report] +show_missing = True \ No newline at end of file diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index b1ce66fcc..9785580b3 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -36,7 +36,7 @@ jobs: - name: Run primary tests env: UID: "1001" - GID: "116" + GID: "121" PY_VER: ${{matrix.py_ver}} MYSQL_VER: ${{matrix.mysql_ver}} ALPINE_VER: "3.10" diff --git a/CHANGELOG.md b/CHANGELOG.md index 35e78375a..6625d1d5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,14 @@ * Bugfix - Regression error on joins with same attribute name (#857) PR #878 * Bugfix - Error when `fetch1('KEY')` when `dj.config['fetch_format']='frame'` set (#876) PR #880, #878 * Bugfix - Error when cascading deletes in tables with many, complex keys (#883, #886) PR #839 +* Add deprecation warning for `_update`. PR #889 +* Add `purge_query_cache` utility. PR #889 +* Add tests for query caching and permissive join and restriction. PR #889 * Drop support for Python 3.5 +### 0.12.9 -- Mar 12, 2021 +* Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. (#876) PR #880 + ### 0.12.8 -- Jan 12, 2021 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Load dependencies before querying dependencies. (#179) PR #833 diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index c91f4c01e..448ab6f02 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -1,4 +1,4 @@ -# docker-compose -f LNX-docker-compose.yml --env-file LNX.env up --build --exit-code-from app +# docker-compose -f LNX-docker-compose.yml --env-file LNX.env up --exit-code-from app --build version: '2.2' x-net: &net networks: @@ -78,7 +78,7 @@ services: - | set -e pip install --user -r test_requirements.txt - pip install --user . 
+ pip install -e . pip freeze | grep datajoint nosetests -vsw tests --with-coverage --cover-package=datajoint coveralls diff --git a/datajoint/condition.py b/datajoint/condition.py index 126ed9f69..7d921be4f 100644 --- a/datajoint/condition.py +++ b/datajoint/condition.py @@ -84,7 +84,8 @@ def prep_value(k, v): try: v = uuid.UUID(v) except (AttributeError, ValueError): - raise DataJointError('Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) from None + raise DataJointError( + 'Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) return "X'%s'" % v.bytes.hex() if isinstance(v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)): return '"%s"' % v diff --git a/datajoint/connection.py b/datajoint/connection.py index 14e457d0b..9db3dcb77 100644 --- a/datajoint/connection.py +++ b/datajoint/connection.py @@ -218,7 +218,7 @@ def connect(self): k == 'ssl' and self.conn_info['ssl_input'] is None)}) self._conn.autocommit(True) - def set_query_cache(self, query_cache): + def set_query_cache(self, query_cache=None): """ When query_cache is not None, the connection switches into the query caching mode, which entails: 1. Only SELECT queries are allowed. @@ -228,6 +228,14 @@ def set_query_cache(self, query_cache): """ self._query_cache = query_cache + def purge_query_cache(self): + """ Purges all query cache. """ + if 'query_cache' in config and isinstance(config['query_cache'], str) and \ + pathlib.Path(config['query_cache']).is_dir(): + path_iter = pathlib.Path(config['query_cache']).glob('**/*') + for path in path_iter: + path.unlink() + def close(self): self._conn.close() diff --git a/datajoint/expression.py b/datajoint/expression.py index 9b7d10544..e0d1155fd 100644 --- a/datajoint/expression.py +++ b/datajoint/expression.py @@ -193,7 +193,7 @@ def restrict_in_place(self, restriction): def __and__(self, restriction): """ - Restriction operator + Restriction operator e.g. q1 & q2. 
:return: a restricted copy of the input argument See QueryExpression.restrict for more detail. """ @@ -201,7 +201,7 @@ def __and__(self, restriction): def __xor__(self, restriction): """ - Restriction operator ignoring compatibility check. + Permissive restriction operator ignoring compatibility check e.g. q1 ^ q2. """ if inspect.isclass(restriction) and issubclass(restriction, QueryExpression): restriction = restriction() @@ -211,22 +211,33 @@ def __xor__(self, restriction): def __sub__(self, restriction): """ - Inverted restriction + Inverted restriction e.g. q1 - q2. :return: a restricted copy of the input argument See QueryExpression.restrict for more detail. """ return self.restrict(Not(restriction)) def __neg__(self): + """ + Convert between restriction and inverted restriction e.g. -q1. + :return: target restriction + See QueryExpression.restrict for more detail. + """ if isinstance(self, Not): return self.restriction return Not(self) def __mul__(self, other): - """ join of query expressions `self` and `other` """ + """ + join of query expressions `self` and `other` e.g. q1 * q2. + """ return self.join(other) def __matmul__(self, other): + """ + Permissive join of query expressions `self` and `other` ignoring compatibility check + e.g. q1 @ q2. + """ if inspect.isclass(other) and issubclass(other, QueryExpression): other = other() # instantiate return self.join(other, semantic_check=False) @@ -271,7 +282,7 @@ def join(self, other, semantic_check=True, left=False): return result def __add__(self, other): - """union""" + """union e.g. q1 + q2.""" return Union.create(self, other) def proj(self, *attributes, **named_attributes): @@ -424,7 +435,7 @@ def tail(self, limit=25, **fetch_kwargs): return self.fetch(order_by="KEY DESC", limit=limit, **fetch_kwargs)[::-1] def __len__(self): - """ :return: number of elements in the result set """ + """:return: number of elements in the result set e.g. 
len(q1).""" return self.connection.query( 'SELECT count(DISTINCT {fields}) FROM {from_}{where}'.format( fields=self.heading.as_sql(self.primary_key, include_aliases=False), @@ -433,7 +444,8 @@ def __len__(self): def __bool__(self): """ - :return: True if the result is not empty. Equivalent to len(self) > 0 but often faster. + :return: True if the result is not empty. Equivalent to len(self) > 0 but often + faster e.g. bool(q1). """ return bool(self.connection.query( 'SELECT EXISTS(SELECT 1 FROM {from_}{where})'.format( @@ -442,7 +454,7 @@ def __bool__(self): def __contains__(self, item): """ - returns True if item is found in the . + returns True if the restriction yields any records e.g. restriction in q1. :param item: any restriction (item in query_expression) is equivalent to bool(query_expression & item) but may be executed more efficiently. @@ -450,11 +462,22 @@ def __contains__(self, item): return bool(self & item) # May be optimized e.g. using an EXISTS query def __iter__(self): + """ + returns an iterator-compatible QueryExpression object e.g. iter(q1). + + :return: iterator-compatible QueryExpression object + """ self._iter_only_key = all(v.in_key for v in self.heading.attributes.values()) self._iter_keys = self.fetch('KEY') return self def __next__(self): + """ + returns the next record from an iterator-compatible QueryExpression object + e.g. next(q1). + + :return: fetch1 record + """ try: key = self._iter_keys.pop(0) except AttributeError: @@ -490,6 +513,11 @@ def cursor(self, offset=0, limit=None, order_by=None, as_dict=False): return self.connection.query(sql, as_dict=as_dict) def __repr__(self): + """ + returns the string representation of a QueryExpression object e.g. str(q1). 
+ + :return: string version of the query result + """ return super().__repr__() if config['loglevel'].lower() == 'debug' else self.preview() def preview(self, limit=None, width=None): diff --git a/datajoint/table.py b/datajoint/table.py index 53da2dc3c..d79c07a75 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -7,6 +7,7 @@ import logging import uuid import re +import warnings from pathlib import Path from .settings import config from .declare import declare, alter @@ -568,6 +569,9 @@ def _update(self, attrname, value=None): >>> (v2p.Mice() & key)._update('mouse_dob', '2011-01-01') >>> (v2p.Mice() & key)._update( 'lens') # set the value to NULL """ + warnings.warn( + '`_update` is a deprecated function to be removed in datajoint 0.14. ' + 'Use `.update1` instead.') if len(self) != 1: raise DataJointError('Update is only allowed on one tuple at a time') if attrname not in self.heading: diff --git a/datajoint/version.py b/datajoint/version.py index 4ac209b43..a7571b6c4 100644 --- a/datajoint/version.py +++ b/datajoint/version.py @@ -1,3 +1,3 @@ -__version__ = "0.13.dev7" +__version__ = "0.13.0" assert len(__version__) <= 10 # The log table limits version to the 10 characters diff --git a/docs-parts/computation/01-autopopulate_lang1.rst b/docs-parts/computation/01-autopopulate_lang1.rst index a1caecd6c..31cf621ab 100644 --- a/docs-parts/computation/01-autopopulate_lang1.rst +++ b/docs-parts/computation/01-autopopulate_lang1.rst @@ -11,7 +11,7 @@ """ def make(self, key): - img = (test.Image & key).fetch1['image'] + img = (test.Image & key).fetch1('image') key['filtered_image'] = myfilter(img) self.insert(key) diff --git a/docs-parts/computation/04-master-part_lang1.rst b/docs-parts/computation/04-master-part_lang1.rst index d76404ff1..3bda5abb9 100644 --- a/docs-parts/computation/04-master-part_lang1.rst +++ b/docs-parts/computation/04-master-part_lang1.rst @@ -21,7 +21,7 @@ The part is subclassed from ``dj.Part`` and does not need the ``@schema`` decora """ 
def make(self, key): - image = (Image & key).fetch1['image'] + image = (Image & key).fetch1('image') self.insert1(key) count = itertools.count() Segmentation.ROI.insert( diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 36ad0c8f5..3dc72f2ab 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -11,8 +11,15 @@ * Bugfix - Regression error on joins with same attribute name (#857) PR #878 * Bugfix - Error when `fetch1('KEY')` when `dj.config['fetch_format']='frame'` set (#876) PR #880, #878 * Bugfix - Error when cascading deletes in tables with many, complex keys (#883, #886) PR #839 +* Add deprecation warning for `_update`. PR #889 +* Add `purge_query_cache` utility. PR #889 +* Add tests for query caching and permissive join and restriction. PR #889 * Drop support for Python 3.5 +0.12.9 -- Mar 12, 2021 +---------------------- +* Fix bug with fetch1 with `dj.config['fetch_format']="frame"`. Issue #876 (PR #880) + 0.12.8 -- Jan 12, 2021 ---------------------- * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 diff --git a/docs-parts/manipulation/1-Update_lang1.rst b/docs-parts/manipulation/1-Update_lang1.rst new file mode 100644 index 000000000..0960e2e22 --- /dev/null +++ b/docs-parts/manipulation/1-Update_lang1.rst @@ -0,0 +1,14 @@ + +.. 
code-block:: python + + # with record as a dict specifying the primary and + # secondary attribute values + table.update1(record) + + # update value in record with id as primary key + table.update1({'id': 1, 'value': 3}) + + # reset value to default with id as primary key + table.update1({'id': 1, 'value': None}) + ## OR + table.update1({'id': 1}) diff --git a/docs-parts/queries/03-Fetch_lang1.rst b/docs-parts/queries/03-Fetch_lang1.rst index 3e6f5e043..ad99488b3 100644 --- a/docs-parts/queries/03-Fetch_lang1.rst +++ b/docs-parts/queries/03-Fetch_lang1.rst @@ -47,7 +47,7 @@ For example: import pandas as pd frame = pd.DataFrame(tab.fetch()) -Calling ``fetch()`` with the argument ``format="frame"`` returns results as ``pandas.DataFrame`` objects with no need for conversion. +Calling ``fetch()`` with the argument ``format="frame"`` returns results as ``pandas.DataFrame`` objects indexed by the table's primary key attributes. .. code-block:: python diff --git a/docs-parts/queries/12-Query-Caching_lang1.rst b/docs-parts/queries/12-Query-Caching_lang1.rst new file mode 100644 index 000000000..673eef85b --- /dev/null +++ b/docs-parts/queries/12-Query-Caching_lang1.rst @@ -0,0 +1,13 @@ + +.. code-block:: python + + # set the query cache path + dj.config['query_cache'] = os.path.expanduser('~/dj_query_cache') + + # access the active connection object for the tables + conn = dj.conn() # if queries co-located with tables + conn = module.schema.connection # if schema co-located with tables + conn = module.table.connection # most flexible + + # activate query caching for a namespace called 'main' + conn.set_query_cache(query_cache='main') diff --git a/docs-parts/queries/12-Query-Caching_lang2.rst b/docs-parts/queries/12-Query-Caching_lang2.rst new file mode 100644 index 000000000..54661c6b7 --- /dev/null +++ b/docs-parts/queries/12-Query-Caching_lang2.rst @@ -0,0 +1,7 @@ + +.. 
code-block:: python + + # deactivate query caching + conn.set_query_cache(query_cache=None) + ## OR + conn.set_query_cache() diff --git a/docs-parts/queries/12-Query-Caching_lang3.rst b/docs-parts/queries/12-Query-Caching_lang3.rst new file mode 100644 index 000000000..34e3784cd --- /dev/null +++ b/docs-parts/queries/12-Query-Caching_lang3.rst @@ -0,0 +1,6 @@ + +.. code-block:: python + + # purge the cached queries + conn.purge_query_cache() + diff --git a/dev_guide/transpiler_specs.md b/docs-parts/queries/13-Transpiler-Design_lang1.md similarity index 96% rename from dev_guide/transpiler_specs.md rename to docs-parts/queries/13-Transpiler-Design_lang1.md index c5c85ca4b..e015646e1 100644 --- a/dev_guide/transpiler_specs.md +++ b/docs-parts/queries/13-Transpiler-Design_lang1.md @@ -1,6 +1,3 @@ -# Design specifications of the DataJoint-to-SQL Transpiler -This document contains information and reasoning that went into the design of the DataJoint-to-SQL transpiler for DataJoint for Python version 0.13. - MySQL appears to differ from standard SQL by the sequence of evaluating the clauses of the SELECT statement. ``` @@ -110,7 +107,7 @@ Union treats all its inputs as subqueries except for unrestricted Union objects. ### Joining a `dj.U` object -# Query "Backprojection" +## Query "Backprojection" Once a QueryExpression is used in a `fetch` operation or becomes a subquery in another query, it can project out all unnecessary attributes from its own inputs, recursively. This is implemented by the `finalize` method. This simplification produces much leaner queries resulting in improved query performance in version 0.13, especially on complex queries with blob data, compensating for MySQL's deficiencies in query optimization. 
diff --git a/docs-parts/version_common.json b/docs-parts/version_common.json index c39fa12d1..2296f52e6 100644 --- a/docs-parts/version_common.json +++ b/docs-parts/version_common.json @@ -1,3 +1,3 @@ { - "comm_version": "v0.1" + "comm_version": "v0.2" } \ No newline at end of file diff --git a/local-docker-compose.yml b/local-docker-compose.yml index ff0dda0e6..4b12e74e0 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -82,7 +82,8 @@ services: - -c - | set -e - pip install --user nose nose-cov coveralls flake8 ptvsd . + pip install --user nose nose-cov coveralls flake8 ptvsd + pip install -e . pip freeze | grep datajoint ## You may run the below tests once sh'ed into container i.e. docker exec -it datajoint-python_app_1 sh # nosetests -vsw tests; #run all tests diff --git a/tests/test_fetch.py b/tests/test_fetch.py index fd4adb417..cb1ba3a4f 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -7,6 +7,7 @@ import warnings from . import schema import datajoint as dj +import os class TestFetch: @@ -254,3 +255,35 @@ def test_same_secondary_attribute(self): children = (schema.Child * schema.Parent().proj()).fetch()['name'] assert len(children) == 1 assert children[0] == 'Dan' + + def test_query_caching(self): + # initialize cache directory + os.mkdir(os.path.expanduser('~/dj_query_cache')) + + with dj.config(query_cache=os.path.expanduser('~/dj_query_cache')): + conn = schema.TTest3.connection + # insert sample data and load cache + schema.TTest3.insert([dict(key=100+i, value=200+i) for i in range(2)]) + conn.set_query_cache(query_cache='main') + cached_res = schema.TTest3().fetch() + # attempt to insert while caching enabled + try: + schema.TTest3.insert([dict(key=200+i, value=400+i) for i in range(2)]) + assert False, 'Insert allowed while query caching enabled' + except dj.DataJointError: + conn.set_query_cache() + # insert new data + schema.TTest3.insert([dict(key=600+i, value=800+i) for i in range(2)]) + # re-enable cache to access 
old results + conn.set_query_cache(query_cache='main') + previous_cache = schema.TTest3().fetch() + # verify properly cached and how to refresh results + assert all([c == p for c, p in zip(cached_res, previous_cache)]) + conn.set_query_cache() + uncached_res = schema.TTest3().fetch() + assert len(uncached_res) > len(cached_res) + # purge query cache + conn.purge_query_cache() + + # reset cache directory state (will fail if purge was unsuccessful) + os.rmdir(os.path.expanduser('~/dj_query_cache')) diff --git a/tests/test_relational_operand.py b/tests/test_relational_operand.py index 43d3ee943..f37dafb31 100644 --- a/tests/test_relational_operand.py +++ b/tests/test_relational_operand.py @@ -8,7 +8,7 @@ import datajoint as dj from .schema_simple import A, B, D, E, F, L, DataA, DataB, TTestUpdate, IJ, JI, ReservedWord -from .schema import Experiment, TTest3, Trial, Ephys +from .schema import Experiment, TTest3, Trial, Ephys, Child, Parent def setup(): @@ -449,3 +449,13 @@ def test_reserved_words2(): rel = ReservedWord() rel.insert1({'key': 1, 'in': 'ouch', 'from': 'bummer', 'int': 3, 'select': 'major pain'}) (rel & 'key=1').fetch('in') # error because reserved word `key` is not in backquotes. See issue #249 + + @staticmethod + def test_permissive_join_basic(): + """Verify join compatibility check is skipped for join""" + Child @ Parent + + @staticmethod + def test_permissive_restriction_basic(): + """Verify join compatibility check is skipped for restriction""" + Child ^ Parent