From 9822fc4c570d046a6ec74e882d6a093e71a17eda Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Mon, 18 Nov 2019 18:12:00 +0100 Subject: [PATCH 01/39] Add multiprocessing to AutoPopulate --- datajoint/autopopulate.py | 145 ++++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 44 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 55a332914..743dca226 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -10,12 +10,26 @@ from .errors import DataJointError from .table import FreeTable import signal +import multiprocessing as mp # noinspection PyExceptionInherit,PyCallingNonCallable logger = logging.getLogger(__name__) +def initializer(table): + """Save pickled copy of (disconnected) table to the current process, + then reconnect to server. For use by call_make_key()""" + mp.current_process().table = table + table.connection.connect() # reconnect + +def call_make_key(key): + """Call current process' table.make_key()""" + table = mp.current_process().table + error = table.make_key(key) + return error + + class AutoPopulate: """ AutoPopulate is a mixin class that adds the method populate() to a Relation class. @@ -103,7 +117,7 @@ def _jobs_to_do(self, restrictions): def populate(self, *restrictions, suppress_errors=False, return_exception_objects=False, reserve_jobs=False, order="original", limit=None, max_calls=None, - display_progress=False): + display_progress=False, max_processes=None): """ rel.populate() calls rel.make(key) for every primary key in self.key_source for which there is not already a tuple in rel. 
@@ -115,14 +129,19 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object :param display_progress: if True, report progress_bar :param limit: if not None, checks at most that many keys :param max_calls: if not None, populates at max that many keys + :param max_processes: max number of processes to use simultaneously """ + self._make_key_kwargs = {'suppress_errors':suppress_errors, + 'return_exception_objects':return_exception_objects, + 'reserve_jobs':reserve_jobs, + } + if self.connection.in_transaction: raise DataJointError('Populate cannot be called during a transaction.') valid_order = ['original', 'reverse', 'random'] if order not in valid_order: raise DataJointError('The order argument must be one of %s' % str(valid_order)) - error_list = [] if suppress_errors else None jobs = self.connection.schemas[self.target.database].jobs if reserve_jobs else None # define and setup signal handler for SIGTERM @@ -138,55 +157,93 @@ def handler(signum, frame): elif order == "random": random.shuffle(keys) - call_count = 0 logger.info('Found %d keys to populate' % len(keys)) - make = self._make_tuples if hasattr(self, '_make_tuples') else self.make + if max_calls is not None: + keys = keys[:max_calls] + nkeys = len(keys) - for key in (tqdm(keys) if display_progress else keys): - if max_calls is not None and call_count >= max_calls: - break - if not reserve_jobs or jobs.reserve(self.target.table_name, self._job_key(key)): - self.connection.start_transaction() - if key in self.target: # already populated - self.connection.cancel_transaction() - if reserve_jobs: - jobs.complete(self.target.table_name, self._job_key(key)) + nproc = 1 + if max_processes and max_processes > 1: + nproc = min(max_processes, nkeys) + error_list = [] + if nproc > 1: # spawn multiple processes + # prepare to pickle self: + self.connection.close() # disconnect parent process from MySQL server + del self.connection._conn.ctx # SSLContext is not picklable + print('*** Spawning 
pool of %d processes' % nproc) + # send pickled copy of self to each process, + # each worker process calls initializer(*initargs) when it starts + with mp.Pool(nproc, initializer, (self,)) as pool: + if display_progress: + with tqdm(total=nkeys) as pbar: + for error in pool.imap(call_make_key, keys, chunksize=1): + if error is not None: + error_list.append(error) + pbar.update() else: - logger.info('Populating: ' + str(key)) - call_count += 1 - self.__class__._allow_insert = True - try: - make(dict(key)) - except (KeyboardInterrupt, SystemExit, Exception) as error: - try: - self.connection.cancel_transaction() - except OperationalError: - pass - error_message = '{exception}{msg}'.format( - exception=error.__class__.__name__, - msg=': ' + str(error) if str(error) else '') - if reserve_jobs: - # show error name and error message (if any) - jobs.error( - self.target.table_name, self._job_key(key), - error_message=error_message, error_stack=traceback.format_exc()) - if not suppress_errors or isinstance(error, SystemExit): - raise - else: - logger.error(error) - error_list.append((key, error if return_exception_objects else error_message)) - else: - self.connection.commit_transaction() - if reserve_jobs: - jobs.complete(self.target.table_name, self._job_key(key)) - finally: - self.__class__._allow_insert = False + for error in pool.imap(call_make_key, keys): + if error is not None: + error_list.append(error) + self.connection.connect() # reconnect parent process to MySQL server + else: # use single process + for key in tqdm(keys) if display_progress else keys: + error = self.make_key(key) + if error is not None: + error_list.append(error) - # place back the original signal handler + del self._make_key_kwargs # clean up + + # restore original signal handler: if reserve_jobs: signal.signal(signal.SIGTERM, old_handler) - return error_list + + if suppress_errors: + return error_list + + def make_key(self, key): + make = self._make_tuples if hasattr(self, '_make_tuples') 
else self.make + + kwargs = self._make_key_kwargs + suppress_errors = kwargs['suppress_errors'] + return_exception_objects = kwargs['return_exception_objects'] + reserve_jobs = kwargs['reserve_jobs'] + + if not reserve_jobs or jobs.reserve(self.target.table_name, self._job_key(key)): + self.connection.start_transaction() + if key in self.target: # already populated + self.connection.cancel_transaction() + if reserve_jobs: + jobs.complete(self.target.table_name, self._job_key(key)) + else: + logger.info('Populating: ' + str(key)) + self.__class__._allow_insert = True + try: + make(dict(key)) + except (KeyboardInterrupt, SystemExit, Exception) as error: + try: + self.connection.cancel_transaction() + except OperationalError: + pass + error_message = '{exception}{msg}'.format( + exception=error.__class__.__name__, + msg=': ' + str(error) if str(error) else '') + if reserve_jobs: + # show error name and error message (if any) + jobs.error( + self.target.table_name, self._job_key(key), + error_message=error_message, error_stack=traceback.format_exc()) + if not suppress_errors or isinstance(error, SystemExit): + raise + else: + logger.error(error) + return (key, error if return_exception_objects else error_message) + else: + self.connection.commit_transaction() + if reserve_jobs: + jobs.complete(self.target.table_name, self._job_key(key)) + finally: + self.__class__._allow_insert = False def progress(self, *restrictions, display=True): """ From 607490034f158f0c5c70cc688a8a437f67fd7680 Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Mon, 18 Nov 2019 18:12:10 +0100 Subject: [PATCH 02/39] Update docs --- datajoint/autopopulate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 743dca226..afa77cb81 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -124,11 +124,11 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object :param 
restrictions: a list of restrictions each restrict (rel.key_source - target.proj()) :param suppress_errors: if True, do not terminate execution. :param return_exception_objects: return error objects instead of just error messages - :param reserve_jobs: if true, reserves job to populate in asynchronous fashion + :param reserve_jobs: if True, reserve jobs to populate in asynchronous fashion :param order: "original"|"reverse"|"random" - the order of execution + :param limit: if not None, check at most this many keys + :param max_calls: if not None, populate at most this many keys :param display_progress: if True, report progress_bar - :param limit: if not None, checks at most that many keys - :param max_calls: if not None, populates at max that many keys :param max_processes: max number of processes to use simultaneously """ self._make_key_kwargs = {'suppress_errors':suppress_errors, @@ -144,7 +144,7 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object raise DataJointError('The order argument must be one of %s' % str(valid_order)) jobs = self.connection.schemas[self.target.database].jobs if reserve_jobs else None - # define and setup signal handler for SIGTERM + # define and set up signal handler for SIGTERM: if reserve_jobs: def handler(signum, frame): logger.info('Populate terminated by SIGTERM') From a7e4c2ed17d83acda0706e7f7d63fdf86aae6224 Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Mon, 18 Nov 2019 22:30:30 +0100 Subject: [PATCH 03/39] Rename max_processes -> multiprocess, accept bool or int --- datajoint/autopopulate.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index afa77cb81..4d21f6b78 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -117,7 +117,7 @@ def _jobs_to_do(self, restrictions): def populate(self, *restrictions, suppress_errors=False, return_exception_objects=False, reserve_jobs=False, 
order="original", limit=None, max_calls=None, - display_progress=False, max_processes=None): + display_progress=False, multiprocess=False): """ rel.populate() calls rel.make(key) for every primary key in self.key_source for which there is not already a tuple in rel. @@ -129,7 +129,8 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object :param limit: if not None, check at most this many keys :param max_calls: if not None, populate at most this many keys :param display_progress: if True, report progress_bar - :param max_processes: max number of processes to use simultaneously + :param multiprocess: if True, use as many processes as CPU cores, or use the integer + number of processes specified """ self._make_key_kwargs = {'suppress_errors':suppress_errors, 'return_exception_objects':return_exception_objects, @@ -163,9 +164,15 @@ def handler(signum, frame): keys = keys[:max_calls] nkeys = len(keys) - nproc = 1 - if max_processes and max_processes > 1: - nproc = min(max_processes, nkeys) + if multiprocess: # True or int, presumably + if multiprocess == True: + nproc = mp.cpu_count() + else: + assert type(multiprocess) == int + nproc = multiprocess + else: + nproc = 1 + nproc = min(nproc, nkeys) # no sense spawning more than can be used error_list = [] if nproc > 1: # spawn multiple processes # prepare to pickle self: From 24de4846928790b1ac16040282f1dd4e35a5972f Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Mon, 18 Nov 2019 22:48:58 +0100 Subject: [PATCH 04/39] Fix reserved jobs --- datajoint/autopopulate.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 4d21f6b78..7af95fae9 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -132,11 +132,6 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object :param multiprocess: if True, use as many processes as CPU cores, or use the integer number of 
processes specified """ - self._make_key_kwargs = {'suppress_errors':suppress_errors, - 'return_exception_objects':return_exception_objects, - 'reserve_jobs':reserve_jobs, - } - if self.connection.in_transaction: raise DataJointError('Populate cannot be called during a transaction.') @@ -145,6 +140,12 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object raise DataJointError('The order argument must be one of %s' % str(valid_order)) jobs = self.connection.schemas[self.target.database].jobs if reserve_jobs else None + self._make_key_kwargs = {'suppress_errors':suppress_errors, + 'return_exception_objects':return_exception_objects, + 'reserve_jobs':reserve_jobs, + 'jobs':jobs, + } + # define and set up signal handler for SIGTERM: if reserve_jobs: def handler(signum, frame): @@ -215,6 +216,7 @@ def make_key(self, key): suppress_errors = kwargs['suppress_errors'] return_exception_objects = kwargs['return_exception_objects'] reserve_jobs = kwargs['reserve_jobs'] + jobs = kwargs['jobs'] if not reserve_jobs or jobs.reserve(self.target.table_name, self._job_key(key)): self.connection.start_transaction() From 28aae7a13ccaa0f2bf4094a5bf0d7e403779a1c9 Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Thu, 21 Nov 2019 19:08:32 +0100 Subject: [PATCH 05/39] Use is instead of == Co-Authored-By: Dimitri Yatsenko --- datajoint/autopopulate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 7af95fae9..e84a8558e 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -166,7 +166,7 @@ def handler(signum, frame): nkeys = len(keys) if multiprocess: # True or int, presumably - if multiprocess == True: + if multiprocess is True: nproc = mp.cpu_count() else: assert type(multiprocess) == int From 236e62e72762ec15d0a05a9f6febf1feb607ad49 Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Thu, 21 Nov 2019 19:09:34 +0100 Subject: [PATCH 06/39] Replace assertion with 
DataJointError Co-Authored-By: Dimitri Yatsenko --- datajoint/autopopulate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index e84a8558e..13b60e4ae 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -169,7 +169,9 @@ def handler(signum, frame): if multiprocess is True: nproc = mp.cpu_count() else: - assert type(multiprocess) == int + + if not isinstance(multiprocess, int): + raise DataJointError("multiprocess can be False, True or a positive integer") nproc = multiprocess else: nproc = 1 From 92666681179d8ce9b7330fbfcf281f01d630c740 Mon Sep 17 00:00:00 2001 From: Martin Spacek Date: Thu, 21 Nov 2019 19:12:34 +0100 Subject: [PATCH 07/39] Remove extra blank line --- datajoint/autopopulate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 13b60e4ae..f874e7244 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -169,7 +169,6 @@ def handler(signum, frame): if multiprocess is True: nproc = mp.cpu_count() else: - if not isinstance(multiprocess, int): raise DataJointError("multiprocess can be False, True or a positive integer") nproc = multiprocess From 4f303286e7295e6858c49fe8b089e016e9f9209a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sun, 17 Nov 2019 15:16:04 -0600 Subject: [PATCH 08/39] fix #700 --- datajoint/connection.py | 5 ++--- datajoint/jobs.py | 39 +++++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/datajoint/connection.py b/datajoint/connection.py index fa3512a1d..e959fee0e 100644 --- a/datajoint/connection.py +++ b/datajoint/connection.py @@ -39,10 +39,10 @@ def translate_query_error(client_error, query): return errors.DuplicateError(*client_error.args[1:]) if isinstance(client_error, client.err.IntegrityError) and client_error.args[0] == 1452: return errors.IntegrityError(*client_error.args[1:]) - # Syntax Errors + 
# Syntax errors if isinstance(client_error, client.err.ProgrammingError) and client_error.args[0] == 1064: return errors.QuerySyntaxError(client_error.args[1], query) - # Existence Errors + # Existence errors if isinstance(client_error, client.err.ProgrammingError) and client_error.args[0] == 1146: return errors.MissingTableError(client_error.args[1], query) if isinstance(client_error, client.err.InternalError) and client_error.args[0] == 1364: @@ -286,4 +286,3 @@ def transaction(self): raise else: self.commit_transaction() - \ No newline at end of file diff --git a/datajoint/jobs.py b/datajoint/jobs.py index b16858e52..9f8fead20 100644 --- a/datajoint/jobs.py +++ b/datajoint/jobs.py @@ -1,13 +1,27 @@ from .hash import key_hash import os import platform +import numpy as np from .table import Table -from .errors import DuplicateError, IntegrityError +from .errors import DuplicateError +from .settings import config +from .blob import MatStruct ERROR_MESSAGE_LENGTH = 2047 TRUNCATION_APPENDIX = '...truncated' +def _adapt_key_to_matstruct(key): + """ + Only used as a temporary measure for uninterrupted interoperability with datajoint 0.11. + Will be deprecated in datajoint 0.13 when support for native python data types is accepted. + :param key: a dict representing the primary key + :return: converted to dj.blob.MatStruct + """ + return (key if config.get('enable_python_native_blobs') + else np.reshape(np.rec.array((list(key.values())), names=list(key)), (1, 1)).view(MatStruct)) + + class JobTable(Table): """ A base relation with no definition. 
Allows reserving jobs @@ -73,7 +87,7 @@ def reserve(self, table_name, key): host=platform.node(), pid=os.getpid(), connection_id=self.connection.connection_id, - key=key, + key=_adapt_key_to_matstruct(key), user=self._user) try: self.insert1(job, ignore_extra_fields=True) @@ -101,15 +115,16 @@ def error(self, table_name, key, error_message, error_stack=None): """ if len(error_message) > ERROR_MESSAGE_LENGTH: error_message = error_message[:ERROR_MESSAGE_LENGTH-len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX - job_key = dict(table_name=table_name, key_hash=key_hash(key)) self.insert1( - dict(job_key, - status="error", - host=platform.node(), - pid=os.getpid(), - connection_id=self.connection.connection_id, - user=self._user, - key=key, - error_message=error_message, - error_stack=error_stack), + dict( + table_name=table_name, + key_hash=key_hash(key), + status="error", + host=platform.node(), + pid=os.getpid(), + connection_id=self.connection.connection_id, + user=self._user, + key=_adapt_key_to_matstruct(key), + error_message=error_message, + error_stack=error_stack), replace=True, ignore_extra_fields=True) From 8d7d7d3020f4f98559a8200b0a5a2adb1ece1471 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sun, 17 Nov 2019 15:35:08 -0600 Subject: [PATCH 09/39] fix #700 differently - bypass the restriction against python-native data types --- datajoint/jobs.py | 47 ++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/datajoint/jobs.py b/datajoint/jobs.py index 9f8fead20..4cb0c2554 100644 --- a/datajoint/jobs.py +++ b/datajoint/jobs.py @@ -1,27 +1,14 @@ from .hash import key_hash import os import platform -import numpy as np from .table import Table -from .errors import DuplicateError from .settings import config -from .blob import MatStruct +from .errors import DuplicateError ERROR_MESSAGE_LENGTH = 2047 TRUNCATION_APPENDIX = '...truncated' -def _adapt_key_to_matstruct(key): - """ - Only used as a temporary 
measure for uninterrupted interoperability with datajoint 0.11. - Will be deprecated in datajoint 0.13 when support for native python data types is accepted. - :param key: a dict representing the primary key - :return: converted to dj.blob.MatStruct - """ - return (key if config.get('enable_python_native_blobs') - else np.reshape(np.rec.array((list(key.values())), names=list(key)), (1, 1)).view(MatStruct)) - - class JobTable(Table): """ A base relation with no definition. Allows reserving jobs @@ -87,10 +74,11 @@ def reserve(self, table_name, key): host=platform.node(), pid=os.getpid(), connection_id=self.connection.connection_id, - key=_adapt_key_to_matstruct(key), + key=key, user=self._user) try: - self.insert1(job, ignore_extra_fields=True) + with config(enable_pyton_native_blobs=True): + self.insert1(job, ignore_extra_fields=True) except DuplicateError: return False return True @@ -115,16 +103,17 @@ def error(self, table_name, key, error_message, error_stack=None): """ if len(error_message) > ERROR_MESSAGE_LENGTH: error_message = error_message[:ERROR_MESSAGE_LENGTH-len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX - self.insert1( - dict( - table_name=table_name, - key_hash=key_hash(key), - status="error", - host=platform.node(), - pid=os.getpid(), - connection_id=self.connection.connection_id, - user=self._user, - key=_adapt_key_to_matstruct(key), - error_message=error_message, - error_stack=error_stack), - replace=True, ignore_extra_fields=True) + with config(enable_pyton_native_blobs=True): + self.insert1( + dict( + table_name=table_name, + key_hash=key_hash(key), + status="error", + host=platform.node(), + pid=os.getpid(), + connection_id=self.connection.connection_id, + user=self._user, + key=key, + error_message=error_message, + error_stack=error_stack), + replace=True, ignore_extra_fields=True) From 3e3d688f31ef20291ecaed2245c88a16a32d84ad Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 18 Nov 2019 12:59:06 -0600 Subject: [PATCH 10/39] fix typo 
from previous commit --- datajoint/jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/jobs.py b/datajoint/jobs.py index 4cb0c2554..c70d4042c 100644 --- a/datajoint/jobs.py +++ b/datajoint/jobs.py @@ -77,7 +77,7 @@ def reserve(self, table_name, key): key=key, user=self._user) try: - with config(enable_pyton_native_blobs=True): + with config(enable_python_native_blobs=True): self.insert1(job, ignore_extra_fields=True) except DuplicateError: return False @@ -103,7 +103,7 @@ def error(self, table_name, key, error_message, error_stack=None): """ if len(error_message) > ERROR_MESSAGE_LENGTH: error_message = error_message[:ERROR_MESSAGE_LENGTH-len(TRUNCATION_APPENDIX)] + TRUNCATION_APPENDIX - with config(enable_pyton_native_blobs=True): + with config(enable_python_native_blobs=True): self.insert1( dict( table_name=table_name, From ad02fe1763964362d28a4f4aadfa18f1e5608f76 Mon Sep 17 00:00:00 2001 From: Chris Turner Date: Mon, 18 Nov 2019 21:44:53 -0600 Subject: [PATCH 11/39] tests/{schema,test_jobs}.py: add tests/test_jobs.py:test_suppress_dj_errors + supporting tables: ErrorClassTable and DjExceptionNames added to test for #700: jobs table requires `enable_python_native_blobs`; additionally has utility to ensure suppress_errors can trap all DJ exceptions. populate of ErrorClassTable raises 1 DjExceptionName() per DjExceptionNames which should successfully result in jobs table being filled with len(DjExceptionNames) records. 
--- tests/schema.py | 32 ++++++++++++++++++++++++++++++++ tests/test_jobs.py | 12 ++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tests/schema.py b/tests/schema.py index f97758ed5..d2963555e 100644 --- a/tests/schema.py +++ b/tests/schema.py @@ -277,6 +277,38 @@ def make(self, key): raise SystemExit('SIGTERM received') +@schema +class DjExceptionNames(dj.Lookup): + definition = """ + dj_exception_name: char(64) + """ + @property + def contents(self): + ret = [] + for e in dir(dj.errors): + ea = getattr(dj.errors, e) + if callable(ea): + try: + werks = (isinstance(ea, type(Exception)) + and isinstance(ea(), Exception)) + except TypeError as te: + pass + if werks: + ret.append((e,)) + return ret + + +@schema +class ErrorClassTable(dj.Computed): + definition = """ + -> DjExceptionNames + """ + def make(self, key): + ename = key['dj_exception_name'] + raise getattr(dj.errors, ename)(ename) + + + @schema class DecimalPrimaryKey(dj.Lookup): definition = """ diff --git a/tests/test_jobs.py b/tests/test_jobs.py index 76e93aa98..804d1c313 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -88,6 +88,18 @@ def test_sigterm(): assert_equals(error_message, 'SystemExit: SIGTERM received') schema.schema.jobs.delete() + +def test_suppress_dj_errors(): + ''' test_suppress_dj_errors: dj errors suppressable w/o native py blobs''' + schema.schema.jobs.delete() + with dj.config(enable_python_native_blobs=False): + schema.ErrorClassTable().populate( + reserve_jobs=True, suppress_errors=True) + + assert len(schema.DjExceptionNames()) == len(schema.DjExceptionNames + & schema.schema.jobs) + + def test_long_error_message(): # clear out jobs table schema.schema.jobs.delete() From 53d073f98fca9e5e0bb018a20b78ba26136bdb05 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 10:46:17 -0600 Subject: [PATCH 12/39] cleanup --- datajoint/table.py | 6 +++--- tests/schema.py | 29 ++++++++++------------------- tests/test_alter.py | 3 ++- tests/test_jobs.py | 15 
++++++++------- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index 15562c3af..0c093f0cd 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -class _rename_map(tuple): +class _RenameMap(tuple): """ for internal use """ pass @@ -375,7 +375,7 @@ def delete(self, verbose=True): graph = conn.dependencies graph.load() delete_list = collections.OrderedDict( - (name, _rename_map(next(iter(graph.parents(name).items()))) if name.isdigit() else FreeTable(conn, name)) + (name, _RenameMap(next(iter(graph.parents(name).items()))) if name.isdigit() else FreeTable(conn, name)) for name in graph.descendants(self.full_table_name)) # construct restrictions for each relation @@ -405,7 +405,7 @@ def delete(self, verbose=True): table.restrict([ r.proj() if isinstance(r, FreeTable) else ( delete_list[r[0]].proj(**{a: b for a, b in r[1]['attr_map'].items()}) - if isinstance(r, _rename_map) else r) + if isinstance(r, _RenameMap) else r) for r in restrictions[name]]) if safe: print('About to delete:') diff --git a/tests/schema.py b/tests/schema.py index d2963555e..960ed3dda 100644 --- a/tests/schema.py +++ b/tests/schema.py @@ -5,7 +5,7 @@ import random import numpy as np import datajoint as dj -import os, signal +import inspect from . 
import PREFIX, CONN_INFO schema = dj.schema(PREFIX + '_test1', connection=dj.conn(**CONN_INFO)) @@ -278,35 +278,26 @@ def make(self, key): @schema -class DjExceptionNames(dj.Lookup): +class DjExceptionName(dj.Lookup): definition = """ dj_exception_name: char(64) """ + @property def contents(self): - ret = [] - for e in dir(dj.errors): - ea = getattr(dj.errors, e) - if callable(ea): - try: - werks = (isinstance(ea, type(Exception)) - and isinstance(ea(), Exception)) - except TypeError as te: - pass - if werks: - ret.append((e,)) - return ret + return [[member_name] for member_name, member_type in inspect.getmembers(dj.errors) + if inspect.isclass(member_type) and issubclass(member_type, Exception)] @schema -class ErrorClassTable(dj.Computed): +class ErrorClass(dj.Computed): definition = """ - -> DjExceptionNames + -> DjExceptionName """ - def make(self, key): - ename = key['dj_exception_name'] - raise getattr(dj.errors, ename)(ename) + def make(self, key): + exception_name = key['dj_exception_name'] + raise getattr(dj.errors, exception_name) @schema diff --git a/tests/test_alter.py b/tests/test_alter.py index 1c0296ca6..b188bba0d 100644 --- a/tests/test_alter.py +++ b/tests/test_alter.py @@ -39,4 +39,5 @@ def test_alter(): Experiment().alter(prompt=False) restored = schema.connection.query("SHOW CREATE TABLE " + Experiment.full_table_name).fetchone()[1] assert_equal(original, restored) - assert_not_equal(original, altered) \ No newline at end of file + assert_not_equal(original, altered) + diff --git a/tests/test_jobs.py b/tests/test_jobs.py index 804d1c313..ec45bc7bb 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -1,4 +1,3 @@ -from decimal import Decimal from nose.tools import assert_true, assert_false, assert_equals from . 
import schema from datajoint.jobs import ERROR_MESSAGE_LENGTH, TRUNCATION_APPENDIX @@ -46,6 +45,7 @@ def test_reserve_job(): assert_false(schema.schema.jobs, 'failed to clear error jobs') + def test_restrictions(): # clear out jobs table jobs = schema.schema.jobs @@ -62,6 +62,7 @@ def test_restrictions(): 'There should be only one entries with error status in table a') jobs.delete() + def test_sigint(): # clear out job table schema.schema.jobs.delete() @@ -75,6 +76,7 @@ def test_sigint(): assert_equals(error_message, 'KeyboardInterrupt') schema.schema.jobs.delete() + def test_sigterm(): # clear out job table schema.schema.jobs.delete() @@ -90,14 +92,13 @@ def test_sigterm(): def test_suppress_dj_errors(): - ''' test_suppress_dj_errors: dj errors suppressable w/o native py blobs''' + """ test_suppress_dj_errors: dj errors suppressable w/o native py blobs """ schema.schema.jobs.delete() with dj.config(enable_python_native_blobs=False): - schema.ErrorClassTable().populate( - reserve_jobs=True, suppress_errors=True) - - assert len(schema.DjExceptionNames()) == len(schema.DjExceptionNames - & schema.schema.jobs) + schema.ErrorClass.populate(reserve_jobs=True, suppress_errors=True) + number_of_exceptions = len(schema.DjExceptionName()) + assert_true(number_of_exceptions > 0) + assert_equals(number_of_exceptions, len(schema.schema.jobs)) def test_long_error_message(): From df27adab007070f22071eefa303574941298468b Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 11:28:52 -0600 Subject: [PATCH 13/39] minor syntax improvement --- tests/test_jobs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_jobs.py b/tests/test_jobs.py index ec45bc7bb..b54c819c5 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -96,9 +96,7 @@ def test_suppress_dj_errors(): schema.schema.jobs.delete() with dj.config(enable_python_native_blobs=False): schema.ErrorClass.populate(reserve_jobs=True, suppress_errors=True) - number_of_exceptions = 
len(schema.DjExceptionName()) - assert_true(number_of_exceptions > 0) - assert_equals(number_of_exceptions, len(schema.schema.jobs)) + assert_true(len(schema.DjExceptionName()) == len(schema.schema.jobs) > 0) def test_long_error_message(): From 8f46e05f6108cac05d7341cc3843824a197e8663 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 12:08:10 -0600 Subject: [PATCH 14/39] minor syntax --- tests/test_jobs.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tests/test_jobs.py b/tests/test_jobs.py index b54c819c5..89e0734de 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -17,37 +17,35 @@ def test_reserve_job(): table_name = 'fake_table' # reserve jobs for key in subjects.fetch('KEY'): - assert_true(schema.schema.jobs.reserve(table_name, key), - 'failed to reserve a job') + assert_true(schema.schema.jobs.reserve(table_name, key), 'failed to reserve a job') + # refuse jobs for key in subjects.fetch('KEY'): - assert_false(schema.schema.jobs.reserve(table_name, key), - 'failed to respect reservation') + assert_false(schema.schema.jobs.reserve(table_name, key), 'failed to respect reservation') + # complete jobs for key in subjects.fetch('KEY'): schema.schema.jobs.complete(table_name, key) - assert_false(schema.schema.jobs, - 'failed to free jobs') + assert_false(schema.schema.jobs, 'failed to free jobs') + # reserve jobs again for key in subjects.fetch('KEY'): - assert_true(schema.schema.jobs.reserve(table_name, key), - 'failed to reserve new jobs') + assert_true(schema.schema.jobs.reserve(table_name, key), 'failed to reserve new jobs') + # finish with error for key in subjects.fetch('KEY'): - schema.schema.jobs.error(table_name, key, - "error message") + schema.schema.jobs.error(table_name, key, "error message") + # refuse jobs with errors for key in subjects.fetch('KEY'): - assert_false(schema.schema.jobs.reserve(table_name, key), - 'failed to ignore error jobs') + 
assert_false(schema.schema.jobs.reserve(table_name, key), 'failed to ignore error jobs') + # clear error jobs (schema.schema.jobs & dict(status="error")).delete() - assert_false(schema.schema.jobs, - 'failed to clear error jobs') + assert_false(schema.schema.jobs, 'failed to clear error jobs') def test_restrictions(): - # clear out jobs table jobs = schema.schema.jobs jobs.delete() jobs.reserve('a', {'key': 'a1'}) @@ -56,10 +54,9 @@ def test_restrictions(): jobs.error('a', {'key': 'a2'}, 'error') jobs.error('b', {'key': 'b1'}, 'error') - assert_true(len(jobs & 'table_name = "a"') == 2, 'There should be two entries for table a') - assert_true(len(jobs & 'status = "error"') == 2, 'There should be two entries with error status') - assert_true(len(jobs & 'table_name = "a"' & 'status = "error"') == 1, - 'There should be only one entries with error status in table a') + assert_true(len(jobs & {'table_name': "a"}) == 2) + assert_true(len(jobs & {'status': "error"}) == 2) + assert_true(len(jobs & {'table_name': "a", 'status': "error"}) == 1) jobs.delete() @@ -109,7 +106,7 @@ def test_long_error_message(): assert_true(subjects) table_name = 'fake_table' - key = list(subjects.fetch('KEY'))[0] + key = subjects.fetch('KEY')[0] # test long error message schema.schema.jobs.reserve(table_name, key) From 6ed83c67ee7f8fbfce637330037572c1dada24bc Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 13:38:37 -0600 Subject: [PATCH 15/39] fix #675 --- datajoint/diagram.py | 6 +++--- requirements.txt | 2 +- tests/test_jobs.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datajoint/diagram.py b/datajoint/diagram.py index 497553963..cc861e24d 100644 --- a/datajoint/diagram.py +++ b/datajoint/diagram.py @@ -236,8 +236,8 @@ def _make_graph(self): for name in self.nodes_to_show: foreign_attributes = set( attr for p in self.in_edges(name, data=True) for attr in p[2]['attr_map'] if p[2]['primary']) - self.node[name]['distinguished'] = ( - 
'primary_key' in self.node[name] and foreign_attributes < self.node[name]['primary_key']) + self.nodes[name]['distinguished'] = ( + 'primary_key' in self.nodes[name] and foreign_attributes < self.nodes[name]['primary_key']) # include aliased nodes that are sandwiched between two displayed nodes gaps = set(nx.algorithms.boundary.node_boundary(self, self.nodes_to_show)).intersection( nx.algorithms.boundary.node_boundary(nx.DiGraph(self).reverse(), self.nodes_to_show)) @@ -307,7 +307,7 @@ def make_dot(self): props = graph.get_edge_data(src, dest) edge.set_color('#00000040') edge.set_style('solid' if props['primary'] else 'dashed') - master_part = graph.node[dest]['node_type'] is Part and dest.startswith(src+'.') + master_part = graph.nodes[dest]['node_type'] is Part and dest.startswith(src+'.') edge.set_weight(3 if master_part else 1) edge.set_arrowhead('none') edge.set_penwidth(.75 if props['multi'] else 2) diff --git a/requirements.txt b/requirements.txt index 7ca3bd4c2..5c84e822c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ pyparsing ipython pandas tqdm -networkx<2.4 +networkx pydot minio matplotlib diff --git a/tests/test_jobs.py b/tests/test_jobs.py index 89e0734de..7a26e3a04 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -10,11 +10,11 @@ def test_reserve_job(): - # clean jobs table - schema.schema.jobs.delete() + schema.schema.jobs.delete() assert_true(subjects) table_name = 'fake_table' + # reserve jobs for key in subjects.fetch('KEY'): assert_true(schema.schema.jobs.reserve(table_name, key), 'failed to reserve a job') From 91556d5c8b4d0ae6a2cb22f16ce6d6377a40434e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 15:22:05 -0600 Subject: [PATCH 16/39] Fix #698 --- datajoint/table.py | 11 +++-------- datajoint/user_tables.py | 4 ++-- tests/schema.py | 3 +++ tests/test_relation.py | 6 ++++++ 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index 
0c093f0cd..4a44f48d5 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -45,14 +45,9 @@ def heading(self): """ if self._heading is None: self._heading = Heading() # instance-level heading - if not self._heading: # lazy loading of heading - if self.connection is None: - raise DataJointError( - 'DataJoint class is missing a database connection. ' - 'Missing schema decorator on the class? (e.g. @schema)') - else: - self._heading.init_from_database( - self.connection, self.database, self.table_name, self.declaration_context) + if not self._heading and self.connection is not None: # lazy loading of heading + self._heading.init_from_database( + self.connection, self.database, self.table_name, self.declaration_context) return self._heading def declare(self, context=None): diff --git a/datajoint/user_tables.py b/datajoint/user_tables.py index 216e4b37c..3942264b5 100644 --- a/datajoint/user_tables.py +++ b/datajoint/user_tables.py @@ -13,7 +13,7 @@ # attributes that trigger instantiation of user classes supported_class_attrs = { 'key_source', 'describe', 'alter', 'heading', 'populate', 'progress', 'primary_key', 'proj', 'aggr', - 'fetch', 'fetch1','head', 'tail', + 'fetch', 'fetch1', 'head', 'tail', 'insert', 'insert1', 'drop', 'drop_quick', 'delete', 'delete_quick'} @@ -92,7 +92,7 @@ def table_name(cls): @ClassProperty def full_table_name(cls): - if cls not in {Manual, Imported, Lookup, Computed, Part}: + if cls not in {Manual, Imported, Lookup, Computed, Part, UserTable}: if cls.database is None: raise DataJointError('Class %s is not properly declared (schema decorator not applied?)' % cls.__name__) return r"`{0:s}`.`{1:s}`".format(cls.database, cls.table_name) diff --git a/tests/schema.py b/tests/schema.py index 960ed3dda..7848274be 100644 --- a/tests/schema.py +++ b/tests/schema.py @@ -13,6 +13,9 @@ @schema class TTest(dj.Lookup): + """ + doc string + """ definition = """ key : int # key --- diff --git a/tests/test_relation.py b/tests/test_relation.py index 
eecce31ed..734bdc03a 100644 --- a/tests/test_relation.py +++ b/tests/test_relation.py @@ -36,6 +36,12 @@ def setup_class(cls): cls.img = schema.Image() cls.trash = schema.UberTrash() + def test_class_help(self): + help(schema.TTest) + + def test_instance_help(self): + help(schema.TTest()) + def test_contents(self): """ test the ability of tables to self-populate using the contents property From 2c8aedf4bc3ceb4cab34a9ed1985d036ac462817 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 16:09:01 -0600 Subject: [PATCH 17/39] fix #699 -- add table definition to doc string --- datajoint/schema.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datajoint/schema.py b/datajoint/schema.py index c573b81de..c6e777d63 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -191,6 +191,10 @@ def process_table_class(self, table_class, context, assert_declared=False): instance.declare(context) is_declared = is_declared or instance.is_declared + # add table definition to the doc string + if isinstance(table_class.definition, str): + table_class.__doc__ = (table_class.__doc__ or "") + "\n\nTable definition:\n\n" + table_class.definition + # fill values in Lookup tables from their contents property if isinstance(instance, Lookup) and hasattr(instance, 'contents') and is_declared: contents = list(instance.contents) From bc691230d90c159259b3452ce2d03160befa9178 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 16:24:55 -0600 Subject: [PATCH 18/39] Table doc strings now display reverse-engineered table declarations --- datajoint/heading.py | 3 ++- datajoint/schema.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/datajoint/heading.py b/datajoint/heading.py index 60f636b02..a025fb130 100644 --- a/datajoint/heading.py +++ b/datajoint/heading.py @@ -247,7 +247,8 @@ def init_from_database(self, conn, database, table_name, context): category = next(c for c in SPECIAL_TYPES if TYPE_PATTERN[c].match(attr['type'])) 
except StopIteration: if attr['type'].startswith('external'): - raise DataJointError('Legacy datatype `{type}`.'.format(**attr)) from None + raise DataJointError('Legacy datatype `{type}`. ' + 'Migrate your external stores to datajoint 0.12'.format(**attr)) from None raise DataJointError('Unknown attribute type `{type}`'.format(**attr)) from None if category == 'FILEPATH' and not _support_filepath_types(): raise DataJointError(""" diff --git a/datajoint/schema.py b/datajoint/schema.py index c6e777d63..eb9dfba52 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -193,7 +193,8 @@ def process_table_class(self, table_class, context, assert_declared=False): # add table definition to the doc string if isinstance(table_class.definition, str): - table_class.__doc__ = (table_class.__doc__ or "") + "\n\nTable definition:\n\n" + table_class.definition + table_class.__doc__ = ((table_class.__doc__ or "") + "\n\nTable definition:\n" + + table_class.describe(printout=False)) # fill values in Lookup tables from their contents property if isinstance(instance, Lookup) and hasattr(instance, 'contents') and is_declared: From 1034797d449233c3ae2fe65f12eda8b654c00416 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 16:37:33 -0600 Subject: [PATCH 19/39] update error message for un-upgraded external stores. --- datajoint/heading.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datajoint/heading.py b/datajoint/heading.py index a025fb130..217657423 100644 --- a/datajoint/heading.py +++ b/datajoint/heading.py @@ -247,8 +247,10 @@ def init_from_database(self, conn, database, table_name, context): category = next(c for c in SPECIAL_TYPES if TYPE_PATTERN[c].match(attr['type'])) except StopIteration: if attr['type'].startswith('external'): - raise DataJointError('Legacy datatype `{type}`. 
' - 'Migrate your external stores to datajoint 0.12'.format(**attr)) from None + url = "https://docs.datajoint.io/python/admin/5-blob-config.html" \ + "#migration-between-datajoint-v0-11-and-v0-12" + raise DataJointError('Legacy datatype `{type}`. Migrate your external stores to ' + 'datajoint 0.12: {url}'.format(url=url, **attr)) from None raise DataJointError('Unknown attribute type `{type}`'.format(**attr)) from None if category == 'FILEPATH' and not _support_filepath_types(): raise DataJointError(""" From d934b7e890b80b46ecca04588d4ffc74fd0cb932 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 16:54:53 -0600 Subject: [PATCH 20/39] improve table definition in the doc string --- datajoint/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/schema.py b/datajoint/schema.py index eb9dfba52..75b24c489 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -194,7 +194,7 @@ def process_table_class(self, table_class, context, assert_declared=False): # add table definition to the doc string if isinstance(table_class.definition, str): table_class.__doc__ = ((table_class.__doc__ or "") + "\n\nTable definition:\n" - + table_class.describe(printout=False)) + + table_class.describe(printout=False, context=context)) # fill values in Lookup tables from their contents property if isinstance(instance, Lookup) and hasattr(instance, 'contents') and is_declared: From 2b8588c1d526a99c229d01483081f6099dc4d324 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 Nov 2019 16:56:02 -0600 Subject: [PATCH 21/39] minor improvement in display of table doc strings --- datajoint/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/schema.py b/datajoint/schema.py index 75b24c489..026023d07 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -193,7 +193,7 @@ def process_table_class(self, table_class, context, assert_declared=False): # add table definition to the doc string if 
isinstance(table_class.definition, str): - table_class.__doc__ = ((table_class.__doc__ or "") + "\n\nTable definition:\n" + table_class.__doc__ = ((table_class.__doc__ or "") + "\nTable definition:\n\n" + table_class.describe(printout=False, context=context)) # fill values in Lookup tables from their contents property From c7b79892243dd824d19541b770d1736f8eb8700e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 20 Nov 2019 10:44:57 -0600 Subject: [PATCH 22/39] replace .describe() with .definition to augment the table docstring --- datajoint/schema.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datajoint/schema.py b/datajoint/schema.py index 026023d07..55712f7aa 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -193,8 +193,7 @@ def process_table_class(self, table_class, context, assert_declared=False): # add table definition to the doc string if isinstance(table_class.definition, str): - table_class.__doc__ = ((table_class.__doc__ or "") + "\nTable definition:\n\n" - + table_class.describe(printout=False, context=context)) + table_class.__doc__ = (table_class.__doc__ or "") + "\nTable definition:\n\n" + table_class.definition # fill values in Lookup tables from their contents property if isinstance(instance, Lookup) and hasattr(instance, 'contents') and is_declared: From ca775da6cc713da8413f2eb744a0178f3e8998f4 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 21 Nov 2019 11:48:44 -0600 Subject: [PATCH 23/39] improve unit test for definition in docstring --- tests/test_declare.py | 14 ++++++++++++++ tests/test_relation.py | 6 ------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/test_declare.py b/tests/test_declare.py index 3a734cd1d..62bd55cba 100644 --- a/tests/test_declare.py +++ b/tests/test_declare.py @@ -22,6 +22,20 @@ def test_schema_decorator(): assert_true(issubclass(Subject, dj.Lookup)) assert_true(not issubclass(Subject, dj.Part)) + @staticmethod + def test_class_help(): + 
help(TTest) + help(TTest2) + assert_true(TTest.definition in TTest.__doc__) + assert_true(TTest.definition in TTest2.__doc__) + + @staticmethod + def test_instance_help(): + help(TTest()) + help(TTest2()) + assert_true(TTest().definition in TTest().__doc__) + assert_true(TTest2().definition in TTest2().__doc__) + @staticmethod def test_describe(): """real_definition should match original definition""" diff --git a/tests/test_relation.py b/tests/test_relation.py index 734bdc03a..eecce31ed 100644 --- a/tests/test_relation.py +++ b/tests/test_relation.py @@ -36,12 +36,6 @@ def setup_class(cls): cls.img = schema.Image() cls.trash = schema.UberTrash() - def test_class_help(self): - help(schema.TTest) - - def test_instance_help(self): - help(schema.TTest()) - def test_contents(self): """ test the ability of tables to self-populate using the contents property From b646061f24976435aa25108d6ec347eafc93498b Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 21 Nov 2019 13:23:52 -0600 Subject: [PATCH 24/39] minor --- datajoint/table.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index 4a44f48d5..b7d4ced6e 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -406,7 +406,7 @@ def delete(self, verbose=True): print('About to delete:') if not already_in_transaction: - self.connection.start_transaction() + conn.start_transaction() total = 0 try: for name, table in reversed(list(delete_list.items())): @@ -418,25 +418,25 @@ def delete(self, verbose=True): except: # Delete failed, perhaps due to insufficient privileges. Cancel transaction. 
if not already_in_transaction: - self.connection.cancel_transaction() + conn.cancel_transaction() raise else: assert not (already_in_transaction and safe) if not total: print('Nothing to delete') if not already_in_transaction: - self.connection.cancel_transaction() + conn.cancel_transaction() else: if already_in_transaction: if verbose: print('The delete is pending within the ongoing transaction.') else: if not safe or user_choice("Proceed?", default='no') == 'yes': - self.connection.commit_transaction() + conn.commit_transaction() if verbose or safe: print('Committed.') else: - self.connection.cancel_transaction() + conn.cancel_transaction() if verbose or safe: print('Cancelled deletes.') From 60c952b4825dbf461cbff4c695157930701e3ea9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 22 Nov 2019 14:23:13 -0600 Subject: [PATCH 25/39] update CHANGELOG for version 0.12.3 --- CHANGELOG.md | 5 +++++ datajoint/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ec9152c5..27dd1d75d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## Release notes +## 0.12.3 -- Nov 22, 2019 +* Bugfix #675 (PR #705) networkx 2.4+ is now supported +* Bugfix #698 and #699 (PR #706) display table definition in doc string and help +* Bugfix #701 (PR #702) job reservation works with native python datatype support disabled + ### 0.12.2 -- Nov 11, 2019 * Bugfix - Convoluted error thrown if there is a reference to a non-existent table attribute (#691) * Bugfix - Insert into external does not trim leading slash if defined in `dj.config['stores']['']['location']` (#692) diff --git a/datajoint/version.py b/datajoint/version.py index ff1ce7e17..18c950f21 100644 --- a/datajoint/version.py +++ b/datajoint/version.py @@ -1,3 +1,3 @@ -__version__ = "0.12.2" +__version__ = "0.12.3" assert len(__version__) <= 10 # The log table limits version to the 10 characters From 95c22497c6b25f7ce655f01cee1ff951a5ea9536 Mon Sep 17 
00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 22 Nov 2019 14:40:45 -0600 Subject: [PATCH 26/39] update docs for release 0.12.3 --- datajoint/blob.py | 3 +++ docs-parts/intro/Releases_lang1.rst | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 2fb5b1088..c678d7de5 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -253,6 +253,9 @@ def pack_recarray(self, array): def read_sparse_array(self): raise DataJointError('datajoint-python does not yet support sparse arrays. Issue (#590)') + def read_scalar(selfs): + + def read_decimal(self): return Decimal(self.read_string()) diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index afd2fa9ec..3e6f10782 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,4 +1,11 @@ -0.12.1 -- Nov 11, 2019 +0.12.3 -- Nov 22, 2019 +---------------------- +* Bugfix - networkx 2.4 causes error in diagrams (#675) PR #705 +* Bugfix - include definition in doc string and help (#698, #699) PR #706 +* Bugfix - job reservation fails when native python datatype support is disabled (#701) PR #702 + + +0.12.2 -- Nov 11, 2019 ------------------------- * Bugfix - Convoluted error thrown if there is a reference to a non-existent table attribute (#691) * Bugfix - Insert into external does not trim leading slash if defined in `dj.config['stores']['']['location']` (#692) From b3ee8d9ffbea6d8ed5563b1517966817437a66f9 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 22 Nov 2019 15:45:07 -0600 Subject: [PATCH 27/39] blob now accepts native complex scalars --- datajoint/blob.py | 9 ++------- tests/test_blob.py | 3 +++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index c678d7de5..390ef04bd 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -152,10 +152,8 @@ def pack_blob(self, obj): return self.pack_array(np.array(obj)) if isinstance(obj, 
(bool, np.bool, np.bool_)): return self.pack_array(np.array(obj)) - if isinstance(obj, float): - return self.pack_array(np.array(obj, dtype=np.float64)) - if isinstance(obj, int): - return self.pack_array(np.array(obj, dtype=np.int64)) + if isinstance(obj, (float, int, complex)): + return self.pack_array(np.array(obj)) if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): return self.pack_datetime(obj) if isinstance(obj, Decimal): @@ -253,9 +251,6 @@ def pack_recarray(self, array): def read_sparse_array(self): raise DataJointError('datajoint-python does not yet support sparse arrays. Issue (#590)') - def read_scalar(selfs): - - def read_decimal(self): return Decimal(self.read_string()) diff --git a/tests/test_blob.py b/tests/test_blob.py index 3549dd476..4520a83c0 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -23,6 +23,9 @@ def test_pack(): x = np.random.randn(10) assert_array_equal(x, unpack(pack(x)), "Arrays do not match!") + x = 7j + assert_equal(x, unpack(pack(x)), "Complex scalar does not match") + x = np.float32(np.random.randn(3, 4, 5)) assert_array_equal(x, unpack(pack(x)), "Arrays do not match!") From bb9351c267d6b58d401a943cfffedcbfbda3839c Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 13 Feb 2021 04:08:36 -0600 Subject: [PATCH 28/39] add OVERVIEW.md --- OVERVIEW.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 OVERVIEW.md diff --git a/OVERVIEW.md b/OVERVIEW.md new file mode 100644 index 000000000..7c8807925 --- /dev/null +++ b/OVERVIEW.md @@ -0,0 +1,28 @@ +# DataJoint Overview + +DataJoint is a library for interacting with scientific databases integrating computational dependencies as part of the data model. It is an ideal tool for team projects working on shared data-centric computational workflows. + +## Why use databases in scientific studies? 
+ +Many scientists are reluctant to use databases due to their perceived unwieldiness, opting instead to use file repositories for managing their shared data. [Gray, 2005](https://arxiv.org/abs/cs/0502008) + +Yet databases provide several key advantages when it comes to sharing structured dynamic data: + +1. **Data structure:** databases communicate and enforce structure in data that reflects the logic of the scientific study. +2. **Concurrent access:** databases support transactions to allow multiple agents to read and write the data concurrently. +3. **Consistency and integrity:** database provide ways to ensure that data operations from multiple parties are combined correctly without loss, misidentification, or mismatches. +4. **Queries:** Databases simplify and accelerate data queries -- functions on data to obtain precise slices of the data without needing to send the entire dataset for analysis. + +## What does DataJoint bring? +DataJoint solves several key problems for using databases effectively in scientific projects: + +1. **Complete relational data model:** database programming directly from a scientific computing language such as MATLAB and Python without the need for SQL. +2. **Data definition language:** to define tables and dependencies in simple and consistent ways. +3. **Diagramming notation:** to visualize and navigate tables and dependencies. +4. **Query language:** to create flexible and precise queries with only a few operators. +5. **Serialization framework:** to store and retrieve numerical arrays and other data structures directly in the database. +6. 
**Automated distributed computations:** computational dependencies + + + + From 2d4937777705037a8846bee1617008f969a3cda5 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 13 Feb 2021 04:51:52 -0600 Subject: [PATCH 29/39] minor edits in OVERVIEW.md --- OVERVIEW.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index 7c8807925..879128417 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -2,13 +2,13 @@ DataJoint is a library for interacting with scientific databases integrating computational dependencies as part of the data model. It is an ideal tool for team projects working on shared data-centric computational workflows. -## Why use databases in scientific studies? +## Why use databases in scientific studes? Many scientists are reluctant to use databases due to their perceived unwieldiness, opting instead to use file repositories for managing their shared data. [Gray, 2005](https://arxiv.org/abs/cs/0502008) Yet databases provide several key advantages when it comes to sharing structured dynamic data: -1. **Data structure:** databases communicate and enforce structure in data that reflects the logic of the scientific study. +1. **Data structure:** databases communicate and enforce structure reflecting the logic of the scientific study. 2. **Concurrent access:** databases support transactions to allow multiple agents to read and write the data concurrently. 3. **Consistency and integrity:** database provide ways to ensure that data operations from multiple parties are combined correctly without loss, misidentification, or mismatches. 4. **Queries:** Databases simplify and accelerate data queries -- functions on data to obtain precise slices of the data without needing to send the entire dataset for analysis. @@ -21,8 +21,4 @@ DataJoint solves several key problems for using databases effectively in scienti 3. **Diagramming notation:** to visualize and navigate tables and dependencies. 4. 
**Query language:** to create flexible and precise queries with only a few operators. 5. **Serialization framework:** to store and retrieve numerical arrays and other data structures directly in the database. -6. **Automated distributed computations:** computational dependencies - - - - +6. **Support for automated distributed computations:** for computational dependencies in the data. \ No newline at end of file From 52f1f70bf77901c68901be5d242d6f59fbf8ef7f Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 13 Mar 2021 12:23:11 -0600 Subject: [PATCH 30/39] minor error message wording --- datajoint/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/connection.py b/datajoint/connection.py index 14e457d0b..ffc6ed67a 100644 --- a/datajoint/connection.py +++ b/datajoint/connection.py @@ -272,7 +272,7 @@ def query(self, query, args=(), *, as_dict=False, suppress_warnings=True, reconn # check cache first: use_query_cache = bool(self._query_cache) if use_query_cache and not re.match(r"\s*(SELECT|SHOW)", query): - raise errors.DataJointError("Only SELECT query are allowed when query caching is on.") + raise errors.DataJointError("Only SELECT queries are allowed when query caching is on.") if use_query_cache: if not config['query_cache']: raise errors.DataJointError("Provide filepath dj.config['query_cache'] when using query caching.") From e45e922092fc5bdc8ce4e2b64d42082ef9a329a7 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 13 Mar 2021 15:07:17 -0600 Subject: [PATCH 31/39] comment and docstring corrections --- datajoint/expression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datajoint/expression.py b/datajoint/expression.py index 9b7d10544..1b1720fcf 100644 --- a/datajoint/expression.py +++ b/datajoint/expression.py @@ -36,7 +36,7 @@ class QueryExpression: """ _restriction = None _restriction_attributes = None - _left = [] # True for left joins, False for inner joins + _left = [] # 
list of booleans True for left joins, False for inner joins _original_heading = None # heading before projections # subclasses or instantiators must provide values @@ -248,7 +248,7 @@ def join(self, other, semantic_check=True, left=False): if semantic_check: assert_join_compatibility(self, other) join_attributes = set(n for n in self.heading.names if n in other.heading.names) - # needs subquery if FROM class has common attributes with the other's FROM clause + # needs subquery if self's FROM clause has common attributes with other's FROM clause need_subquery1 = need_subquery2 = bool( (set(self.original_heading.names) & set(other.original_heading.names)) - join_attributes) @@ -287,7 +287,7 @@ def proj(self, *attributes, **named_attributes): self.proj(...) or self.proj(Ellipsis) -- include all attributes (return self) self.proj() -- include only primary key self.proj('attr1', 'attr2') -- include primary key and attributes attr1 and attr2 - self.proj(..., '-attr1', '-attr2') -- include attributes except attr1 and attr2 + self.proj(..., '-attr1', '-attr2') -- include all attributes except attr1 and attr2 self.proj(name1='attr1') -- include primary key and 'attr1' renamed as name1 self.proj('attr1', dup='(attr1)') -- include primary key and attribute attr1 twice, with the duplicate 'dup' self.proj(k='abs(attr1)') adds the new attribute k with the value computed as an expression (SQL syntax) From fbdef213b93f2344fd496c0aba610665aef62887 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 25 Sep 2021 20:21:14 -0500 Subject: [PATCH 32/39] minor --- tests/test_autopopulate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_autopopulate.py b/tests/test_autopopulate.py index 081787670..1875a6743 100644 --- a/tests/test_autopopulate.py +++ b/tests/test_autopopulate.py @@ -8,7 +8,6 @@ class TestPopulate: """ Test base relations: insert, delete """ - def setUp(self): self.user = schema.User() self.subject = schema.Subject() @@ -53,7 +52,7 @@ 
def test_populate(self): def test_allow_direct_insert(self): assert_true(self.subject, 'root tables are empty') - key = self.subject.fetch('KEY')[0] + key = self.subject.fetch('KEY', limit=1)[0] key['experiment_id'] = 1000 key['experiment_date'] = '2018-10-30' self.experiment.insert1(key, allow_direct_insert=True) From 6fea05873cff16d58ef5165dbd9586abcb66b87d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 25 Sep 2021 20:23:42 -0500 Subject: [PATCH 33/39] add OVERVIEW.md --- OVERVIEW.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index 879128417..37b9a7df1 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -1,19 +1,15 @@ # DataJoint Overview -DataJoint is a library for interacting with scientific databases integrating computational dependencies as part of the data model. It is an ideal tool for team projects working on shared data-centric computational workflows. +DataJoint is a library for interacting with scientific databases that support computational dependencies as part of the data model. +DataJoint serves as a principal framework for organizing data and computations in team projects. -## Why use databases in scientific studes? - -Many scientists are reluctant to use databases due to their perceived unwieldiness, opting instead to use file repositories for managing their shared data. [Gray, 2005](https://arxiv.org/abs/cs/0502008) - -Yet databases provide several key advantages when it comes to sharing structured dynamic data: +Databases provide several key advantages when it comes to sharing structured dynamic data: 1. **Data structure:** databases communicate and enforce structure reflecting the logic of the scientific study. 2. **Concurrent access:** databases support transactions to allow multiple agents to read and write the data concurrently. 3. 
**Consistency and integrity:** database provide ways to ensure that data operations from multiple parties are combined correctly without loss, misidentification, or mismatches. 4. **Queries:** Databases simplify and accelerate data queries -- functions on data to obtain precise slices of the data without needing to send the entire dataset for analysis. -## What does DataJoint bring? DataJoint solves several key problems for using databases effectively in scientific projects: 1. **Complete relational data model:** database programming directly from a scientific computing language such as MATLAB and Python without the need for SQL. @@ -21,4 +17,4 @@ DataJoint solves several key problems for using databases effectively in scienti 3. **Diagramming notation:** to visualize and navigate tables and dependencies. 4. **Query language:** to create flexible and precise queries with only a few operators. 5. **Serialization framework:** to store and retrieve numerical arrays and other data structures directly in the database. -6. **Support for automated distributed computations:** for computational dependencies in the data. \ No newline at end of file +6. **Support for automated distributed computations:** for computational dependencies in the data. From c64102906e19ad40e857dbb5d0aadd69dadfdf49 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sat, 25 Sep 2021 20:32:09 -0500 Subject: [PATCH 34/39] remove OVERVIEW.md --- OVERVIEW.md | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 OVERVIEW.md diff --git a/OVERVIEW.md b/OVERVIEW.md deleted file mode 100644 index 37b9a7df1..000000000 --- a/OVERVIEW.md +++ /dev/null @@ -1,20 +0,0 @@ -# DataJoint Overview - -DataJoint is a library for interacting with scientific databases that support computational dependencies as part of the data model. -DataJoint serves as a principal framework for organizing data and computations in team projects. 
- -Databases provide several key advantages when it comes to sharing structured dynamic data: - -1. **Data structure:** databases communicate and enforce structure reflecting the logic of the scientific study. -2. **Concurrent access:** databases support transactions to allow multiple agents to read and write the data concurrently. -3. **Consistency and integrity:** database provide ways to ensure that data operations from multiple parties are combined correctly without loss, misidentification, or mismatches. -4. **Queries:** Databases simplify and accelerate data queries -- functions on data to obtain precise slices of the data without needing to send the entire dataset for analysis. - -DataJoint solves several key problems for using databases effectively in scientific projects: - -1. **Complete relational data model:** database programming directly from a scientific computing language such as MATLAB and Python without the need for SQL. -2. **Data definition language:** to define tables and dependencies in simple and consistent ways. -3. **Diagramming notation:** to visualize and navigate tables and dependencies. -4. **Query language:** to create flexible and precise queries with only a few operators. -5. **Serialization framework:** to store and retrieve numerical arrays and other data structures directly in the database. -6. **Support for automated distributed computations:** for computational dependencies in the data. 
From b52a130ea946b27903d4e0e002fda1561711626c Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Sun, 26 Sep 2021 08:21:48 -0500 Subject: [PATCH 35/39] doc string improvements in autopopulate --- datajoint/autopopulate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 222b5d7ba..c276d4210 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -52,8 +52,8 @@ class AutoPopulate: def key_source(self): """ :return: the relation whose primary key values are passed, sequentially, to the - ``make`` method when populate() is called. - The default value is the join of the parent relations. + .make method when .populate() is called. + The default value is the join of the tables references by the primary key. Users may override to change the granularity or the scope of populate() calls. """ def _rename_attributes(table, props): @@ -123,9 +123,9 @@ def populate(self, *restrictions, suppress_errors=False, return_exception_object reserve_jobs=False, order="original", limit=None, max_calls=None, display_progress=False, processes=1): """ - rel.populate() calls rel.make(key) for every primary key in self.key_source - for which there is not already a tuple in rel. - :param restrictions: a list of restrictions each restrict (rel.key_source - target.proj()) + table.populate() calls table.make(key) for every primary key in self.key_source + for which there is not already a tuple in table. + :param restrictions: a list of restrictions each restrict (table.key_source - target.proj()) :param suppress_errors: if True, do not terminate execution. 
:param return_exception_objects: return error objects instead of just error messages :param reserve_jobs: if True, reserve jobs to populate in asynchronous fashion From 0079a7e85dbed8a686e2acfee89709f94dee3290 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 27 Sep 2021 16:26:57 -0500 Subject: [PATCH 36/39] minor cleanup in autopopulate --- datajoint/autopopulate.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index c276d4210..495597319 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -17,9 +17,9 @@ # --- helper functions for multiprocessing -- -def _initializer(table, jobs, populate_kwargs): +def _initialize_populate(table, jobs, populate_kwargs): """ - Process initializer for mulitprocessing. + Initialize the process for mulitprocessing. Saves the unpickled copy of the table to the current process and reconnects. """ process = mp.current_process() @@ -159,9 +159,9 @@ def handler(signum, frame): logger.info('Found %d keys to populate' % len(keys)) - if max_calls is not None: - keys = keys[:max_calls] + keys = keys[:max_calls] nkeys = len(keys) + if processes > 1: processes = min(processes, nkeys, mp.cpu_count()) @@ -179,7 +179,7 @@ def handler(signum, frame): # spawn multiple processes self.connection.close() # disconnect parent process from MySQL server del self.connection._conn.ctx # SSLContext is not pickleable - with mp.Pool(processes, _initializer, (self, populate_kwargs)) as pool: + with mp.Pool(processes, _initialize_populate, (self, populate_kwargs)) as pool: if display_progress: with tqdm(desc="Processes: ", total=nkeys) as pbar: for error in pool.imap(_call_populate1, keys, chunksize=1): @@ -201,12 +201,12 @@ def handler(signum, frame): def _populate1(self, key, jobs, suppress_errors, return_exception_objects): """ - populates table for one key, calling self.make inside a transaction + populates table for one source key, 
calling self.make inside a transaction. :param jobs: the jobs table or None if not reserve_jobs :param key: dict specifying job to populate :param suppress_errors: bool if errors should be suppressed and returned :param return_exception_objects: if True, errors must be returned as objects - :return: key and error when suppress_errors=True, otherwise None + :return: (key, error) when suppress_errors=True, otherwise None """ make = self._make_tuples if hasattr(self, '_make_tuples') else self.make @@ -248,8 +248,8 @@ def _populate1(self, key, jobs, suppress_errors, return_exception_objects): def progress(self, *restrictions, display=True): """ - report progress of populating the table - :return: remaining, total -- tuples to be populated + Report the progress of populating the table. + :return: (remaining, total) -- numbers of tuples to be populated """ todo = self._jobs_to_do(restrictions) total = len(todo) From 85520edbc03d460232d2fa383d928a24a5518db0 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 7 Oct 2021 16:27:27 -0500 Subject: [PATCH 37/39] minor PEP8 --- datajoint/autopopulate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 495597319..0619e91a9 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -161,7 +161,7 @@ def handler(signum, frame): keys = keys[:max_calls] nkeys = len(keys) - + if processes > 1: processes = min(processes, nkeys, mp.cpu_count()) From ab691d38d35a74392e9f33226ea3ce568d33b6a7 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 19 Jan 2022 18:22:02 -0600 Subject: [PATCH 38/39] update CHANGELOG to include multiprocessing --- CHANGELOG.md | 1 + docs-parts/intro/Releases_lang1.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9eda5dd04..a1141a971 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Bugfix - Fix Python 3.10 compatibility (#983) PR #972 * Bugfix - Allow 
renaming non-conforming attributes in proj (#982) PR #972 * Add - Expose proxy feature for S3 external stores (#961) PR #962 +* Add - implement multiprocessing in populate (#695) PR #704, #969 * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 * Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956 diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index d4ea88ae9..c87234aab 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -4,6 +4,7 @@ * Bugfix - Fix Python 3.10 compatibility (#983) PR #972 * Bugfix - Allow renaming non-conforming attributes in proj (#982) PR #972 * Add - Expose proxy feature for S3 external stores (#961) PR #962 +* Add - implement multiprocessing in populate (#695) PR #704, #969 * Bugfix - Dependencies not properly loaded on populate. (#902) PR #919 * Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939 * Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956 From 9ba4e8631816722aa49fcac18caacce39e902500 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 20 Jan 2022 09:46:38 -0600 Subject: [PATCH 39/39] whitespace --- datajoint/autopopulate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index e5b1c48ee..3aa9e78a8 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -54,7 +54,7 @@ def key_source(self): :return: the query expression that yields primary key values to be passed, sequentially, to the ``make`` method when populate() is called. The default value is the join of the parent tables references from the primary key. 
- Subclasses may override they key_source to change the scope or the granularity + Subclasses may override they key_source to change the scope or the granularity of the make calls. """ def _rename_attributes(table, props):