From cb4a5e4ba3518b5f53b47cb23a8dae521be46347 Mon Sep 17 00:00:00 2001 From: David Tesar Date: Thu, 21 Nov 2019 13:09:02 -0800 Subject: [PATCH 1/6] use generic model tag finder --- code/evaluate/evaluate_model.py | 37 ++++++++++++--------- code/register/register_model.py | 13 ++++---- code/util/model_helper.py | 38 +++++++++++++--------- ml_service/pipelines/run_train_pipeline.py | 6 ++-- ml_service/util/env_variables.py | 4 +-- 5 files changed, 56 insertions(+), 42 deletions(-) diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py index 0959d36b..a2d01829 100644 --- a/code/evaluate/evaluate_model.py +++ b/code/evaluate/evaluate_model.py @@ -24,7 +24,8 @@ POSSIBILITY OF SUCH DAMAGE. """ import os -from azureml.core import Model, Run, Workspace, Experiment +import sys +from azureml.core import Run, Workspace, Experiment import argparse from azureml.core.authentication import ServicePrincipalAuthentication import traceback @@ -32,6 +33,8 @@ run = Run.get_context() if (run.id.startswith('OfflineRun')): from dotenv import load_dotenv + sys.path.append(os.path.abspath("./code/util")) # NOQA: E402 + from model_helper import get_model_by_tag # For local development, set values in this section load_dotenv() workspace_name = os.environ.get("WORKSPACE_NAME") @@ -56,8 +59,10 @@ ) ws = aml_workspace exp = Experiment(ws, experiment_name) - run_id = "e78b2c27-5ceb-49d9-8e84-abe7aecf37d5" + run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70" else: + sys.path.append(os.path.abspath("./util")) # NOQA: E402 + from model_helper import get_model_by_tag exp = run.experiment ws = run.experiment.workspace run_id = 'amlcompute' @@ -94,16 +99,20 @@ # Paramaterize the matrices on which the models should be compared # Add golden data set on which all the model performance can be evaluated try: - model_list = Model.list(ws) - if (len(model_list) > 0): - production_model = next( - filter( - lambda x: x.created_time == max( - model.created_time for model in model_list), - model_list, - ) - ) - production_model_run_id = production_model.run_id + firstRegistration = False + tag_name = 'experiment_name' + try: + model_list = get_model_by_tag( + model_name, tag_name, exp.name, ws) + except Exception as e: + firstRegistration = True + print(e) + print("This is the first model, " + "thus it should be registered") + if (firstRegistration is False): + model_list = get_model_by_tag( + model_name, tag_name, exp.name, ws) + production_model_run_id = model_list[0].run_id # Get the run history for both production model and # newly trained model and compare mse @@ -133,9 +142,7 @@ print("New trained model metric is less than or equal to " "production model so skipping model registration.") run.parent.cancel() - else: - print("This is the first model, " - "thus it should be registered") + except Exception: traceback.print_exc(limit=None, file=None, chain=True) print("Something went wrong trying to evaluate. Exiting.") diff --git a/code/register/register_model.py b/code/register/register_model.py index 17388d62..61e7a6bf 100644 --- a/code/register/register_model.py +++ b/code/register/register_model.py @@ -38,7 +38,7 @@ def main(): if (run.id.startswith('OfflineRun')): from dotenv import load_dotenv sys.path.append(os.path.abspath("./code/util")) # NOQA: E402 - from model_helper import get_model_by_build_id + from model_helper import get_model_by_tag # For local development, set values in this section load_dotenv() workspace_name = os.environ.get("WORKSPACE_NAME") @@ -66,7 +66,7 @@ def main(): run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012" else: sys.path.append(os.path.abspath("./util")) # NOQA: E402 - from model_helper import get_model_by_build_id + from model_helper import get_model_by_tag ws = run.experiment.workspace exp = run.experiment run_id = 'amlcompute' @@ -108,7 +108,7 @@ def main(): if (validate): try: - get_model_by_build_id(model_name, build_id, exp.workspace) + get_model_by_tag(model_name, 'BuildId', build_id, exp.workspace) print("Model was registered for this build.") except Exception as e: print(e) @@ -139,12 +139,13 @@ def register_aml_model(model_name, exp, run_id, build_id: str = 'none'): model_already_registered(model_name, exp, run_id) run = Run(experiment=exp, run_id=run_id) tagsValue = {"area": "diabetes", "type": "regression", - "BuildId": build_id, "run_id": run_id} + "BuildId": build_id, "run_id": run_id, + "experiment_name": exp.name} else: run = Run(experiment=exp, run_id=run_id) if (run is not None): - tagsValue = {"area": "diabetes", - "type": "regression", "run_id": run_id} + tagsValue = {"area": "diabetes", "type": "regression", + "run_id": run_id, "experiment_name": exp.name} else: print("A model run for experiment", exp.name, "matching properties run_id =", run_id, diff --git a/code/util/model_helper.py b/code/util/model_helper.py index 1609642a..8875dbd2 100644 --- a/code/util/model_helper.py +++ b/code/util/model_helper.py @@ -22,9 +22,10 @@ def get_current_workspace() -> Workspace: return experiment.workspace -def _get_model_by_build_id( +def _get_model_by_tag( model_name: str, - build_id: str, + tag_name: str, + tag_value: str, aml_workspace: Workspace = None ) -> AMLModel: """ @@ -34,7 +35,7 @@ def _get_model_by_build_id( Parameters: aml_workspace (Workspace): aml.core Workspace that the model lives. model_name (str): name of the model we are looking for - build_id (str): the build id the model was registered under. + tag (str): the tag value the model was registered under. Return: A single aml model from the workspace that matches the name and tag. @@ -42,23 +43,26 @@ def _get_model_by_build_id( # Validate params. cannot be None. if model_name is None: raise ValueError("model_name[:str] is required") - if build_id is None: - raise ValueError("build_id[:str] is required") + if tag_name is None: + raise ValueError("tag_name[:str] is required") + if tag_value is None: + raise ValueError("tag[:str] is required") if aml_workspace is None: aml_workspace = get_current_workspace() # get model by tag. model_list = AMLModel.list( aml_workspace, name=model_name, - tags=[["BuildId", build_id]], latest=True + tags=[[tag_name, tag_value]], latest=True ) # latest should only return 1 model, but if it does, then maybe # internal code was accidentally changed or the source code has changed. should_not_happen = ("THIS SHOULD NOT HAPPEN: found more than one model " - "for the latest with {{model_name: {model_name}," - "BuildId: {build_id}. Models found: {model_list}}}")\ - .format(model_name=model_name, build_id=build_id, + "for the latest with {{tag_name: {tag_name}," + "tag_value: {tag_value}. " + "Models found: {model_list}}}")\ + .format(model_name=model_name, tag_name=tag_value, model_list=model_list) if len(model_list) > 1: raise ValueError(should_not_happen) @@ -66,20 +70,22 @@ def _get_model_by_build_id( return model_list -def get_model_by_build_id( +def get_model_by_tag( model_name: str, - build_id: str, + tag_name: str, + tag_value: str, aml_workspace: Workspace = None ) -> AMLModel: """ - Wrapper function for get_model_by_id that throws an error if model is none + Wrapper function for get_model_by_tag that throws an error if model is none """ - model_list = _get_model_by_build_id(model_name, build_id, aml_workspace) + model_list = _get_model_by_tag( + model_name, tag_name, tag_value, aml_workspace) if model_list: return model_list[0] - no_model_found = ("Model not found with model_name: {model_name} " - "BuildId: {build_id}.")\ - .format(model_name=model_name, build_id=build_id) + no_model_found = ("Model not found with model_name: {model_name}" + "tag {tag_name} {tag_value}")\ + .format(model_name=model_name, tag_name=tag_value) raise Exception(no_model_found) diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 65316007..e5c12454 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -10,9 +10,9 @@ def main(): e = Env() service_principal = ServicePrincipalAuthentication( - tenant_id=e.tenant_id, - service_principal_id=e.app_id, - service_principal_password=e.app_secret) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) aml_workspace = Workspace.get( name=e.workspace_name, diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index f83a9fbd..5ed59866 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -7,7 +7,7 @@ class Singleton(object): def __new__(class_, *args, **kwargs): if class_ not in class_._instances: - class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs) # noqa E501 + class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs) # noqa E501 return class_._instances[class_] @@ -23,7 +23,7 @@ def __init__(self): self._app_secret = os.environ.get("SP_APP_SECRET") self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 + self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) self._build_id = os.environ.get("BUILD_BUILDID") From 4bd8ab842a00c75bfcc3475dd25b69f035d663f6 Mon Sep 17 00:00:00 2001 From: David Tesar Date: Thu, 21 Nov 2019 14:33:41 -0800 Subject: [PATCH 2/6] fix register --validate --- code/register/register_model.py | 3 ++- code/util/model_helper.py | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/code/register/register_model.py b/code/register/register_model.py index 61e7a6bf..d3e7e607 100644 --- a/code/register/register_model.py +++ b/code/register/register_model.py @@ -108,7 +108,8 @@ def main(): if (validate): try: - get_model_by_tag(model_name, 'BuildId', build_id, exp.workspace) + tag_name = 'BuildId' + get_model_by_tag(model_name, tag_name, build_id, exp.workspace) print("Model was registered for this build.") except Exception as e: print(e) diff --git a/code/util/model_helper.py b/code/util/model_helper.py index 8875dbd2..e234e6aa 100644 --- a/code/util/model_helper.py +++ b/code/util/model_helper.py @@ -62,7 +62,7 @@ def _get_model_by_tag( "for the latest with {{tag_name: {tag_name}," "tag_value: {tag_value}. " "Models found: {model_list}}}")\ - .format(model_name=model_name, tag_name=tag_value, + .format(model_name=model_name, tag_name=tag_name, tag_value=tag_value, model_list=model_list) if len(model_list) > 1: raise ValueError(should_not_happen) @@ -85,7 +85,9 @@ def get_model_by_tag( if model_list: return model_list[0] - no_model_found = ("Model not found with model_name: {model_name}" - "tag {tag_name} {tag_value}")\ - .format(model_name=model_name, tag_name=tag_value) + no_model_found = ("No Model found with {{tag_name: {tag_name} ," + "tag_value: {tag_value}.}}")\ + .format(model_name=model_name, tag_name=tag_name, tag_value=tag_value, + model_list=model_list) + raise Exception(no_model_found) From ccb23c553d56615e0bd5104724eefab3c63adc6a Mon Sep 17 00:00:00 2001 From: David Tesar Date: Thu, 21 Nov 2019 14:58:14 -0800 Subject: [PATCH 3/6] fix model index --- code/evaluate/evaluate_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py index a2d01829..67a7caa8 100644 --- a/code/evaluate/evaluate_model.py +++ b/code/evaluate/evaluate_model.py @@ -112,7 +112,7 @@ if (firstRegistration is False): model_list = get_model_by_tag( model_name, tag_name, exp.name, ws) - production_model_run_id = model_list[0].run_id + production_model_run_id = model_list.run_id # Get the run history for both production model and # newly trained model and compare mse From c8460a133c03588fedd1aabda6dafdb0b45b44a0 Mon Sep 17 00:00:00 2001 From: David Tesar Date: Thu, 21 Nov 2019 17:07:27 -0800 Subject: [PATCH 4/6] address feedback --- code/evaluate/evaluate_model.py | 3 +-- code/util/model_helper.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py index 67a7caa8..d70e0764 100644 --- a/code/evaluate/evaluate_model.py +++ b/code/evaluate/evaluate_model.py @@ -110,8 +110,7 @@ print("This is the first model, " "thus it should be registered") if (firstRegistration is False): - model_list = get_model_by_tag( - model_name, tag_name, exp.name, ws) + production_model_run_id = model_list.run_id # Get the run history for both production model and diff --git a/code/util/model_helper.py b/code/util/model_helper.py index e234e6aa..04ffbb89 100644 --- a/code/util/model_helper.py +++ b/code/util/model_helper.py @@ -62,7 +62,7 @@ def _get_model_by_tag( "for the latest with {{tag_name: {tag_name}," "tag_value: {tag_value}. " "Models found: {model_list}}}")\ - .format(model_name=model_name, tag_name=tag_name, tag_value=tag_value, + .format(tag_name=tag_name, tag_value=tag_value, model_list=model_list) if len(model_list) > 1: raise ValueError(should_not_happen) @@ -87,7 +87,6 @@ def get_model_by_tag( no_model_found = ("No Model found with {{tag_name: {tag_name} ," "tag_value: {tag_value}.}}")\ - .format(model_name=model_name, tag_name=tag_name, tag_value=tag_value, - model_list=model_list) + .format(tag_name=tag_name, tag_value=tag_value) raise Exception(no_model_found) From b3b9a1b71f353e53105074585939e15da723d29a Mon Sep 17 00:00:00 2001 From: David Tesar Date: Thu, 21 Nov 2019 21:13:02 -0800 Subject: [PATCH 5/6] Simplify model_helper --- code/util/model_helper.py | 88 ++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 51 deletions(-) diff --git a/code/util/model_helper.py b/code/util/model_helper.py index 04ffbb89..9fe48c6a 100644 --- a/code/util/model_helper.py +++ b/code/util/model_helper.py @@ -22,7 +22,7 @@ def get_current_workspace() -> Workspace: return experiment.workspace -def _get_model_by_tag( +def get_model_by_tag( model_name: str, tag_name: str, tag_value: str, @@ -40,53 +40,39 @@ def _get_model_by_tag( Return: A single aml model from the workspace that matches the name and tag. """ - # Validate params. cannot be None. - if model_name is None: - raise ValueError("model_name[:str] is required") - if tag_name is None: - raise ValueError("tag_name[:str] is required") - if tag_value is None: - raise ValueError("tag[:str] is required") - if aml_workspace is None: - aml_workspace = get_current_workspace() - - # get model by tag. - model_list = AMLModel.list( - aml_workspace, name=model_name, - tags=[[tag_name, tag_value]], latest=True - ) - - # latest should only return 1 model, but if it does, then maybe - # internal code was accidentally changed or the source code has changed. - should_not_happen = ("THIS SHOULD NOT HAPPEN: found more than one model " - "for the latest with {{tag_name: {tag_name}," - "tag_value: {tag_value}. " - "Models found: {model_list}}}")\ - .format(tag_name=tag_name, tag_value=tag_value, - model_list=model_list) - if len(model_list) > 1: - raise ValueError(should_not_happen) - - return model_list - - -def get_model_by_tag( - model_name: str, - tag_name: str, - tag_value: str, - aml_workspace: Workspace = None -) -> AMLModel: - """ - Wrapper function for get_model_by_tag that throws an error if model is none - """ - model_list = _get_model_by_tag( - model_name, tag_name, tag_value, aml_workspace) - - if model_list: - return model_list[0] - - no_model_found = ("No Model found with {{tag_name: {tag_name} ," - "tag_value: {tag_value}.}}")\ - .format(tag_name=tag_name, tag_value=tag_value) - - raise Exception(no_model_found) + try: + # Validate params. cannot be None. + if model_name is None: + raise ValueError("model_name[:str] is required") + if tag_name is None: + raise ValueError("tag_name[:str] is required") + if tag_value is None: + raise ValueError("tag[:str] is required") + if aml_workspace is None: + aml_workspace = get_current_workspace() + + # get model by tag. + model_list = AMLModel.list( + aml_workspace, name=model_name, + tags=[[tag_name, tag_value]], latest=True + ) + + # latest should only return 1 model, but if it does, + # then maybe sdk or source code changed. + should_not_happen = ("Found more than one model " + "for the latest with {{tag_name: {tag_name}," + "tag_value: {tag_value}. " + "Models found: {model_list}}}")\ + .format(tag_name=tag_name, tag_value=tag_value, + model_list=model_list) + no_model_found = ("No Model found with {{tag_name: {tag_name} ," + "tag_value: {tag_value}.}}")\ + .format(tag_name=tag_name, tag_value=tag_value) + + if len(model_list) > 1: + raise ValueError(should_not_happen) + else: + return model_list[0] + except Exception: + print(no_model_found) + raise From 126463255cfb82a44155602e9996a277290560ca Mon Sep 17 00:00:00 2001 From: David Tesar Date: Fri, 22 Nov 2019 09:42:23 -0800 Subject: [PATCH 6/6] address feedback --- code/evaluate/evaluate_model.py | 19 +++++++++---------- code/util/model_helper.py | 6 ++++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py index d70e0764..3cce1fb2 100644 --- a/code/evaluate/evaluate_model.py +++ b/code/evaluate/evaluate_model.py @@ -101,17 +101,13 @@ try: firstRegistration = False tag_name = 'experiment_name' - try: - model_list = get_model_by_tag( - model_name, tag_name, exp.name, ws) - except Exception as e: - firstRegistration = True - print(e) - print("This is the first model, " - "thus it should be registered") - if (firstRegistration is False): - production_model_run_id = model_list.run_id + model = get_model_by_tag( + model_name, tag_name, exp.name, ws) + + if (model is not None): + + production_model_run_id = model.run_id # Get the run history for both production model and # newly trained model and compare mse @@ -141,6 +137,9 @@ print("New trained model metric is less than or equal to " "production model so skipping model registration.") run.parent.cancel() + else: + print("This is the first model, " + "thus it should be registered") except Exception: traceback.print_exc(limit=None, file=None, chain=True) diff --git a/code/util/model_helper.py b/code/util/model_helper.py index 9fe48c6a..98df0bb8 100644 --- a/code/util/model_helper.py +++ b/code/util/model_helper.py @@ -71,8 +71,10 @@ def get_model_by_tag( if len(model_list) > 1: raise ValueError(should_not_happen) - else: + if len(model_list) == 1: return model_list[0] + else: + print(no_model_found) + return None except Exception: - print(no_model_found) raise