From e9c480622b9b456c7f5a576f4ba07c581b035368 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Fri, 7 Feb 2020 15:52:08 -0800 Subject: [PATCH 01/27] bootstrap --- bootstrap/bootstrap.py | 119 ++++++++++++++++++++++++ diabetes_regression/ci_dependencies.yml | 2 + 2 files changed, 121 insertions(+) create mode 100644 bootstrap/bootstrap.py diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py new file mode 100644 index 00000000..bb981307 --- /dev/null +++ b/bootstrap/bootstrap.py @@ -0,0 +1,119 @@ +import os +import sys +import argparse +from git import Repo + + +class Helper: + + def __init__(self, project_directory, project_name): + self._project_directory = project_directory + self._project_name = project_name + self._git_repo = "https://github.com/microsoft/MLOpsPython.git" + + @property + def project_directory(self): + return self._project_directory + + @property + def project_name(self): + return self._project_name + + @property + def git_repo(self): + return self._git_repo + + def clonerepo(self): + # Download MLOpsPython repo from git + Repo.clone_from( + self._git_repo, self._project_directory, branch="master", depth=1) + print(self._project_directory) + + def renamefiles(self): + # Rename all files starting with diabetes_regression with project name + strtoreplace = "diabetes_regression" + dirs = [".pipelines", "ml_service\pipelines"] + for dir in dirs: + dirpath = os.path.join(self._project_directory, dir) + for filename in os.listdir(dirpath): + if(filename.find(strtoreplace) != -1): + src = os.path.join(self._project_directory, dir, filename) + dst = os.path.join(self._project_directory, + dir, filename.replace(strtoreplace, self._project_name, 1)) + os.rename(src, dst) + + def renamedir(self): + # Rename any directory with diabetes_regression with project name + dirs = ["diabetes_regression"] + for dir in dirs: + src = os.path.join(self._project_directory, dir) + dst = os.path.join(self._project_directory, self._project_name) + os.rename(src, 
dst) + + def deletedir(self): + # Delete unwanted directories + dirs = [".git", "docs", r"diabetes_regression\training\R"] + for dir in dirs: + os.system( + 'rmdir /S /Q "{}"'.format(os.path.join(self._project_directory, dir))) + + def replaceimport(self): + # Replace imports with new project name + dirs = [r"tests\unit\code_test.py", + r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py"] + for file in dirs: + fin = open(os.path.join(self._project_directory, file), "rt") + data = fin.read() + newimport = "from " + self._project_name + "." + data = data.replace("from diabetes_regression.", newimport) + fin.close() + fin = open(os.path.join(self._project_directory, file), "wt") + fin.write(data) + fin.close() + + def cleandir(self): + # Clean up directories + dirs = ["data", "experimentation"] + for dir in dirs: + for root, dirs, files in os.walk(os.path.join(self._project_directory, dir)): + for file in files: + os.remove(os.path.join(root, file)) + + def validateargs(self): + # Validate arguments + if (path.isdir(self._project_directory) is False): + raise Exception( + "Not a valid directory. Please provide absolute directory path") + if (len(os.listdir(self._project_directory)) > 0): + raise Exception("Directory not empty. 
PLease empty directory") + if(len(self._project_name) < 3 or len(self._project_name) > 8): + raise Exception("Project name should be 3 to 8 chars long") + + +def main(args): + # Run this script to create a template from mlopspython + # python bootstrap.py --d [dirpath] --n test + parser = argparse.ArgumentParser(description='New Template') + parser.add_argument("--d", type=str, + help="Absolute path to new project direcory") + parser.add_argument( + "--n", type=str, help="Name of the project[3-8 chars] ") + try: + args = parser.parse_args() + project_directory = args.d + project_name = args.n + helper = Helper(project_directory, project_name) + helper.validateargs() + helper.clonerepo() + helper.cleandir() + helper.replaceimport() + helper.deletedir() + helper.renamefiles() + helper.renamedir() + except Exception as e: + print(e) + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml index c5463456..b4254f9a 100644 --- a/diabetes_regression/ci_dependencies.yml +++ b/diabetes_regression/ci_dependencies.yml @@ -26,3 +26,5 @@ dependencies: - flake8_formatter_junit_xml==0.0.6 - azure-cli==2.0.81 - tox==3.14.3 + # Uncomment for bootstrap.py + #- GitPython==3.0.5 From dc4790636b052b757a4dc0b16e1f681d36e64561 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Fri, 7 Feb 2020 15:54:28 -0800 Subject: [PATCH 02/27] redmi file --- bootstrap/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 bootstrap/README.md diff --git a/bootstrap/README.md b/bootstrap/README.md new file mode 100644 index 00000000..f296054f --- /dev/null +++ b/bootstrap/README.md @@ -0,0 +1 @@ +*TODO From d8aac456f4cac5055fe4af8ee15de2fee3429227 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Fri, 7 Feb 2020 16:03:20 -0800 Subject: [PATCH 03/27] intial doc --- bootstrap/README.md | 7 ++++++- bootstrap/bootstrap.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/bootstrap/README.md b/bootstrap/README.md index f296054f..c301bbb2 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -1 +1,6 @@ -*TODO +# Bootstrap from MLOpsPython repository + +If you would like to bootstrap from the existing MLOpsPython repo run bootstrap.py script as below +>python bootstrap.py --d [dirpath] --n [projectname] + +This script will download and prepare the repository for your project. diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index bb981307..c0212eef 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -92,7 +92,7 @@ def validateargs(self): def main(args): # Run this script to create a template from mlopspython - # python bootstrap.py --d [dirpath] --n test + # python bootstrap.py --d [dirpath] --n [projectname] parser = argparse.ArgumentParser(description='New Template') parser.add_argument("--d", type=str, help="Absolute path to new project direcory") From 8e8370b4fd960bc9e6238525ba80986440bba56e Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Fri, 7 Feb 2020 16:08:03 -0800 Subject: [PATCH 04/27] update doc --- bootstrap/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index c301bbb2..49f51bc6 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -1,6 +1,6 @@ # Bootstrap from MLOpsPython repository -If you would like to bootstrap from the existing MLOpsPython repo run bootstrap.py script as below +To bootstrap from the existing MLOpsPython repository run bootstrap.py script as below >python bootstrap.py --d [dirpath] --n [projectname] -This script will download and prepare the repository for your project. +This script will download and prepare directory structure for your project. 
From 230cf26169a273626c7cc3acb90ba03a4cb1ce3c Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 11:12:36 -0800 Subject: [PATCH 05/27] directory structure --- README.md => README copy.md | 0 directorystructure.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename README.md => README copy.md (100%) create mode 100644 directorystructure.md diff --git a/README.md b/README copy.md similarity index 100% rename from README.md rename to README copy.md diff --git a/directorystructure.md b/directorystructure.md new file mode 100644 index 00000000..e69de29b From b777a2fdfbc9ef7e9c2bc90cbf1a5c06a38226e8 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 12:44:24 -0800 Subject: [PATCH 06/27] added directory structure doc --- README copy.md => README.md | 21 +++++++++++---------- bootstrap/bootstrap.py | 2 +- directorystructure.md | 27 +++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 11 deletions(-) rename README copy.md => README.md (94%) diff --git a/README copy.md b/README.md similarity index 94% rename from README copy.md rename to README.md index 64ab49bc..4dcd564a 100644 --- a/README copy.md +++ b/README.md @@ -11,36 +11,37 @@ description: "Code which demonstrates how to set up and operationalize an MLOps # MLOps with Azure ML - [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=151&branchName=master) - -MLOps will help you to understand how to build the Continuous Integration and Continuous Delivery pipeline for a ML/AI project. We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization. +MLOps will help you to understand how to build the Continuous Integration and Continuous Delivery pipeline for a ML/AI project. 
We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization. ![ML lifecycle](/docs/images/ml-lifecycle.png) This template contains code and pipeline definition for a machine learning project demonstrating how to automate an end to end ML/AI workflow. The build pipelines include DevOps tasks for data sanity test, unit test, model training on different compute targets, model version management, model evaluation/model selection, model deployment as realtime web service, staged deployment to QA/prod and integration testing. - ## Prerequisite + - Active Azure subscription - At least contributor access to Azure subscription -## Getting Started: +## Directory Structure -To deploy this solution in your subscription, follow the manual instructions in the [getting started](docs/getting_started.md) doc +To understand the high level directory structure for this repository, please go through [directory structure doc](directorystructure.md) +## Getting Started + +To deploy this solution in your subscription, follow the manual instructions in the [getting started](docs/getting_started.md) doc ## Architecture Diagram -This reference architecture shows how to implement continuous integration (CI), continuous delivery (CD), and retraining pipeline for an AI application using Azure DevOps and Azure Machine Learning. The solution is built on the scikit-learn diabetes dataset but can be easily adapted for any AI scenario and other popular build systems such as Jenkins and Travis. +This reference architecture shows how to implement continuous integration (CI), continuous delivery (CD), and retraining pipeline for an AI application using Azure DevOps and Azure Machine Learning. The solution is built on the scikit-learn diabetes dataset but can be easily adapted for any AI scenario and other popular build systems such as Jenkins and Travis. 
![Architecture](/docs/images/main-flow.png) - ## Architecture Flow ### Train Model + 1. Data Scientist writes/updates the code and push it to git repo. This triggers the Azure DevOps build pipeline (continuous integration). 2. Once the Azure DevOps build pipeline is triggered, it performs code quality checks, data sanity tests, unit tests, builds an [Azure ML Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines) and publishes it in an [Azure ML Service Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace). 3. The [Azure ML Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines) is triggered once the Azure DevOps build pipeline completes. All the tasks in this pipeline runs on Azure ML Compute. Following are the tasks in this pipeline: @@ -56,13 +57,13 @@ This reference architecture shows how to implement continuous integration (CI), Once you have registered your ML model, you can use Azure ML + Azure DevOps to deploy it. The [Azure DevOps multi-stage pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/stages?view=azure-devops&tabs=yaml) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. 
The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes). - ### Repo Details You can find the details of the code and scripts in the repository [here](/docs/code_description.md) ### References + - [Azure Machine Learning(Azure ML) Service Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/overview-what-is-azure-ml) - [Azure ML CLI](https://docs.microsoft.com/en-us/azure/machine-learning/service/reference-azure-machine-learning-cli) - [Azure ML Samples](https://docs.microsoft.com/en-us/azure/machine-learning/service/samples-notebooks) @@ -73,7 +74,7 @@ You can find the details of the code and scripts in the repository [here](/docs/ This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit https://cla.microsoft.com. +the rights to use your contribution. For details, visit <https://cla.microsoft.com>. When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). 
Simply follow the instructions diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index c0212eef..1c7eff4b 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -59,7 +59,7 @@ def deletedir(self): def replaceimport(self): # Replace imports with new project name - dirs = [r"tests\unit\code_test.py", + dirs = [r"diabetes_regression\training\test_train.py", r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py"] for file in dirs: fin = open(os.path.join(self._project_directory, file), "rt") diff --git a/directorystructure.md b/directorystructure.md index e69de29b..6325d1f3 100644 --- a/directorystructure.md +++ b/directorystructure.md @@ -0,0 +1,27 @@ +# Directory Structure + +High level directory structure for this repository: + +```bash +├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. +├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). +├── data <- Initial set of data to train and evaluate model. +├── diabetes_regression <- The top-level folder for the ML project. +│ ├── evaluate <- Python script to evaluate trained ML model. +│ ├── register <- Python script to register trained ML model with Azure Machine Learning Service. +│ ├── scoring <- Python score.py to deploy trained ML model. +│ ├── training <- Python script to train ML model. +│ ├── R <- R script to train R based ML model. +│ ├── util <- Python script for various utility operations specific to this ML project. +├── docs <- Extensive markdown documentation for entire project. +├── environment_setup <- The top-level folder for everything related to infrastructure. +│ ├── arm-templates <- Azure Resource Manager(ARM) templates to build infrastructure needed for this project. +├── experimentation <- Jupyter notebooks with ML experimentation code. +├── ml_service <- The top-level folder for all Azure Machine Learning resources. 
+│ ├── pipelines <- Python script that builds Azure Machine Learning pipelines. +│ ├── util <- Python script for various utility operations specific to Azure Machine Learning. +├── .env.example <- Example .env file with environment for local development experience. +├── .gitignore <- A gitignore file specifies intentionally untracked files that Git should ignore. +├── LICENSE <- License document for this project. +├── README.md <- The top-level README for developers using this project. +``` From 16ea77c33b6db46d4ece519ce52043a85cf31a0d Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 12:49:57 -0800 Subject: [PATCH 07/27] updated doc --- bootstrap/README.md | 7 ++++++- directorystructure.md | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index 49f51bc6..6422afc4 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -3,4 +3,9 @@ To bootstrap from the existing MLOpsPython repository run bootstrap.py script as below >python bootstrap.py --d [dirpath] --n [projectname] -This script will download and prepare directory structure for your project. +This script will download and prepare a directory structure for your project. + +**NOTE** + +In order to run this bootstrap script please install GitPython. +> pip install GitPython==3.0.5 diff --git a/directorystructure.md b/directorystructure.md index 6325d1f3..c785726d 100644 --- a/directorystructure.md +++ b/directorystructure.md @@ -4,6 +4,7 @@ High level directory structure for this repository: ```bash ├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. +├── bootstrap <- Python script to create a re-usbale code template to bootstrap. ├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). ├── data <- Initial set of data to train and evaluate model. ├── diabetes_regression <- The top-level folder for the ML project. 
From 8f6b31845b705831032af78e9ed8c99391e6f502 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 13:50:15 -0800 Subject: [PATCH 08/27] fixed linting --- bootstrap/bootstrap.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 1c7eff4b..f6d9680b 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -32,14 +32,14 @@ def clonerepo(self): def renamefiles(self): # Rename all files starting with diabetes_regression with project name strtoreplace = "diabetes_regression" - dirs = [".pipelines", "ml_service\pipelines"] + dirs = [".pipelines", r"ml_service\pipelines"] for dir in dirs: dirpath = os.path.join(self._project_directory, dir) for filename in os.listdir(dirpath): if(filename.find(strtoreplace) != -1): src = os.path.join(self._project_directory, dir, filename) dst = os.path.join(self._project_directory, - dir, filename.replace(strtoreplace, self._project_name, 1)) + dir, filename.replace(strtoreplace, self._project_name, 1)) # NOQA: E501 os.rename(src, dst) def renamedir(self): @@ -55,12 +55,12 @@ def deletedir(self): dirs = [".git", "docs", r"diabetes_regression\training\R"] for dir in dirs: os.system( - 'rmdir /S /Q "{}"'.format(os.path.join(self._project_directory, dir))) + 'rmdir /S /Q "{}"'.format(os.path.join(self._project_directory, dir))) # NOQA: E501 def replaceimport(self): # Replace imports with new project name dirs = [r"diabetes_regression\training\test_train.py", - r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py"] + r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py"] # NOQA: E501 for file in dirs: fin = open(os.path.join(self._project_directory, file), "rt") data = fin.read() @@ -75,15 +75,15 @@ def cleandir(self): # Clean up directories dirs = ["data", "experimentation"] for dir in dirs: - for root, dirs, files in os.walk(os.path.join(self._project_directory, dir)): + for root, dirs, files in 
os.walk(os.path.join(self._project_directory, dir)): # NOQA: E501 for file in files: os.remove(os.path.join(root, file)) def validateargs(self): # Validate arguments - if (path.isdir(self._project_directory) is False): + if (os.path.isdir(self._project_directory) is False): raise Exception( - "Not a valid directory. Please provide absolute directory path") + "Not a valid directory. Please provide absolute directory path") # NOQA: E501 if (len(os.listdir(self._project_directory)) > 0): raise Exception("Directory not empty. PLease empty directory") if(len(self._project_name) < 3 or len(self._project_name) > 8): From 6e5ddd05309cd34c5b52496ab42f18574b9448fe Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 13:56:45 -0800 Subject: [PATCH 09/27] fixed linting --- directorystructure.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/directorystructure.md b/directorystructure.md index c785726d..e2785b3c 100644 --- a/directorystructure.md +++ b/directorystructure.md @@ -22,7 +22,7 @@ High level directory structure for this repository: │ ├── pipelines <- Python script that builds Azure Machine Learning pipelines. │ ├── util <- Python script for various utility operations specific to Azure Machine Learning. ├── .env.example <- Example .env file with environment for local development experience. -├── .gitignore <- A gitignore file specifies intentionally untracked files that Git should ignore. +├── .gitignore <- A gitignore file specifies intentionally un-tracked files that Git should ignore. ├── LICENSE <- License document for this project. ├── README.md <- The top-level README for developers using this project. 
``` From 0b64096eb9984bd14dc8926a17504d84fae97ef9 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 15:23:17 -0800 Subject: [PATCH 10/27] pr comments --- bootstrap/README.md | 11 ++++---- bootstrap/bootstrap.py | 6 ++--- diabetes_regression/ci_dependencies.yml | 1 - directorystructure.md | 28 -------------------- docs/code_description.md | 34 +++++++++++++++++++++++-- 5 files changed, 40 insertions(+), 40 deletions(-) delete mode 100644 directorystructure.md diff --git a/bootstrap/README.md b/bootstrap/README.md index 6422afc4..b4f50210 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -1,11 +1,10 @@ # Bootstrap from MLOpsPython repository -To bootstrap from the existing MLOpsPython repository run bootstrap.py script as below ->python bootstrap.py --d [dirpath] --n [projectname] +To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name.This will enable reusing various resources like pre-built pipelines and scripts for your new project. -This script will download and prepare a directory structure for your project. +To bootstrap from the existing MLOpsPython repository install and run bootstrap.py script as below +>pip install GitPython==3.0.5 -**NOTE** +>python bootstrap.py --d [dirpath] --n [projectname] -In order to run this bootstrap script please install GitPython. 
-> pip install GitPython==3.0.5 +Where [dirpath] is the directory to download the repo and [projectname] is name of your ML project diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index f6d9680b..7bab24c9 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -86,8 +86,8 @@ def validateargs(self): "Not a valid directory. Please provide absolute directory path") # NOQA: E501 if (len(os.listdir(self._project_directory)) > 0): raise Exception("Directory not empty. PLease empty directory") - if(len(self._project_name) < 3 or len(self._project_name) > 8): - raise Exception("Project name should be 3 to 8 chars long") + if(len(self._project_name) < 3 or len(self._project_name) > 15): + raise Exception("Project name should be 3 to 15 chars long") def main(args): @@ -97,7 +97,7 @@ def main(args): parser.add_argument("--d", type=str, help="Absolute path to new project direcory") parser.add_argument( - "--n", type=str, help="Name of the project[3-8 chars] ") + "--n", type=str, help="Name of the project[3-15 chars] ") try: args = parser.parse_args() project_directory = args.d diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml index b4254f9a..17f4b3e9 100644 --- a/diabetes_regression/ci_dependencies.yml +++ b/diabetes_regression/ci_dependencies.yml @@ -25,6 +25,5 @@ dependencies: - flake8==3.7.9 - flake8_formatter_junit_xml==0.0.6 - azure-cli==2.0.81 - - tox==3.14.3 # Uncomment for bootstrap.py #- GitPython==3.0.5 diff --git a/directorystructure.md b/directorystructure.md deleted file mode 100644 index e2785b3c..00000000 --- a/directorystructure.md +++ /dev/null @@ -1,28 +0,0 @@ -# Directory Structure - -High level directory structure for this repository: - -```bash -├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. -├── bootstrap <- Python script to create a re-usbale code template to bootstrap. 
-├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). -├── data <- Initial set of data to train and evaluate model. -├── diabetes_regression <- The top-level folder for the ML project. -│ ├── evaluate <- Python script to evaluate trained ML model. -│ ├── register <- Python script to register trained ML model with Azure Machine Learning Service. -│ ├── scoring <- Python score.py to deploy trained ML model. -│ ├── training <- Python script to train ML model. -│ ├── R <- R script to train R based ML model. -│ ├── util <- Python script for various utility operations specific to this ML project. -├── docs <- Extensive markdown documentation for entire project. -├── environment_setup <- The top-level folder for everything related to infrastructure. -│ ├── arm-templates <- Azure Resource Manager(ARM) templates to build infrastructure needed for this project. -├── experimentation <- Jupyter notebooks with ML experimentation code. -├── ml_service <- The top-level folder for all Azure Machine Learning resources. -│ ├── pipelines <- Python script that builds Azure Machine Learning pipelines. -│ ├── util <- Python script for various utility operations specific to Azure Machine Learning. -├── .env.example <- Example .env file with environment for local development experience. -├── .gitignore <- A gitignore file specifies intentionally un-tracked files that Git should ignore. -├── LICENSE <- License document for this project. -├── README.md <- The top-level README for developers using this project. -``` diff --git a/docs/code_description.md b/docs/code_description.md index d60df616..1b95d5c0 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -1,5 +1,34 @@ ## Repo Details +### Directory Structure + +High level directory structure for this repository: + +```bash +├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. +├── bootstrap <- Python script to create a re-usbale code template to bootstrap. 
+├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). +├── data <- Initial set of data to train and evaluate model. +├── diabetes_regression <- The top-level folder for the ML project. +│ ├── evaluate <- Python script to evaluate trained ML model. +│ ├── register <- Python script to register trained ML model with Azure Machine Learning Service. +│ ├── scoring <- Python score.py to deploy trained ML model. +│ ├── training <- Python script to train ML model. +│ ├── R <- R script to train R based ML model. +│ ├── util <- Python script for various utility operations specific to this ML project. +├── docs <- Extensive markdown documentation for entire project. +├── environment_setup <- The top-level folder for everything related to infrastructure. +│ ├── arm-templates <- Azure Resource Manager(ARM) templates to build infrastructure needed for this project. +├── experimentation <- Jupyter notebooks with ML experimentation code. +├── ml_service <- The top-level folder for all Azure Machine Learning resources. +│ ├── pipelines <- Python script that builds Azure Machine Learning pipelines. +│ ├── util <- Python script for various utility operations specific to Azure Machine Learning. +├── .env.example <- Example .env file with environment for local development experience. +├── .gitignore <- A gitignore file specifies intentionally un-tracked files that Git should ignore. +├── LICENSE <- License document for this project. +├── README.md <- The top-level README for developers using this project. +``` + ### Environment Setup - `environment_setup/install_requirements.sh` : This script prepares a local conda environment i.e. install the Azure ML SDK and the packages specified in environment definitions. @@ -8,7 +37,7 @@ - `environment_setup/Dockerfile` : Dockerfile of a build agent containing Python 3.6 and all required packages. 
-- `environment_setup/docker-image-pipeline.yml` : An AzDo pipeline for building and pushing [microsoft/mlopspython](https://hub.docker.com/_/microsoft-mlops-python) image. +- `environment_setup/docker-image-pipeline.yml` : An AzDo pipeline for building and pushing [microsoft/mlopspython](https://hub.docker.com/_/microsoft-mlops-python) image. ### Pipelines @@ -37,10 +66,11 @@ - `diabetes_regression/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one. - `diabetes_regression/evaluate/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one. - `diabetes_regression/training/R/r_train.r` : training a model with R basing on a sample dataset (weight_data.csv). -- `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute +- `diabetes_regression/training/R/train_with_r.py` : a python wrapper (ML Pipeline Step) invoking R training script on ML Compute - `diabetes_regression/training/R/train_with_r_on_databricks.py` : a python wrapper (ML Pipeline Step) invoking R training script on Databricks Compute - `diabetes_regression/training/R/weight_data.csv` : a sample dataset used by R script (r_train.r) to train a model ### Scoring + - `diabetes_regression/scoring/score.py` : a scoring script which is about to be packed into a Docker Image along with a model while being deployed to QA/Prod environment. - `diabetes_regression/scoring/inference_config.yml`, deployment_config_aci.yml, deployment_config_aks.yml : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets. 
From afac78e14f8fe331ebc47ce50a88b9511841135d Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 15:26:05 -0800 Subject: [PATCH 11/27] formating --- bootstrap/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index b4f50210..cc5f7bed 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -1,10 +1,9 @@ # Bootstrap from MLOpsPython repository -To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name.This will enable reusing various resources like pre-built pipelines and scripts for your new project. +To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. This will enable reusing various resources like pre-built pipelines and scripts for your new project. 
To bootstrap from the existing MLOpsPython repository install and run bootstrap.py script as below >pip install GitPython==3.0.5 - >python bootstrap.py --d [dirpath] --n [projectname] Where [dirpath] is the directory to download the repo and [projectname] is name of your ML project From 4a929551f5c2c50bd5f7c80839938eee4bf3b75b Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 15:27:42 -0800 Subject: [PATCH 12/27] fomratting --- bootstrap/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/bootstrap/README.md b/bootstrap/README.md index cc5f7bed..6e95a465 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -4,6 +4,7 @@ To use this existing project structure and scripts for your new ML project, you To bootstrap from the existing MLOpsPython repository install and run bootstrap.py script as below >pip install GitPython==3.0.5 +> >python bootstrap.py --d [dirpath] --n [projectname] Where [dirpath] is the directory to download the repo and [projectname] is name of your ML project From 2d053bd91f70cc20431268ccd4d86620e85a6c27 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 15:57:07 -0800 Subject: [PATCH 13/27] PR --- bootstrap/README.md | 4 ++-- bootstrap/bootstrap.py | 2 +- docs/getting_started.md | 31 ++++++++++++++++--------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index 6e95a465..cc3a370f 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -1,10 +1,10 @@ # Bootstrap from MLOpsPython repository -To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. 
This will enable reusing various resources like pre-built pipelines and scripts for your new project. +To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. This will enable reusing various resources like pre-built pipelines and scripts for your new project. To bootstrap from the existing MLOpsPython repository install and run bootstrap.py script as below >pip install GitPython==3.0.5 > >python bootstrap.py --d [dirpath] --n [projectname] -Where [dirpath] is the directory to download the repo and [projectname] is name of your ML project +Where [dirpath] is the directory to download the repo and [projectname] is the name of your ML project diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 7bab24c9..9f170075 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -104,7 +104,7 @@ def main(args): project_name = args.n helper = Helper(project_directory, project_name) helper.validateargs() - helper.clonerepo() + # helper.clonerepo() helper.cleandir() helper.replaceimport() helper.deletedir() diff --git a/docs/getting_started.md b/docs/getting_started.md index 009ae6c0..160d03e9 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -11,6 +11,7 @@ If you already have an Azure DevOps organization, create a [new project](https:/ ## Clone or fork this repository Fork this repository within GitHub, or clone it into your Azure DevOps project. 
+ ## Create an ARM Service Connection to deploy resources This repository includes a YAML pipeline definition file for an Azure DevOps pipeline that will create the Azure ML workspace and associated resources through Azure Resource Manager. @@ -48,14 +49,14 @@ Create a variable group named **``devopsforai-aml-vg``**. The YAML pipeline defi The variable group should contain the following required variables: -| Variable Name | Suggested Value | -| --------------------------- | -----------------------------------| -| BASE_NAME | [unique base name] | -| LOCATION | centralus | -| RESOURCE_GROUP | mlops-RG | -| WORKSPACE_NAME | mlops-AML-WS | -| WORKSPACE_SVC_CONNECTION | aml-workspace-connection | -| ACI_DEPLOYMENT_NAME | diabetes-aci | +| Variable Name | Suggested Value | +| ------------------------ | ------------------------ | +| BASE_NAME | [unique base name] | +| LOCATION | centralus | +| RESOURCE_GROUP | mlops-RG | +| WORKSPACE_NAME | mlops-AML-WS | +| WORKSPACE_SVC_CONNECTION | aml-workspace-connection | +| ACI_DEPLOYMENT_NAME | diabetes-aci | **Note:** @@ -208,10 +209,10 @@ tutorial, but you can find set up information In the Variables tab, edit your variable group (`devopsforai-aml-vg`). In the variable group definition, add the following variables: -| Variable Name | Suggested Value | -| --------------------------- | -----------------------------------| -| AKS_COMPUTE_NAME | aks | -| AKS_DEPLOYMENT_NAME | diabetes-aks | +| Variable Name | Suggested Value | +| ------------------- | --------------- | +| AKS_COMPUTE_NAME | aks | +| AKS_DEPLOYMENT_NAME | diabetes-aks | Set **AKS_COMPUTE_NAME** to the *Compute name* of the Inference Cluster referencing your AKS cluster in your Azure ML Workspace. @@ -226,9 +227,9 @@ scoring service on Azure App Service](https://docs.microsoft.com/en-us/azure/mac In the Variables tab, edit your variable group (`devopsforai-aml-vg`). 
In the variable group definition, add the following variable: -| Variable Name | Suggested Value | -| --------------------------- | -----------------------------------| -| WEBAPP_DEPLOYMENT_NAME | mlopswebapp | +| Variable Name | Suggested Value | +| ---------------------- | --------------- | +| WEBAPP_DEPLOYMENT_NAME | mlopswebapp | Set **WEBAPP_DEPLOYMENT_NAME** to the name of your Azure Web App. Delete the **ACI_DEPLOYMENT_NAME** variable. From 4da39ba09094f8f70cfa352db9cf0e6a5b8c9ab4 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 16:12:56 -0800 Subject: [PATCH 14/27] removed git clone --- bootstrap/README.md | 5 ++--- bootstrap/bootstrap.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index cc3a370f..ee179d3c 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -3,8 +3,7 @@ To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. This will enable reusing various resources like pre-built pipelines and scripts for your new project. 
To bootstrap from the existing MLOpsPython repository install and run bootstrap.py script as below ->pip install GitPython==3.0.5 -> + >python bootstrap.py --d [dirpath] --n [projectname] -Where [dirpath] is the directory to download the repo and [projectname] is the name of your ML project +Where [dirpath] is the absolute path to the root of your directory where MLOps repo is cloned and [projectname] is the name of your ML project diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 9f170075..251b4f71 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -52,7 +52,7 @@ def renamedir(self): def deletedir(self): # Delete unwanted directories - dirs = [".git", "docs", r"diabetes_regression\training\R"] + dirs = ["docs", r"diabetes_regression\training\R"] for dir in dirs: os.system( 'rmdir /S /Q "{}"'.format(os.path.join(self._project_directory, dir))) # NOQA: E501 @@ -84,8 +84,8 @@ def validateargs(self): if (os.path.isdir(self._project_directory) is False): raise Exception( "Not a valid directory. Please provide absolute directory path") # NOQA: E501 - if (len(os.listdir(self._project_directory)) > 0): - raise Exception("Directory not empty. PLease empty directory") + # if (len(os.listdir(self._project_directory)) > 0): + # raise Exception("Directory not empty. 
PLease empty directory") if(len(self._project_name) < 3 or len(self._project_name) > 15): raise Exception("Project name should be 3 to 15 chars long") From c5bbe538b9d113ee6c824d754ea33fc5e0ab4125 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 16:13:59 -0800 Subject: [PATCH 15/27] removed gitpython --- diabetes_regression/ci_dependencies.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml index 17f4b3e9..d6a52ed0 100644 --- a/diabetes_regression/ci_dependencies.yml +++ b/diabetes_regression/ci_dependencies.yml @@ -25,5 +25,3 @@ dependencies: - flake8==3.7.9 - flake8_formatter_junit_xml==0.0.6 - azure-cli==2.0.81 - # Uncomment for bootstrap.py - #- GitPython==3.0.5 From d1403a30ce6284080e07b4d5bcbfe0b2bc0abc7d Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 16:15:26 -0800 Subject: [PATCH 16/27] commented gitpython import --- bootstrap/bootstrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 251b4f71..c8364e87 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -1,7 +1,7 @@ import os import sys import argparse -from git import Repo +#from git import Repo class Helper: From 9210b3e56b9285ed50e93018155929ff090827e8 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 16:39:37 -0800 Subject: [PATCH 17/27] pr --- README.md | 4 ---- bootstrap/bootstrap.py | 5 +++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4dcd564a..324f3249 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,6 @@ This template contains code and pipeline definition for a machine learning proje - Active Azure subscription - At least contributor access to Azure subscription -## Directory Structure - -To understand the high level directory structure for this repository, please go through [directory structure 
doc](directorystructure.md) - ## Getting Started To deploy this solution in your subscription, follow the manual instructions in the [getting started](docs/getting_started.md) doc diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index c8364e87..dc0f76b9 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -64,8 +64,9 @@ def replaceimport(self): for file in dirs: fin = open(os.path.join(self._project_directory, file), "rt") data = fin.read() - newimport = "from " + self._project_name + "." - data = data.replace("from diabetes_regression.", newimport) + #newimport = "from " + self._project_name + "." + #data = data.replace("from diabetes_regression.", newimport) + data = data.replace("diabetes_regression.", self.project_name) fin.close() fin = open(os.path.join(self._project_directory, file), "wt") fin.write(data) From aa6774f7e9e528af3bcc8448fc59080fc5b58dd4 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 16:51:48 -0800 Subject: [PATCH 18/27] updated doc --- bootstrap/README.md | 2 +- docs/code_description.md | 2 +- docs/getting_started.md | 14 ++++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index ee179d3c..bbbbff09 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -2,7 +2,7 @@ To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstraping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. This will enable reusing various resources like pre-built pipelines and scripts for your new project. 
-To bootstrap from the existing MLOpsPython repository install and run bootstrap.py script as below +To bootstrap from the existing MLOpsPython repository clone this repository and run bootstrap.py script as below >python bootstrap.py --d [dirpath] --n [projectname] diff --git a/docs/code_description.md b/docs/code_description.md index 1b95d5c0..1b0b710d 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -6,7 +6,7 @@ High level directory structure for this repository: ```bash ├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. -├── bootstrap <- Python script to create a re-usbale code template to bootstrap. +├── bootstrap <- Python script to initialize this repository with a custom project name. ├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). ├── data <- Initial set of data to train and evaluate model. ├── diabetes_regression <- The top-level folder for the ML project. diff --git a/docs/getting_started.md b/docs/getting_started.md index 160d03e9..6f096f19 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -9,8 +9,10 @@ following the instructions [here](https://docs.microsoft.com/en-us/azure/devops/ If you already have an Azure DevOps organization, create a [new project](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops). ## Clone or fork this repository -Fork this repository within GitHub, or clone it into your Azure DevOps project. +To initialize this repository with a custom project name refer [bootstraping doc.](../bootstrap/README.md\bootstrap\README.md) + +To learn existing repository simply fork this repository within GitHub, or clone it into your Azure DevOps project. 
## Create an ARM Service Connection to deploy resources @@ -58,7 +60,7 @@ The variable group should contain the following required variables: | WORKSPACE_SVC_CONNECTION | aml-workspace-connection | | ACI_DEPLOYMENT_NAME | diabetes-aci | -**Note:** +**Note:** The **WORKSPACE_NAME** parameter is used for the Azure Machine Learning Workspace creation. You can provide an existing AML Workspace here if you have one. @@ -68,7 +70,7 @@ be naming collisions with resources that require unique names like azure blob storage and registry DNS naming. Make sure to give a unique value to the BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have unique names (e.g. MyUniqueMLamlcr, MyUniqueML-AML-KV, etc.). The length of -the BASE_NAME value should not exceed 10 characters and it should contain numbers and letters only. +the BASE_NAME value should not exceed 10 characters and it should contain numbers and letters only. The **RESOURCE_GROUP** parameter is used as the name for the resource group that will hold the Azure resources for the solution. If providing an existing AML Workspace, set this value to the corresponding resource group name. @@ -122,11 +124,11 @@ Check out the newly created resources in the [Azure Portal](https://portal.azure (Optional) To remove the resources created for this project you can use the [/environment_setup/iac-remove-environment.yml](../environment_setup/iac-remove-environment.yml) definition or you can just delete the resource group in the [Azure Portal](https://portal.azure.com). -**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and upload the datafile (e.g. 
[diabetes.csv](./data/diabetes.csv)) to the corresponding blob container. You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). +**Note:** The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data. If you want to use your own dataset, you need to [create and register a datastore](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-access-data#azure-machine-learning-studio) in your ML workspace and upload the datafile (e.g. [diabetes.csv](./data/diabetes.csv)) to the corresponding blob container. You can also define a datastore in the ML Workspace with [az cli](https://docs.microsoft.com/en-us/cli/azure/ext/azure-cli-ml/ml/datastore?view=azure-cli-latest#ext-azure-cli-ml-az-ml-datastore-attach-blob). You'll also need to configure DATASTORE_NAME and DATAFILE_NAME variables in ***devopsforai-aml-vg*** variable group. - ## Create an Azure DevOps Azure ML Workspace Service Connection + Install the **Azure Machine Learning** extension to your organization from the [marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml), so that you can set up a service connection to your AML workspace. @@ -202,7 +204,7 @@ The final stage is to deploy the model to the production environment running on [Azure Kubernetes Service](https://azure.microsoft.com/en-us/services/kubernetes-service). **Note:** Creating a Kubernetes cluster on AKS is out of scope of this -tutorial, but you can find set up information +tutorial, but you can find set up information [here](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough-portal#create-an-aks-cluster). 
**Note:** If your target deployment environment is a K8s cluster and you want to implement Canary and/or A/B testing deployment strategies check out this [tutorial](./canary_ab_deployment.md). From 4fe51cdb9f91d0d5cf06f0e32cde6ec306114669 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:06:51 -0800 Subject: [PATCH 19/27] added dirs --- bootstrap/bootstrap.py | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index dc0f76b9..f18061eb 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -1,7 +1,7 @@ import os import sys import argparse -#from git import Repo +# from git import Repo class Helper: @@ -64,9 +64,37 @@ def replaceimport(self): for file in dirs: fin = open(os.path.join(self._project_directory, file), "rt") data = fin.read() - #newimport = "from " + self._project_name + "." - #data = data.replace("from diabetes_regression.", newimport) - data = data.replace("diabetes_regression.", self.project_name) + newimport = "from " + self._project_name + "." 
+ data = data.replace("from diabetes_regression.", newimport) + fin.close() + fin = open(os.path.join(self._project_directory, file), "wt") + fin.write(data) + fin.close() + + def replaceprojectinstances(self): + # Replace imports with new project name + dirs = [r".env.example", + r".pipelines\azdo-base-pipeline.yml", + r".pipelines\azdo-pr-build-train.yml", + r".pipelines\test-ci-build-train.yml", + r".pipelines\test-ci-image.yml", + r".pipelines\test-template-get-model-version.yml", + r".pipelines\test-variables.yml", + r"environment_setup\Dockerfile", + r"environment_setup\install_requirements.sh", + r"ml_service\pipelines\test_build_train_pipeline_with_r_on_dbricks.py", + r"ml_service\pipelines\test_build_train_pipeline_with_r.py", + r"ml_service\pipelines\test_build_train_pipeline.py", + r"ml_service\pipelines\test_verify_train_pipeline.py", + r"ml_service\util\create_scoring_image.py", + r"test\azureml_environment.json", + r"test\conda_dependencies.yml", + r"test\evaluate\evaluate_model.py"] # NOQA: E501 + + for file in dirs: + fin = open(os.path.join(self._project_directory, file), "rt") + data = fin.read() + data = data.replace("diabetes_regression", self.project_name) fin.close() fin = open(os.path.join(self._project_directory, file), "wt") fin.write(data) @@ -107,7 +135,8 @@ def main(args): helper.validateargs() # helper.clonerepo() helper.cleandir() - helper.replaceimport() + # helper.replaceimport() + helper.replaceprojectinstances) helper.deletedir() helper.renamefiles() helper.renamedir() From fdd704e7dc5082ab5d6abd724b850e0b1765cbf2 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:07:15 -0800 Subject: [PATCH 20/27] replaced dir --- bootstrap/bootstrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index f18061eb..1c07f660 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -136,7 +136,7 @@ def main(args): # helper.clonerepo() helper.cleandir() 
# helper.replaceimport() - helper.replaceprojectinstances) + helper.replaceprojectinstances() helper.deletedir() helper.renamefiles() helper.renamedir() From ec6c268215f09b12d3c4a9cb26b18350971625d9 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:18:23 -0800 Subject: [PATCH 21/27] updated path --- bootstrap/bootstrap.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 1c07f660..68b22177 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -76,20 +76,20 @@ def replaceprojectinstances(self): dirs = [r".env.example", r".pipelines\azdo-base-pipeline.yml", r".pipelines\azdo-pr-build-train.yml", - r".pipelines\test-ci-build-train.yml", - r".pipelines\test-ci-image.yml", - r".pipelines\test-template-get-model-version.yml", - r".pipelines\test-variables.yml", + r".pipelines\diabetes_regression-ci-build-train.yml", + r".pipelines\diabetes_regression-ci-image.yml", + r".pipelines\diabetes_regression-template-get-model-version.yml", + r".pipelines\diabetes_regression-variables.yml", r"environment_setup\Dockerfile", r"environment_setup\install_requirements.sh", - r"ml_service\pipelines\test_build_train_pipeline_with_r_on_dbricks.py", - r"ml_service\pipelines\test_build_train_pipeline_with_r.py", - r"ml_service\pipelines\test_build_train_pipeline.py", - r"ml_service\pipelines\test_verify_train_pipeline.py", + r"ml_service\pipelines\diabetes_regression_build_train_pipeline_with_r_on_dbricks.py", + r"ml_service\pipelines\diabetes_regression_build_train_pipeline_with_r.py", + r"ml_service\pipelines\diabetes_regression_build_train_pipeline.py", + r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py", r"ml_service\util\create_scoring_image.py", - r"test\azureml_environment.json", - r"test\conda_dependencies.yml", - r"test\evaluate\evaluate_model.py"] # NOQA: E501 + r"diabetes_regression\azureml_environment.json", + 
r"diabetes_regression\conda_dependencies.yml", + r"diabetes_regression\evaluate\evaluate_model.py"] # NOQA: E501 for file in dirs: fin = open(os.path.join(self._project_directory, file), "rt") From 8ae2249c14b31614dc6d99f0889eb7259aaf9ff9 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:30:02 -0800 Subject: [PATCH 22/27] fixed imports --- bootstrap/bootstrap.py | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 68b22177..dc769b82 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -7,8 +7,8 @@ class Helper: def __init__(self, project_directory, project_name): - self._project_directory = project_directory - self._project_name = project_name + self._project_directory = r"C:\Code\mlopsbyctest\MLOpsPython" + self._project_name = "test" self._git_repo = "https://github.com/microsoft/MLOpsPython.git" @property @@ -57,21 +57,7 @@ def deletedir(self): os.system( 'rmdir /S /Q "{}"'.format(os.path.join(self._project_directory, dir))) # NOQA: E501 - def replaceimport(self): - # Replace imports with new project name - dirs = [r"diabetes_regression\training\test_train.py", - r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py"] # NOQA: E501 - for file in dirs: - fin = open(os.path.join(self._project_directory, file), "rt") - data = fin.read() - newimport = "from " + self._project_name + "." 
- data = data.replace("from diabetes_regression.", newimport) - fin.close() - fin = open(os.path.join(self._project_directory, file), "wt") - fin.write(data) - fin.close() - - def replaceprojectinstances(self): + def replaceprojectname(self): # Replace imports with new project name dirs = [r".env.example", r".pipelines\azdo-base-pipeline.yml", @@ -89,14 +75,17 @@ def replaceprojectinstances(self): r"ml_service\util\create_scoring_image.py", r"diabetes_regression\azureml_environment.json", r"diabetes_regression\conda_dependencies.yml", - r"diabetes_regression\evaluate\evaluate_model.py"] # NOQA: E501 + r"diabetes_regression\evaluate\evaluate_model.py", + r"diabetes_regression\training\test_train.py"] # NOQA: E501 for file in dirs: - fin = open(os.path.join(self._project_directory, file), "rt") + fin = open(os.path.join(self._project_directory, file), + "rt", encoding="utf8") data = fin.read() data = data.replace("diabetes_regression", self.project_name) fin.close() - fin = open(os.path.join(self._project_directory, file), "wt") + fin = open(os.path.join(self._project_directory, file), + "wt", encoding="utf8") fin.write(data) fin.close() @@ -120,8 +109,6 @@ def validateargs(self): def main(args): - # Run this script to create a template from mlopspython - # python bootstrap.py --d [dirpath] --n [projectname] parser = argparse.ArgumentParser(description='New Template') parser.add_argument("--d", type=str, help="Absolute path to new project direcory") @@ -135,8 +122,7 @@ def main(args): helper.validateargs() # helper.clonerepo() helper.cleandir() - # helper.replaceimport() - helper.replaceprojectinstances() + helper.replaceprojectname() helper.deletedir() helper.renamefiles() helper.renamedir() From 6a0f07b95df3ab74e36428b46fddfd2984d45cae Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:32:39 -0800 Subject: [PATCH 23/27] updated script --- bootstrap/bootstrap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index dc769b82..5081f6c8 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -7,8 +7,8 @@ class Helper: def __init__(self, project_directory, project_name): - self._project_directory = r"C:\Code\mlopsbyctest\MLOpsPython" - self._project_name = "test" + self._project_directory = project_directory + self._project_name = project_name self._git_repo = "https://github.com/microsoft/MLOpsPython.git" @property From aaac71bee85ceb081b6015f2cf80790a496cf315 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:37:46 -0800 Subject: [PATCH 24/27] linting fix --- bootstrap/bootstrap.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 5081f6c8..e2bc562d 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -23,11 +23,11 @@ def project_name(self): def git_repo(self): return self._git_repo - def clonerepo(self): - # Download MLOpsPython repo from git - Repo.clone_from( - self._git_repo, self._project_directory, branch="master", depth=1) - print(self._project_directory) + # def clonerepo(self): + # # Download MLOpsPython repo from git + # Repo.clone_from( + # self._git_repo, self._project_directory, branch="master", depth=1) + # print(self._project_directory) def renamefiles(self): # Rename all files starting with diabetes_regression with project name @@ -64,14 +64,14 @@ def replaceprojectname(self): r".pipelines\azdo-pr-build-train.yml", r".pipelines\diabetes_regression-ci-build-train.yml", r".pipelines\diabetes_regression-ci-image.yml", - r".pipelines\diabetes_regression-template-get-model-version.yml", + r".pipelines\diabetes_regression-template-get-model-version.yml", # NOQA: E501 r".pipelines\diabetes_regression-variables.yml", r"environment_setup\Dockerfile", r"environment_setup\install_requirements.sh", - 
r"ml_service\pipelines\diabetes_regression_build_train_pipeline_with_r_on_dbricks.py", - r"ml_service\pipelines\diabetes_regression_build_train_pipeline_with_r.py", - r"ml_service\pipelines\diabetes_regression_build_train_pipeline.py", - r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py", + r"ml_service\pipelines\diabetes_regression_build_train_pipeline_with_r_on_dbricks.py", # NOQA: E501 + r"ml_service\pipelines\diabetes_regression_build_train_pipeline_with_r.py", # NOQA: E501 + r"ml_service\pipelines\diabetes_regression_build_train_pipeline.py", # NOQA: E501 + r"ml_service\pipelines\diabetes_regression_verify_train_pipeline.py", # NOQA: E501 r"ml_service\util\create_scoring_image.py", r"diabetes_regression\azureml_environment.json", r"diabetes_regression\conda_dependencies.yml", From 4a9db3dd8ec70dccc4402fa976c5a6bed5478d94 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 17:41:08 -0800 Subject: [PATCH 25/27] linting fix --- bootstrap/bootstrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index e2bc562d..67f86297 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -26,7 +26,7 @@ def git_repo(self): # def clonerepo(self): # # Download MLOpsPython repo from git # Repo.clone_from( - # self._git_repo, self._project_directory, branch="master", depth=1) + # self._git_repo, self._project_directory, branch="master", depth=1) # NOQA: E501 # print(self._project_directory) def renamefiles(self): From 80e5d45df2857ec6c04e5c6e5dc4d1f87b66f88a Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Mon, 10 Feb 2020 21:34:12 -0800 Subject: [PATCH 26/27] pr --- bootstrap/bootstrap.py | 2 +- docs/getting_started.md | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 67f86297..d796f6df 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -58,7 +58,7 @@ def deletedir(self): 
'rmdir /S /Q "{}"'.format(os.path.join(self._project_directory, dir))) # NOQA: E501 def replaceprojectname(self): - # Replace imports with new project name + # Replace instances of diabetes_regression within files dirs = [r".env.example", r".pipelines\azdo-base-pipeline.yml", r".pipelines\azdo-pr-build-train.yml", diff --git a/docs/getting_started.md b/docs/getting_started.md index 6f096f19..4380d99c 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -8,11 +8,12 @@ following the instructions [here](https://docs.microsoft.com/en-us/azure/devops/ If you already have an Azure DevOps organization, create a [new project](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops). -## Clone or fork this repository +## Decide best option to copy repository code -To initialize this repository with a custom project name refer [bootstraping doc.](../bootstrap/README.md\bootstrap\README.md) +* Fork this repository if there is a desire to contribute back to the repository else +* Use this [code template](https://github.com/microsoft/MLOpsPython/generate) which copies the entire code base to your own GitHub location with the git commit history restarted. This can be used for learning and following the guide. -To learn existing repository simply fork this repository within GitHub, or clone it into your Azure DevOps project. +If the desire is to use this project for your own machine learning code, follow the [bootstrap instructions](https://github.com/microsoft/MLOpsPython/compare/..%5Cbootstrap%5CREADME.md?expand=1) after the code template is complete. 
## Create an ARM Service Connection to deploy resources From fb4b33f31d40f66f01bb0abff0c78d6958ddd8b8 Mon Sep 17 00:00:00 2001 From: sushantdivate Date: Tue, 11 Feb 2020 09:50:25 -0800 Subject: [PATCH 27/27] corrected URL --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 4380d99c..0b4b8379 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -13,7 +13,7 @@ If you already have an Azure DevOps organization, create a [new project](https:/ * Fork this repository if there is a desire to contribute back to the repository else * Use this [code template](https://github.com/microsoft/MLOpsPython/generate) which copies the entire code base to your own GitHub location with the git commit history restarted. This can be used for learning and following the guide. -If the desire is to use this project for your own machine learning code, follow the [bootstrap instructions](https://github.com/microsoft/MLOpsPython/compare/..%5Cbootstrap%5CREADME.md?expand=1) after the code template is complete. +If you want to use this project for your own machine learning code, follow the [bootstrap instructions](../bootstrap/README.md) after generating your repository from the code template. ## Create an ARM Service Connection to deploy resources