From d6d6decaab09a1eeb8e9ac41c258b1ce26b3af62 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Thu, 25 Mar 2021 19:02:55 -0700 Subject: [PATCH 01/38] initial commit v2.0 --- scaleapi/__init__.py | 397 ++++++++++++++++++++------------------- scaleapi/_version.py | 3 +- scaleapi/api.py | 121 ++++++++++++ scaleapi/batches.py | 44 +++-- scaleapi/exceptions.py | 53 ++++++ scaleapi/projects.py | 16 +- scaleapi/tasks.py | 71 +++++-- setup.py | 75 ++++---- tests/test_client.py | 414 ++++++++++++++++++++++++----------------- 9 files changed, 753 insertions(+), 441 deletions(-) create mode 100644 scaleapi/api.py create mode 100644 scaleapi/exceptions.py diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index e4f7ccc..ce767e4 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -1,51 +1,26 @@ -import requests -import platform -import urllib.parse - -from .tasks import Task -from .batches import Batch -from .projects import Project -from ._version import __version__ - -TASK_TYPES = [ - 'annotation', - 'audiotranscription', - 'categorization', - 'comparison', - 'cuboidannotation', - 'datacollection', - 'imageannotation', - 'lineannotation', - 'namedentityrecognition', - 'pointannotation', - 'polygonannotation', - 'segmentannotation', - 'transcription', - 'textcollection', - 'documenttranscription', - 'videoannotation', - 'videoboxannotation', - 'videoplaybackannotation', - 'videocuboidannotation' -] -SCALE_ENDPOINT = 'https://api.scale.com/v1/' -DEFAULT_LIMIT = 100 -DEFAULT_OFFSET = 0 - - -class ScaleException(Exception): - def __init__(self, message, errcode): - super(ScaleException, self).__init__( - ' {}'.format(errcode, message)) - self.code = errcode - - -class ScaleInvalidRequest(ScaleException, ValueError): - pass +from typing import Dict, Generic, List, TypeVar, Union + +from scaleapi.batches import Batch, BatchStatus +from scaleapi.exceptions import ScaleInvalidRequest +from scaleapi.projects import Project + +from .api import Api +from .tasks 
import Task, TaskReviewStatus, TaskStatus, TaskType +from ._version import __version__ # noqa: F401 + +T = TypeVar("T") -class Paginator(list): - def __init__(self, docs, total, limit, offset, has_more, next_token=None): +class Paginator(list, Generic[T]): + def __init__( + self, + docs: List[T], + total: int, + limit: int, + offset: int, + has_more: bool, + next_token=None, + ): super(Paginator, self).__init__(docs) self.docs = docs self.total = total @@ -55,204 +30,238 @@ def __init__(self, docs, total, limit, offset, has_more, next_token=None): self.next_token = next_token -class Tasklist(Paginator): +class Tasklist(Paginator[Task]): pass -class Batchlist(Paginator): +class Batchlist(Paginator[Batch]): pass class ScaleClient(object): - def __init__(self, api_key, user_agent_extension=None): - self.api_key = api_key - self._headers = { - "Content-Type": "application/json", - "User-Agent": _generate_useragent(user_agent_extension) - } - - def _getrequest(self, endpoint, params=None): - """Makes a get request to an endpoint. + def __init__(self, api_key, source=None): + self.api = Api(api_key, source) - If an error occurs, assumes that endpoint returns JSON as: - { 'status_code': XXX, - 'error': 'I failed' } - """ - params = params or {} - r = requests.get(SCALE_ENDPOINT + endpoint, - headers=self._headers, - auth=(self.api_key, ''), params=params) - - if r.status_code == 200: - return r.json() - else: - try: - error = r.json()['error'] - except ValueError: - error = r.text - if r.status_code == 400: - raise ScaleInvalidRequest(error, r.status_code) - else: - raise ScaleException(error, r.status_code) - - def _postrequest(self, endpoint, payload=None): - """Makes a post request to an endpoint. 
- - If an error occurs, assumes that endpoint returns JSON as: - { 'status_code': XXX, - 'error': 'I failed' } - """ - payload = payload or {} - r = requests.post(SCALE_ENDPOINT + endpoint, json=payload, - headers=self._headers, - auth=(self.api_key, '')) - - if r.status_code == 200: - return r.json() - else: - try: - error = r.json()['error'] - except ValueError: - error = r.text - if r.status_code == 400: - raise ScaleInvalidRequest(error, r.status_code) - else: - raise ScaleException(error, r.status_code) - - def fetch_task(self, task_id): + def fetch_task(self, task_id: str) -> Task: """Fetches a task. - Returns the associated task. """ - return Task(self._getrequest('task/%s' % task_id), self) + endpoint = f"task/{task_id}" + return Task(self.api._get_request(endpoint), self) - def cancel_task(self, task_id): + def cancel_task(self, task_id: str) -> Task: """Cancels a task. - Returns the associated task. Raises a ScaleException if it has already been canceled. """ - return Task(self._postrequest('task/%s/cancel' % task_id), self) + endpoint = f"task/{task_id}/cancel" + return Task(self.api._post_request(endpoint), self) - def tasks(self, **kwargs): + def tasks(self, **kwargs) -> Tasklist: """Returns a list of your tasks. Returns up to 100 at a time, to get more, use the next_token param passed back. - - Note that offset is deprecated. - start/end_time are ISO8601 dates, the time range of tasks to fetch. status can be 'completed', 'pending', or 'canceled'. type is the task type. limit is the max number of results to display per page, next_token can be use to fetch the next page of tasks. customer_review_status can be 'pending', 'fixed', 'accepted' or 'rejected'. - offset (deprecated) is the number of results to skip (for showing more pages). 
""" - allowed_kwargs = {'start_time', 'end_time', 'status', 'type', 'project', - 'batch', 'limit', 'offset', 'completed_before', 'completed_after', - 'next_token', 'customer_review_status', 'updated_before', 'updated_after', - 'tags', 'unique_id'} + allowed_kwargs = { + "start_time", + "end_time", + "status", + "type", + "project", + "batch", + "limit", + "completed_before", + "completed_after", + "next_token", + "customer_review_status", + "tags", + "updated_before", + "updated_after", + "unique_id", + } + for key in kwargs: if key not in allowed_kwargs: - raise ScaleInvalidRequest('Illegal parameter %s for ScaleClient.tasks()' - % key, None) - response = self._getrequest('tasks', params=kwargs) - docs = [Task(json, self) for json in response['docs']] - return Tasklist(docs, response['total'], response['limit'], - response['offset'], response['has_more'], response.get('next_token')) - - def create_task(self, task_type, **kwargs): - endpoint = 'task/' + task_type - taskdata = self._postrequest(endpoint, payload=kwargs) + raise ScaleInvalidRequest( + f"Illegal parameter {key} for ScaleClient.tasks()", None + ) + + response = self.api._get_request("tasks", params=kwargs) + + docs = [Task(json, self) for json in response["docs"]] + return Tasklist( + docs, + response["total"], + response["limit"], + response["offset"], + response["has_more"], + response.get("next_token"), + ) + + def tasks_all( + self, + project_name: str, + batch_name: str = None, + type: TaskType = None, + status: TaskStatus = None, + review_status: Union[List[TaskReviewStatus], TaskReviewStatus] = None, + unique_id: Union[List[str], str] = None, + completed_after: str = None, + completed_before: str = None, + updated_after: str = None, + updated_before: str = None, + created_after: str = None, + created_before: str = None, + tags: Union[List[str], str] = None, + ) -> List[Task]: + + tasks_list: List[Task] = [] + next_token = None + has_more = True + + while has_more: + tasks_args = { + 
"next_token": next_token, + "start_time": created_after, + "end_time": created_before, + "project": project_name, + "batch": batch_name, + "completed_before": completed_before, + "completed_after": completed_after, + "tags": tags, + "updated_before": updated_before, + "updated_after": updated_after, + "unique_id": unique_id, + } + + if status: + tasks_args["status"] = status.value + if type: + tasks_args["type"] = type.value + if review_status: + tasks_args["customer_review_status"] = review_status.value + + tasks = self.tasks(**tasks_args) + next_token = tasks.next_token + has_more = tasks.has_more + tasks_list.extend(tasks.docs) + + return tasks_list + + def create_task(self, task_type: TaskType, **kwargs) -> Task: + endpoint = f"task/{task_type.value}" + taskdata = self.api._post_request(endpoint, body=kwargs) return Task(taskdata, self) - def create_batch(self, project, batch_name, callback): + def create_batch(self, project: str, batch_name: str, callback: str = "") -> Batch: + endpoint = "batches" payload = dict(project=project, name=batch_name, callback=callback) - batchdata = self._postrequest('batches', payload) + batchdata = self.api._post_request(endpoint, body=payload) return Batch(batchdata, self) - def finalize_batch(self, batch_name): - batchdata = self._postrequest('batches/%s/finalize' % quote_string(batch_name)) + def finalize_batch(self, batch_name: str) -> Batch: + endpoint = f"batches/{Api.quote_string(batch_name)}/finalize" + batchdata = self.api._post_request(endpoint) return Batch(batchdata, self) - def batch_status(self, batch_name): - status_data = self._getrequest('batches/%s/status' % quote_string(batch_name)) + def batch_status(self, batch_name: str) -> Dict: + endpoint = f"batches/{Api.quote_string(batch_name)}/status" + status_data = self.api._get_request(endpoint) return status_data def get_batch(self, batch_name): - batchdata = self._getrequest('batches/%s' % quote_string(batch_name)) + endpoint = 
f"batches/{Api.quote_string(batch_name)}" + batchdata = self.api._get_request(endpoint) return Batch(batchdata, self) - def list_batches(self, **kwargs): - allowed_kwargs = {'start_time', 'end_time', 'status', 'project', - 'limit', 'offset', } + def list_batches(self, **kwargs) -> Batchlist: + allowed_kwargs = { + "start_time", + "end_time", + "status", + "project", + "limit", + "offset", + } + for key in kwargs: if key not in allowed_kwargs: - raise ScaleInvalidRequest('Illegal parameter %s for ScaleClient.list_batches()' - % key, None) - response = self._getrequest('batches', params=kwargs) - docs = [Batch(doc, self) for doc in response['docs']] + raise ScaleInvalidRequest( + f"Illegal parameter {key} for ScaleClient.list_batches()" + ) + endpoint = "batches" + response = self.api._get_request(endpoint, params=kwargs) + docs = [Batch(doc, self) for doc in response["docs"]] + return Batchlist( - docs, response['totalDocs'], response['limit'], response['has_more'], response.get( - 'next_token'), + docs, + response["totalDocs"], + response["limit"], + response["offset"], + response["has_more"], ) - def create_project(self, project_name, type, params): - payload = dict(type=type, name=project_name, params=params) - projectdata = self._postrequest('projects', payload) + def list_batches_all( + self, + project_name: str, + batch_status: BatchStatus = None, + created_after: str = None, + created_before: str = None, + limit: int = 100, + ) -> List[Batch]: + + batches_list: List[Batch] = [] + has_more = True + offset = 0 + + while has_more: + batches_args = { + "start_time": created_after, + "end_time": created_before, + "project": project_name, + "offset": offset, + "limit": limit, + } + + if batch_status: + batches_args["status"] = batch_status.value + + batches = self.list_batches(**batches_args) + offset += batches.limit + has_more = batches.has_more + batches_list.extend(batches.docs) + + return batches_list + + def create_project(self, project_name: str, type: 
TaskType, params) -> Project: + endpoint = "projects" + payload = dict(type=type.value, name=project_name, params=params) + projectdata = self.api._post_request(endpoint, body=payload) return Project(projectdata, self) - def get_project(self, project_name): - projectdata = self._getrequest('projects/%s' % quote_string(project_name)) + def get_project(self, project_name: str) -> Project: + endpoint = f"projects/{Api.quote_string(project_name)}" + projectdata = self.api._get_request(endpoint) return Project(projectdata, self) - def projects(self): - response = self._getrequest('projects') - return response + def projects(self) -> List[Project]: + endpoint = "projects" + project_list = self.api._get_request(endpoint) + return [Project(project, self) for project in project_list] - def update_project(self, project_name, **kwargs): - allowed_kwargs = {'patch', 'instruction'} + def update_project(self, project_name: str, **kwargs) -> Project: + allowed_kwargs = {"patch", "instruction"} for key in kwargs: if key not in allowed_kwargs: - raise ScaleInvalidRequest('Illegal parameter %s for ScaleClient.update_project()' - % key, None) - projectdata = self._postrequest('projects/%s/setParams' % quote_string(project_name), payload=kwargs) - return projectdata - -def _generate_useragent(extension=None): - try: - python_version = platform.python_version() - os_platform = platform.platform() - - user_agent = " ".join( - filter( - None, - [ - "{}/{}".format(__name__, __version__), - "Python/{}".format(python_version), - "OS/{}".format(os_platform), - extension, - ], - ) - ) - return user_agent - - except Exception: - return "scaleapi-python-client" - -def quote_string(text): - """`quote_string('a bc/def')` -> `a%20bc%2Fdef` - Project and Batch names can be a part of URL, which causes an error - in case of a special character used. Quotation assures - the right object to be retrieved from API. 
- """ - return urllib.parse.quote(text, safe="") + raise ScaleInvalidRequest( + f"Illegal parameter {key} for" "ScaleClient.update_project()", None, + ) -def _AddTaskTypeCreator(task_type): - def create_task_wrapper(self, **kwargs): - return self.create_task(task_type, **kwargs) - setattr(ScaleClient, 'create_' + task_type + '_task', create_task_wrapper) - - -for taskType in TASK_TYPES: - _AddTaskTypeCreator(taskType) + endpoint = f"projects/{Api.quote_string(project_name)}/setParams" + projectdata = self.api._post_request(endpoint, body=kwargs) + return Project(projectdata, self) diff --git a/scaleapi/_version.py b/scaleapi/_version.py index 92192ee..9c9b48f 100644 --- a/scaleapi/_version.py +++ b/scaleapi/_version.py @@ -1 +1,2 @@ -__version__ = "1.0.4" +__version__ = "2.0.0" +__package_name__ = "scaleapi" diff --git a/scaleapi/api.py b/scaleapi/api.py new file mode 100644 index 0000000..ab1db01 --- /dev/null +++ b/scaleapi/api.py @@ -0,0 +1,121 @@ +import platform +import urllib.parse + +import requests + +from ._version import __package_name__, __version__ +from .exceptions import ( + ScaleDuplicateTask, + ScaleException, + ScaleInternalError, + ScaleInvalidRequest, + ScaleNotEnabled, + ScaleResourceNotFound, + ScaleTooManyRequests, + ScaleUnauthorized, +) + +SCALE_ENDPOINT = "https://api.scale.com/v1" + + +class Api(object): + def __init__(self, api_key, user_agent_extension=None): + if api_key == "" or api_key is None: + raise ScaleException("Please provide a valid API Key.") + + self.api_key = api_key + + self._auth = (self.api_key, "") + self._headers = { + "Content-Type": "application/json", + "User-Agent": self._generate_useragent(user_agent_extension), + } + + def _request( + self, method, endpoint, headers=None, auth=None, params=None, body=None + ): + """Generic request method with error handling.""" + + url = f"{SCALE_ENDPOINT}/{endpoint}" + error_message = None + + try: + params = params or {} + body = body or {} + + res = requests.request( + 
method=method, + url=url, + headers=headers, + auth=auth, + params=params, + json=body, + ) + + except ( + requests.exceptions.HTTPError, + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + requests.exceptions.RequestException, + ) as err: + raise err + + if res.status_code == 200: + return res.json() + else: + error_message = res.json().get("error", res.text) + + if res.status_code == 400: + raise ScaleInvalidRequest(error_message, res.status_code) + elif res.status_code == 401: + raise ScaleUnauthorized(error_message, res.status_code) + elif res.status_code == 402: + raise ScaleNotEnabled(error_message, res.status_code) + elif res.status_code == 404: + raise ScaleResourceNotFound(error_message, res.status_code) + elif res.status_code == 409: + raise ScaleDuplicateTask(error_message, res.status_code) + elif res.status_code == 429: + raise ScaleTooManyRequests(error_message, res.status_code) + elif res.status_code == 500: + raise ScaleInternalError(error_message, res.status_code) + else: + raise ScaleException(error_message, res.status_code) + + def _get_request(self, endpoint, params=None): + return self._request( + "GET", endpoint, headers=self._headers, auth=self._auth, params=params + ) + + def _post_request(self, endpoint, body=None): + return self._request( + "POST", endpoint, headers=self._headers, auth=self._auth, body=body + ) + + @staticmethod + def _generate_useragent(extension=None): + python_version = platform.python_version() + os_platform = platform.platform() + + user_agent = " ".join( + filter( + None, + [ + f"{__package_name__}/{__version__}", + f"Python/{python_version}", + f"OS/{os_platform}", + extension, + ], + ) + ) + return user_agent + + @staticmethod + def quote_string(text): + """`quote_string('a bc/def')` -> `a%20bc%2Fdef` + + Project and Batch names can be a part of URL, which causes + an error in case of a special character used. + Quotation assures the right object to be retrieved from API. 
+ """ + return urllib.parse.quote(text, safe="") diff --git a/scaleapi/batches.py b/scaleapi/batches.py index d5ca697..12585fe 100644 --- a/scaleapi/batches.py +++ b/scaleapi/batches.py @@ -1,33 +1,47 @@ +from enum import Enum + + +class BatchStatus(Enum): + Staging = "staging" + InProgress = "in_progress" + Completed = "completed" + + class Batch(object): - def __init__(self, param_dict, client): - self.param_dict = param_dict - self.name = param_dict['name'] - self.status = param_dict["status"] - - self.pending = None - self.completed = None - self.error = None - self.canceled = None - self.client = client + def __init__(self, json, client): + self._json = json + self.name = json["name"] + self.status = json["status"] + self.project = json["project"] + self.created_at = json["created_at"] + + self.tasks_pending = None + self.tasks_completed = None + self.tasks_error = None + self.tasks_canceled = None + self._client = client self.get_status() def __hash__(self): return hash(self.name) def __str__(self): - return 'Batch(name=%s)' % self.name + return f"Batch(name={self.name})" def __repr__(self): - return 'Batch(%s)' % self.param_dict + return f"Batch({self._json})" + + def as_dict(self): + return self._json def finalize(self): - res = self.client.finalize_batch(self.name) + res = self._client.finalize_batch(self.name) self.status = res.status return res def get_status(self): - res = self.client.batch_status(self.name) + res = self._client.batch_status(self.name) self.status = res["status"] for stat in ["pending", "completed", "error", "canceled"]: - setattr(self, stat, res.get(stat, 0)) + setattr(self, "tasks_" + stat, res.get(stat, 0)) return res diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py new file mode 100644 index 0000000..be6fb25 --- /dev/null +++ b/scaleapi/exceptions.py @@ -0,0 +1,53 @@ +class ScaleException(Exception): + def __init__(self, message, errcode=None): + self.code = errcode + if errcode: + super(ScaleException, self).__init__(f" 
{message}") + else: + super(ScaleException, self).__init__(f" {message}") + + +class ScaleInvalidRequest(ScaleException): + """400 - Bad Request -- The request was unacceptable, + often due to missing a required parameter.""" + + pass + + +class ScaleUnauthorized(ScaleException): + """401 - Unauthorized -- No valid API key provided.""" + + pass + + +class ScaleNotEnabled(ScaleException): + """402 - Not enabled -- Please contact sales@scaleapi.com before creating + this type of task.""" + + pass + + +class ScaleResourceNotFound(ScaleException): + """404 - Not Found -- The requested resource doesn't exist.""" + + pass + + +class ScaleDuplicateTask(ScaleException): + """409 - Conflict -- The provided idempotency key or unique_id is already + in use for a different request.""" + + pass + + +class ScaleTooManyRequests(ScaleException): + """429 - Too Many Requests -- Too many requests hit the API too quickly.""" + + pass + + +class ScaleInternalError(ScaleException): + """500 - Internal Server Error -- We had a problem with our server. 
+ Try again later.""" + + pass diff --git a/scaleapi/projects.py b/scaleapi/projects.py index b9dddd7..87361ec 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -1,14 +1,18 @@ class Project(object): - def __init__(self, param_dict, client): - self.param_dict = param_dict - self.name = param_dict['name'] - self.client = client + def __init__(self, json, client): + self._json = json + self.name = json["name"] + self.type = json["type"] + self._client = client def __hash__(self): return hash(self.name) def __str__(self): - return 'Project(name=%s)' % self.name + return f"Project(name={self.name})" def __repr__(self): - return 'Project(%s)' % self.param_dict + return f"Project({self._json})" + + def as_dict(self): + return self._json diff --git a/scaleapi/tasks.py b/scaleapi/tasks.py index a94579a..e47e545 100644 --- a/scaleapi/tasks.py +++ b/scaleapi/tasks.py @@ -1,30 +1,73 @@ +from enum import Enum + + +class TaskType(Enum): + Annotation = "annotation" + Categorization = "categorization" + Comparison = "comparison" + CuboidAnnotation = "cuboidannotation" + DataCollection = "datacollection" + DocumentModel = "documentmodel" + DocumentTranscription = "documenttranscription" + ImageAnnotation = "imageannotation" + LaneAnnotation = "laneannotation" + LidarAnnotation = "lidarannotation" + LidarLinking = "lidarlinking" + LidarSegmentation = "lidarsegmentation" + LidarTopdown = "lidartopdown" + LineAnnotation = "lineannotation" + NamedEntityRecognition = "namedentityrecognition" + PointAnnotation = "pointannotation" + PolygonAnnotation = "polygonannotation" + SegmentAnnotation = "segmentannotation" + Transcription = "transcription" + TextCollection = "textcollection" + VideoAnnotation = "videoannotation" + VideoBoxAnnotation = "videoboxannotation" + VideoPlaybackAnnotation = "videoplaybackannotation" + VideoCuboidAnnotation = "videocuboidannotation" + + +class TaskReviewStatus(Enum): + Accepted = "accepted" + Fixed = "fixed" + Commented = "commented" + 
Rejected = "rejected" + + +class TaskStatus(Enum): + Pending = "pending" + Completed = "completed" + Canceled = "canceled" + + class Task(object): """Task class, containing task information.""" - def __init__(self, param_dict, client): - self.client = client - self.param_dict = param_dict - self.id = param_dict['task_id'] + def __init__(self, json, client): + self._client = client + self._json = json + self.id = json["task_id"] def __getattr__(self, name): - if name in self.param_dict: - return self.param_dict[name] - if name in self.params: - return self.params[name] - raise AttributeError("'%s' object has no attribute %s" - % (type(self).__name__, name)) + if name in self._json: + return self._json[name] + raise AttributeError(f"'{type(self).__name__}' object has no attribute {name}") def __hash__(self): return hash(self.id) def __str__(self): - return 'Task(id=%s)' % self.id + return f"Task(id={self.id})" def __repr__(self): - return 'Task(%s)' % self.param_dict + return f"Task({self._json})" + + def as_dict(self): + return self._json def refresh(self): - self.param_dict = self.client._getrequest('task/%s' % self.id) + self._json = self._client.fetch_task(self.id).as_dict() def cancel(self): - self.client.cancel_task(self.id) + self._client.cancel_task(self.id) diff --git a/setup.py b/setup.py index 5a8621a..595db44 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,3 @@ -import sys -import warnings import os.path try: @@ -7,62 +5,53 @@ except ImportError: from distutils.core import setup -install_requires = ['requests>=2.4.2'] +install_requires = ["requests>=2.25.0"] -if sys.version_info < (3, 4, 0): - install_requires.append('enum34') - -if sys.version_info < (2, 7, 9): - warnings.warn( - 'Users have reported issues with SNI / SSL by using Scale on ' - 'versions of Python older than 2.7.9. If at all possible, you should ' - 'upgrade your version of Python. 
' - 'If you have any questions, please file an issue on Github or ' - 'contact us at support@scale.com.', - DeprecationWarning) - install_requires.append('pyOpenSSL') - install_requires.append('ndg-httpsclient') - install_requires.append('pyasn1') - install_requires.append('idna') - install_requires.append('requests[security]') def read(rel_path): here = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(here, rel_path), 'r') as fp: + with open(os.path.join(here, rel_path), "r") as fp: return fp.read() + def get_version(rel_path): for line in read(rel_path).splitlines(): - if line.startswith('__version__'): + if line.startswith("__version__"): delim = '"' if '"' in line else "'" return line.split(delim)[1] raise RuntimeError("Unable to find a valid __version__ string in %s." % rel_path) + setup( - name='scaleapi', - packages=['scaleapi'], + name="scaleapi", + packages=["scaleapi"], version=get_version("scaleapi/_version.py"), - description='The official Python client library for Scale AI, the Data Platform for AI', - author='Scale AI', - author_email='support@scale.com', - url='https://github.com/scaleapi/scaleapi-python-client', + description="The official Python client library for Scale AI, " + "the Data Platform for AI", + author="Scale AI", + author_email="support@scale.com", + url="https://github.com/scaleapi/scaleapi-python-client", keywords=[ - 'scale', - 'scaleapi', - 'tasks', - 'categorization', - 'labeling', - 'annotation', + "scale", + "scaleapi", + "tasks", + "categorization", + "labeling", + "annotation", ], install_requires=install_requires, - classifiers=['Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'License :: OSI Approved :: MIT License', - 'Intended Audience :: Developers', - 
'Topic :: Software Development :: Libraries'] + python_requires=">=3.6", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: MIT License", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries", + ], ) diff --git a/tests/test_client.py b/tests/test_client.py index 05222b1..5fbd90c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,223 +1,276 @@ # coding: utf-8 -import pytest -import scaleapi +import os import time +import uuid from datetime import datetime -from random import randint -import os + +import pytest +import scaleapi +from scaleapi.exceptions import (ScaleDuplicateTask, ScaleInvalidRequest, + ScaleResourceNotFound, ScaleUnauthorized) +from scaleapi.tasks import TaskType + +TEST_PROJECT_NAME = "scaleapi-python-sdk" try: - test_api_key = os.environ['SCALE_TEST_API_KEY'] - client = scaleapi.ScaleClient(test_api_key, 'pytest') + test_api_key = os.environ["SCALE_TEST_API_KEY"] + client = scaleapi.ScaleClient(test_api_key, "pytest") except KeyError: - raise Exception("Please set the environment variable SCALE_TEST_API_KEY to run tests.") - -def make_a_task(): - return client.create_imageannotation_task( - callback_url = "http://www.example.com/callback", - instruction = "Draw a box around each baby cow and big cow.", - attachment_type = "image", - attachment = "http://i.imgur.com/v4cBreD.jpg", - geometries = { + raise Exception( + "Please set the environment variable SCALE_TEST_API_KEY to run tests." 
+ ) + + +def test_invalidkey_fail(): + client_fail = scaleapi.ScaleClient("dummy_api_key", "pytest") + with pytest.raises(ScaleUnauthorized): + client_fail.list_batches(limit=1) + + +def make_a_task(unique_id: str = None): + + args = { + "callback_url": "http://www.example.com/callback", + "instruction": "Draw a box around each baby cow and big cow.", + "attachment_type": "image", + "attachment": "http://i.imgur.com/v4cBreD.jpg", + "geometries": { "box": { - "objects_to_annotate": ["Baby Cow", "Big Cow"], - "min_height": 10, - "min_width": 10 + "objects_to_annotate": ["Baby Cow", "Big Cow"], + "min_height": 10, + "min_width": 10, } - } - ) + }, + } + if unique_id: + args["unique_id"] = unique_id + + return client.create_task(TaskType.ImageAnnotation, **args) + + +def test_uniquekey_fail(): + unique_key = str(uuid.uuid4()) + make_a_task(unique_key) + with pytest.raises(ScaleDuplicateTask): + make_a_task(unique_key) + def test_categorize_ok(): - task = client.create_categorization_task( - callback_url='http://www.example.com/callback', - instruction='Is this company public or private?', - attachment_type='website', + client.create_task( + TaskType.Categorization, + callback_url="http://www.example.com/callback", + instruction="Is this company public or private?", + attachment_type="website", force=True, - attachment='http://www.google.com/', - categories=['public', 'private']) + attachment="http://www.google.com/", + categories=["public", "private"], + ) + def test_categorize_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_categorization_task( - callback_url='http://www.example.com/callback', - categories=['public', 'private']) + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.Categorization, + callback_url="http://www.example.com/callback", + categories=["public", "private"], + ) + def test_transcription_ok(): - task = client.create_transcription_task( - callback_url='http://www.example.com/callback', - 
instruction='Transcribe the given fields. Then for each news item on the page, transcribe the information for the row.', - attachment_type='website', - attachment='http://www.google.com/', - fields={ - 'title': 'Title of Webpage', - 'top_result': 'Title of the top result' - }, + client.create_task( + TaskType.Transcription, + callback_url="http://www.example.com/callback", + instruction="Transcribe the given fields. Then for each news item on the page, " + "transcribe the information for the row.", + attachment_type="website", + attachment="http://www.google.com/", + fields={"title": "Title of Webpage", "top_result": "Title of the top result"}, repeatable_fields={ - 'username': 'Username of submitter', - 'comment_count': 'Number of comments' - }) + "username": "Username of submitter", + "comment_count": "Number of comments", + }, + ) + def test_transcription_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_transcription_task( - callback_url='http://www.example.com/callback', - attachment_type='website') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.Transcription, + callback_url="http://www.example.com/callback", + attachment_type="website", + ) + def test_imageannotation_ok(): - client.create_imageannotation_task( - callback_url = "http://www.example.com/callback", - instruction = "Draw a box around each baby cow and big cow.", - attachment_type = "image", - attachment = "http://i.imgur.com/v4cBreD.jpg", - geometries = { + client.create_task( + TaskType.ImageAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each baby cow and big cow.", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + geometries={ "box": { - "objects_to_annotate": ["Baby Cow", "Big Cow"], - "min_height": 10, - "min_width": 10 + "objects_to_annotate": ["Baby Cow", "Big Cow"], + "min_height": 10, + "min_width": 10, } - } + }, ) + def test_imageannotation_fail(): - with 
pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_imageannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a box around each **baby cow** and **big cow**', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.ImageAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each **baby cow** and **big cow**", + attachment_type="image", + ) + def test_documenttranscription_ok(): - client.create_documenttranscription_task( - callback_url= 'http://www.example.com/callback', - instruction= 'Please transcribe this receipt.', - attachment= 'http://document.scale.com/receipt-20200519.jpg', - features= [ - { - 'type': "block", - 'label': "barcode", - } - ] + client.create_task( + TaskType.DocumentTranscription, + callback_url="http://www.example.com/callback", + instruction="Please transcribe this receipt.", + attachment="http://document.scale.com/receipt-20200519.jpg", + features=[{"type": "block", "label": "barcode"}], ) + def test_documenttranscription_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_imageannotation_task( - callback_url='http://www.example.com/callback', - instruction='Please transcribe this receipt.', - ) + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.DocumentTranscription, + callback_url="http://www.example.com/callback", + instruction="Please transcribe this receipt.", + ) + def test_annotation_ok(): - task = client.create_annotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a box around each **baby cow** and **big cow**', - attachment_type='image', - attachment='http://i.imgur.com/v4cBreD.jpg', - min_width='30', - min_height='30', - objects_to_annotate=['baby cow', 'big cow'], - with_labels=True) + client.create_task( + TaskType.Annotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each **baby 
cow** and **big cow**", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + min_width="30", + min_height="30", + objects_to_annotate=["baby cow", "big cow"], + with_labels=True, + ) + def test_annotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_annotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a box around each **baby cow** and **big cow**', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.Annotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each **baby cow** and **big cow**", + attachment_type="image", + ) + def test_polygonannotation_ok(): - task = client.create_polygonannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image', - attachment='http://i.imgur.com/v4cBreD.jpg', - objects_to_annotate=['big cow'], - with_labels=True) + client.create_task( + TaskType.PolygonAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + objects_to_annotate=["big cow"], + with_labels=True, + ) + def test_polygonannotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_polygonannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.PolygonAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + ) + def test_lineannotation_ok(): - task = client.create_lineannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - 
attachment_type='image', - attachment='http://i.imgur.com/v4cBreD.jpg', - objects_to_annotate=['big cow'], - with_labels=True) + client.create_task( + TaskType.LineAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + objects_to_annotate=["big cow"], + with_labels=True, + ) + def test_lineannotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_lineannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.LineAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + ) + def test_datacollection_ok(): - task = client.create_datacollection_task( - callback_url='http://www.example.com/callback', - instruction='Find the URL for the hiring page for the company with attached website.', - attachment_type='website', - attachment='http://www.google.com/', - fields={ 'hiring_page': 'Hiring Page URL' }) + client.create_task( + TaskType.DataCollection, + callback_url="http://www.example.com/callback", + instruction="Find the URL for the hiring page for the company" + " with attached website.", + attachment_type="website", + attachment="http://www.google.com/", + fields={"hiring_page": "Hiring Page URL"}, + ) + def test_datacollection_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_datacollection_task( - callback_url='http://www.example.com/callback', - attachment_type='website') - -def test_audiotranscription_ok(): - task = client.create_audiotranscription_task( - callback_url='http://www.example.com/callback', - attachment_type='audio', - instruction='Listen to the audio file and transcript.', - 
attachment='https://storage.googleapis.com/deepmind-media/pixie/knowing-what-to-say/second-list/speaker-3.wav', - verbatim=False, - phrases=['avocado', 'stone'] - ) + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.DataCollection, + callback_url="http://www.example.com/callback", + attachment_type="website", + ) -def test_audiotranscription_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_audiotranscription_task( - callback_url='http://www.example.com/callback', - attachment_type='audio') def test_namedentityrecognition_ok(): - return client.create_namedentityrecognition_task( - callback_url='http://www.example.com/callback', - instruction='Do the objects in these images have the same pattern?', - text='Example text to label with NER tool', - labels=[{ - 'name': 'Label_A', - 'description': 'the first label', - }]) - + return client.create_task( + TaskType.NamedEntityRecognition, + callback_url="http://www.example.com/callback", + instruction="Do the objects in these images have the same pattern?", + text="Example text to label with NER tool", + labels=[{"name": "Label_A", "description": "the first label"}], + ) + + def test_cancel(): task = make_a_task() # raises a scaleexception, because test tasks complete instantly - with pytest.raises(scaleapi.ScaleException): + with pytest.raises(ScaleInvalidRequest): task.cancel() + def test_task_retrieval(): task = make_a_task() task2 = client.fetch_task(task.id) - assert task2.status == 'completed' + assert task2.status == "completed" assert task2.id == task.id assert task2.callback_url == task.callback_url assert task2.instruction == task.instruction - assert task2.attachment_type == task.attachment_type - assert task2.attachment == task.attachment - assert task2.geometries == task.geometries + assert task2.params["attachment_type"] == task.params["attachment_type"] + assert task2.params["attachment"] == task.params["attachment"] + assert task2.params["geometries"] == 
task.params["geometries"] assert task2.metadata == task.metadata assert task2.type == task.type assert task2.created_at == task.created_at + def test_task_retrieval_time(): - task = make_a_task() + make_a_task() time.sleep(0.5) start_time = datetime.utcnow().isoformat() time.sleep(0.5) @@ -225,49 +278,74 @@ def test_task_retrieval_time(): tasks = client.tasks(start_time=start_time, end_time=end_time) assert tasks.docs == [] + def test_task_retrieval_fail(): - with pytest.raises(scaleapi.ScaleException): - client.fetch_task('fake_id_qwertyuiop') + with pytest.raises(ScaleResourceNotFound): + client.fetch_task("fake_id_qwertyuiop") + def test_tasks(): tasks = [] - for i in range(3): + for _ in range(3): tasks.append(make_a_task()) task_ids = {task.id for task in tasks} for task in client.tasks(limit=3): assert task.id in task_ids + def test_tasks_invalid(): - with pytest.raises(scaleapi.ScaleException): + with pytest.raises(ScaleInvalidRequest): client.tasks(bogus=0) + def create_a_batch(): return client.create_batch( - callback = "http://www.example.com/callback", - batch_name = "scaleapi-python-sdk-" + str(randint(0, 99999)), - project = "scaleapi-python-sdk" + callback="http://www.example.com/callback", + batch_name=str(uuid.uuid4()), + project=TEST_PROJECT_NAME, ) + def test_finalize_batch(): batch = create_a_batch() batch = client.finalize_batch(batch.name) - assert batch.status == 'in_progress' batch2 = create_a_batch() batch2.finalize() - assert batch2.status == 'in_progress' + def test_get_batch_status(): batch = create_a_batch() client.batch_status(batch.name) - assert batch.status == 'staging' + assert batch.status == "staging" + + batch2 = client.get_batch(batch.name) + batch2.get_status() # Test status update + assert batch2.status == "staging" - batch.finalize() - batch.get_status() # Test status update - assert batch.status == 'in_progress' def test_get_batch(): batch = create_a_batch() batch2 = client.get_batch(batch.name) assert batch.name == 
batch2.name - assert batch2.status == 'staging' + assert batch2.status == "staging" + + +def test_list_batch(): + batches = [] + for _ in range(3): + batches.append(create_a_batch()) + batch_names = {batch.name for batch in batches} + + for batch in client.list_batches(limit=3): + assert batch.name in batch_names + + +def test_list_batch_all(): + # Get count of all batches + batchlist = client.list_batches(project=TEST_PROJECT_NAME, limit=1) + total_batches = batchlist.total + + # Download all batches to check total count + all_batches = client.list_batches_all(project_name=TEST_PROJECT_NAME, limit=10) + assert total_batches == len(all_batches) From 8889dbd305c11c77d22ea1d1d9372a0b92d49cd8 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 10:03:50 -0700 Subject: [PATCH 02/38] v2.0 development pt2 --- scaleapi/__init__.py | 389 ++++++++++++++++++++++++++++++++++++++--- scaleapi/api.py | 103 +++++++---- scaleapi/exceptions.py | 26 ++- scaleapi/projects.py | 4 + scaleapi/tasks.py | 11 ++ setup.py | 2 +- tests/test_client.py | 12 +- 7 files changed, 469 insertions(+), 78 deletions(-) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index ce767e4..5afacdb 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -1,12 +1,13 @@ -from typing import Dict, Generic, List, TypeVar, Union +import warnings +from typing import Dict, Generator, Generic, List, TypeVar, Union from scaleapi.batches import Batch, BatchStatus from scaleapi.exceptions import ScaleInvalidRequest from scaleapi.projects import Project +from ._version import __version__ # noqa: F401 from .api import Api from .tasks import Task, TaskReviewStatus, TaskStatus, TaskType -from ._version import __version__ # noqa: F401 T = TypeVar("T") @@ -41,31 +42,111 @@ class Batchlist(Paginator[Batch]): class ScaleClient(object): def __init__(self, api_key, source=None): self.api = Api(api_key, source) + warnings.simplefilter("always", DeprecationWarning) - def fetch_task(self, task_id: str) 
-> Task: + def get_task(self, task_id: str) -> Task: """Fetches a task. Returns the associated task. + + Args: + task_id (str): + Task identifier + Returns: + Task: """ endpoint = f"task/{task_id}" return Task(self.api._get_request(endpoint), self) + def fetch_task(self, task_id: str) -> Task: + warnings.warn( + "fetch_task() will be deprecated, please use get_task() method " + "as the alternative.", + DeprecationWarning, + stacklevel=2, + ) + return self.get_task(task_id) + def cancel_task(self, task_id: str) -> Task: - """Cancels a task. - Returns the associated task. + """Cancels a task and returns the associated task. Raises a ScaleException if it has already been canceled. + + Args: + task_id (str): + Task id + + Returns: + Task """ endpoint = f"task/{task_id}/cancel" return Task(self.api._post_request(endpoint), self) def tasks(self, **kwargs) -> Tasklist: """Returns a list of your tasks. - Returns up to 100 at a time, to get more, use the next_token param passed back. - start/end_time are ISO8601 dates, the time range of tasks to fetch. - status can be 'completed', 'pending', or 'canceled'. - type is the task type. - limit is the max number of results to display per page, - next_token can be use to fetch the next page of tasks. - customer_review_status can be 'pending', 'fixed', 'accepted' or 'rejected'. + Returns up to 100 at a time, to get more, use the + next_token param passed back. + + Valid Args: + start_time (str): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + end_time (str): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + status (str): + Status to filter tasks, can be 'completed', 'pending', + or 'canceled' + + type (str): + Task type to filter. i.e. 
'imageannotation' + + project (str): + Project name to filter tasks by + + batch (str): + Batch name to filter tasks by + + customer_review_status (str): + Audit status of task, can be 'pending', 'fixed', + 'accepted' or 'rejected'. + + unique_id (List[str] | str): + The unique_id of a task. + + completed_after (str): + The minimum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + completed_before (str): + The maximum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_after (str): + The minimum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_before (str): + The maximum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_after (str): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_before (str): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + tags (List[str] | str): + The tags of a task; multiple tags can be + specified as a list. + + limit (int): + Determines the page size (1-100) + + next_token (str): + Can be use to fetch the next page of tasks """ allowed_kwargs = { "start_time", @@ -118,9 +199,70 @@ def tasks_all( created_after: str = None, created_before: str = None, tags: Union[List[str], str] = None, - ) -> List[Task]: + ) -> Generator[Task, None, None]: + """Retrieve all tasks as a generator function, with the + given parameters. This methods handles pagination of + tasks() method. + + In order to retrieve results as a list, please use: + `tasks = list(tasks_all(...))` + + Args: + project_name (str): + Project Name + + batch_name (str, optional): + Batch Name + + type (TaskType, optional): + Task type to filter i.e. `TaskType.TextCollection` + + status (TaskStatus, optional): + Task status i.e. 
`TaskStatus.Completed` + + review_status (List[TaskReviewStatus] | TaskReviewStatus): + The status of the audit result of the task. + Input can be a single element or a list of + TaskReviewStatus. i.e. `TaskReviewStatus.Accepted` to + filter the tasks that you accepted after audit. + + unique_id (List[str] | str, optional): + The unique_id of a task. Multiple unique IDs can be + specified at the same time as a list. + + completed_after (str, optional): + The minimum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + completed_before (str, optional): + The maximum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_after (str, optional): + The minimum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_before (str, optional): + The maximum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_after (str, optional): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_before (str, optional): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + tags (List[str] | str, optional): + The tags of a task; multiple tags can be + specified as a list. + + Yields: + Generator[Task]: + Yields Task objects, can be iterated. 
+ """ - tasks_list: List[Task] = [] next_token = None has_more = True @@ -144,42 +286,161 @@ def tasks_all( if type: tasks_args["type"] = type.value if review_status: - tasks_args["customer_review_status"] = review_status.value + if isinstance(review_status, List): + value = ",".join(map(lambda x: x.value, review_status)) + else: + value = review_status.value + + tasks_args["customer_review_status"] = value tasks = self.tasks(**tasks_args) + for task in tasks.docs: + yield task + next_token = tasks.next_token has_more = tasks.has_more - tasks_list.extend(tasks.docs) - - return tasks_list def create_task(self, task_type: TaskType, **kwargs) -> Task: + """This method can be used for any Scale supported task type. + Parameters may differ based on the given task_type. + https://github.com/scaleapi/scaleapi-python-client#create-task + + Args: + task_type (TaskType): + Task type to be created + i.e. `TaskType.ImageAnnotation` + **kwargs: + Passing in the applicable values into thefunction + definition. The applicable fields and further + information for each task type can be found in + Scale's API documentation. + https://docs.scale.com/reference + + Returns: + Task: + Returns created task. + """ endpoint = f"task/{task_type.value}" taskdata = self.api._post_request(endpoint, body=kwargs) return Task(taskdata, self) def create_batch(self, project: str, batch_name: str, callback: str = "") -> Batch: + """Create a new Batch within a project. + https://docs.scale.com/reference#batch-creation + + Args: + project (str): + Project name to create batch in + batch_name (str): + Batch name + callback (str, optional): + Email to notify, or URL to POST to + when a batch is complete. 
+ + Returns: + Batch: Created batch object + """ endpoint = "batches" payload = dict(project=project, name=batch_name, callback=callback) batchdata = self.api._post_request(endpoint, body=payload) return Batch(batchdata, self) def finalize_batch(self, batch_name: str) -> Batch: + """Finalizes a batch so its tasks can be worked on. + https://docs.scale.com/reference#batch-finalization + + Args: + batch_name (str): + Batch name + + Returns: + Batch + """ endpoint = f"batches/{Api.quote_string(batch_name)}/finalize" batchdata = self.api._post_request(endpoint) return Batch(batchdata, self) def batch_status(self, batch_name: str) -> Dict: + """Returns the status of a batch with the counts of + its tasks grouped by task status. + https://docs.scale.com/reference#batch-status + + Args: + batch_name (str): + Batch name + + Returns: + Dict { + status: Batch status + pending (optional): # of tasks in pending stage + error (optional): # of tasks in error stage + completed (optional): # of tasks in completed stage + canceled (optional): # of tasks in canceled stage + } + + """ endpoint = f"batches/{Api.quote_string(batch_name)}/status" status_data = self.api._get_request(endpoint) return status_data - def get_batch(self, batch_name): + def get_batch(self, batch_name: str) -> Batch: + """Returns the details of a batch with the given name. + https://docs.scale.com/reference#batch-retrieval + + Args: + batch_name (str): + Batch name + + Returns: + Batch + """ endpoint = f"batches/{Api.quote_string(batch_name)}" batchdata = self.api._get_request(endpoint) return Batch(batchdata, self) def list_batches(self, **kwargs) -> Batchlist: + warnings.warn( + "list_batches() will be deprecated, please use batches() method " + "as the alternative.", + DeprecationWarning, + stacklevel=2, + ) + return self.batches(**kwargs) + + def batches(self, **kwargs) -> Batchlist: + """This is a paged endpoint for all of your batches. 
+ Pagination is based off limit and offset parameters, + which determine the page size and how many results to skip. + Returns up to 100 batches at a time (limit). + https://docs.scale.com/reference#batch-list + + Valid Args: + start_time (str): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + end_time (str): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + status (str): + Status to filter batches by + + project (str): + Project name to filter batches by + + limit (int): + Determines the page size (1-100) + + offset (int): + How many results to skip + + Returns: + Batchlist: + Paginated result. Batchlist.docs provides access + to batches list. Batchlist.limit and Batchlist.offset + are helpers for pagination. + """ allowed_kwargs = { "start_time", "end_time", @@ -192,7 +453,7 @@ def list_batches(self, **kwargs) -> Batchlist: for key in kwargs: if key not in allowed_kwargs: raise ScaleInvalidRequest( - f"Illegal parameter {key} for ScaleClient.list_batches()" + f"Illegal parameter {key} for ScaleClient.batches()" ) endpoint = "batches" response = self.api._get_request(endpoint, params=kwargs) @@ -206,16 +467,39 @@ def list_batches(self, **kwargs) -> Batchlist: response["has_more"], ) - def list_batches_all( + def batches_all( self, project_name: str, batch_status: BatchStatus = None, created_after: str = None, created_before: str = None, - limit: int = 100, - ) -> List[Batch]: + ) -> Generator[Batch, None, None]: + """Generator method to yield all batches with the given + parameters. + + In order to retrieve results as a list, please use: + `batches = list(batches_all(...))` + + Args: + project_name (str): + Project Name to filter batches + + batch_status (BatchStatus, optional): + i.e. 
`BatchStatus.Completed` + + created_after (str, optional): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_before (str, optional): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + Yields: + Generator[Batch]: + Yields Batch, can be iterated. + """ - batches_list: List[Batch] = [] has_more = True offset = 0 @@ -225,36 +509,83 @@ def list_batches_all( "end_time": created_before, "project": project_name, "offset": offset, - "limit": limit, } if batch_status: batches_args["status"] = batch_status.value - batches = self.list_batches(**batches_args) + batches = self.batches(**batches_args) + for batch in batches.docs: + yield batch offset += batches.limit has_more = batches.has_more - batches_list.extend(batches.docs) - return batches_list + def create_project( + self, project_name: str, type: TaskType, params: Dict + ) -> Project: + """Creates a new project. + https://docs.scale.com/reference#project-creation - def create_project(self, project_name: str, type: TaskType, params) -> Project: + Args: + project_name (str): + Project name + + type (TaskType): + Task Type i.e. `TaskType.ImageAnnotation` + + params (Dict): + Project parameters to be specificed. + i.e. `{'instruction':'Please label the kittens'}` + + Returns: + Project: [description] + """ endpoint = "projects" payload = dict(type=type.value, name=project_name, params=params) projectdata = self.api._post_request(endpoint, body=payload) return Project(projectdata, self) def get_project(self, project_name: str) -> Project: + """Retrieves a single project with the given name. + https://docs.scale.com/reference#project-retrieval + + Args: + project_name (str): + Project name + + Returns: + Project + """ endpoint = f"projects/{Api.quote_string(project_name)}" projectdata = self.api._get_request(endpoint) return Project(projectdata, self) def projects(self) -> List[Project]: + """Returns all projects. 
+ Refer to Projects API Reference: + https://docs.scale.com/reference#list-all-projects + + Returns: + List[Project] + """ endpoint = "projects" project_list = self.api._get_request(endpoint) return [Project(project, self) for project in project_list] def update_project(self, project_name: str, **kwargs) -> Project: + """You can set parameters on a project. Project-level-parameters + will be set on future tasks created under this project if they + are not set in the task request. Any parameters specified in + the task request will override any project parameter. + https://docs.scale.com/reference#project-update-parameters + + Args: + project_name (str): + Project's name + + Returns: + Project + """ allowed_kwargs = {"patch", "instruction"} for key in kwargs: if key not in allowed_kwargs: diff --git a/scaleapi/api.py b/scaleapi/api.py index ab1db01..3a291ad 100644 --- a/scaleapi/api.py +++ b/scaleapi/api.py @@ -2,6 +2,7 @@ import urllib.parse import requests +from requests.adapters import HTTPAdapter, Retry from ._version import __package_name__, __version__ from .exceptions import ( @@ -11,11 +12,13 @@ ScaleInvalidRequest, ScaleNotEnabled, ScaleResourceNotFound, + ScaleTimeoutError, ScaleTooManyRequests, ScaleUnauthorized, ) SCALE_ENDPOINT = "https://api.scale.com/v1" +NUM_OF_RETRIES = 3 class Api(object): @@ -34,16 +37,28 @@ def __init__(self, api_key, user_agent_extension=None): def _request( self, method, endpoint, headers=None, auth=None, params=None, body=None ): - """Generic request method with error handling.""" + """Generic HTTP request method with error handling.""" url = f"{SCALE_ENDPOINT}/{endpoint}" error_message = None + https = requests.Session() + retry_strategy = Retry( + total=NUM_OF_RETRIES, + backoff_factor=2, # Will wait 1, 2, 4 seconds between retries + status_forcelist=[429, 504], + allowed_methods=["GET", "POST"], + raise_on_status=False, + ) + + adapter = HTTPAdapter(max_retries=retry_strategy) + https.mount("https://", adapter) + try: 
params = params or {} body = body or {} - res = requests.request( + res = https.request( method=method, url=url, headers=headers, @@ -52,48 +67,60 @@ def _request( json=body, ) - except ( - requests.exceptions.HTTPError, - requests.exceptions.ConnectionError, - requests.exceptions.Timeout, - requests.exceptions.RequestException, - ) as err: - raise err - - if res.status_code == 200: - return res.json() - else: - error_message = res.json().get("error", res.text) - - if res.status_code == 400: - raise ScaleInvalidRequest(error_message, res.status_code) - elif res.status_code == 401: - raise ScaleUnauthorized(error_message, res.status_code) - elif res.status_code == 402: - raise ScaleNotEnabled(error_message, res.status_code) - elif res.status_code == 404: - raise ScaleResourceNotFound(error_message, res.status_code) - elif res.status_code == 409: - raise ScaleDuplicateTask(error_message, res.status_code) - elif res.status_code == 429: - raise ScaleTooManyRequests(error_message, res.status_code) - elif res.status_code == 500: - raise ScaleInternalError(error_message, res.status_code) + if res.status_code == 200: + return res.json() else: - raise ScaleException(error_message, res.status_code) + try: + error_message = res.json().get("error", res.text) + except Exception: + error_message = res.text + + if res.status_code == 400: + raise ScaleInvalidRequest(error_message, res.status_code) + elif res.status_code == 401: + raise ScaleUnauthorized(error_message, res.status_code) + elif res.status_code == 402: + raise ScaleNotEnabled(error_message, res.status_code) + elif res.status_code == 404: + raise ScaleResourceNotFound(error_message, res.status_code) + elif res.status_code == 409: + raise ScaleDuplicateTask(error_message, res.status_code) + elif res.status_code == 429: + raise ScaleTooManyRequests(error_message, res.status_code) + elif res.status_code == 500: + raise ScaleInternalError(error_message, res.status_code) + elif res.status_code == 504: + raise 
ScaleTimeoutError(error_message, res.status_code) + else: + raise ScaleException(error_message, res.status_code) + + except (requests.exceptions.Timeout, requests.exceptions.RetryError,) as err: + raise ScaleException(err) def _get_request(self, endpoint, params=None): + """Generic GET Request Wrapper""" return self._request( "GET", endpoint, headers=self._headers, auth=self._auth, params=params ) def _post_request(self, endpoint, body=None): + """Generic POST Request Wrapper""" return self._request( "POST", endpoint, headers=self._headers, auth=self._auth, body=body ) @staticmethod - def _generate_useragent(extension=None): + def _generate_useragent(extension: str = None) -> str: + """Generates UserAgent parameter with module, Python + and OS details + + Args: + extension (str, optional): Option to extend UserAgent + with source system + + Returns: + str: Generated UserAgent parameter with platform versions + """ python_version = platform.python_version() os_platform = platform.platform() @@ -111,11 +138,17 @@ def _generate_useragent(extension=None): return user_agent @staticmethod - def quote_string(text): - """`quote_string('a bc/def')` -> `a%20bc%2Fdef` - - Project and Batch names can be a part of URL, which causes + def quote_string(text: str) -> str: + """Project and Batch names can be a part of URL, which causes an error in case of a special character used. Quotation assures the right object to be retrieved from API. 
+ + `quote_string('a bc/def')` -> `a%20bc%2Fdef` + + Args: + text (str): Input text to be quoted + + Returns: + str: Quoted text in return """ return urllib.parse.quote(text, safe="") diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py index be6fb25..8d3f348 100644 --- a/scaleapi/exceptions.py +++ b/scaleapi/exceptions.py @@ -9,7 +9,8 @@ def __init__(self, message, errcode=None): class ScaleInvalidRequest(ScaleException): """400 - Bad Request -- The request was unacceptable, - often due to missing a required parameter.""" + often due to missing a required parameter. + """ pass @@ -21,8 +22,9 @@ class ScaleUnauthorized(ScaleException): class ScaleNotEnabled(ScaleException): - """402 - Not enabled -- Please contact sales@scaleapi.com before creating - this type of task.""" + """402 - Not enabled -- Please contact sales@scaleapi.com before + creating this type of task. + """ pass @@ -34,20 +36,30 @@ class ScaleResourceNotFound(ScaleException): class ScaleDuplicateTask(ScaleException): - """409 - Conflict -- The provided idempotency key or unique_id is already - in use for a different request.""" + """409 - Conflict -- The provided idempotency key or unique_id is + already in use for a different request. + """ pass class ScaleTooManyRequests(ScaleException): - """429 - Too Many Requests -- Too many requests hit the API too quickly.""" + """429 - Too Many Requests -- Too many requests hit the API + too quickly. + """ pass class ScaleInternalError(ScaleException): """500 - Internal Server Error -- We had a problem with our server. - Try again later.""" + Try again later. 
+ """ + + pass + + +class ScaleTimeoutError(ScaleException): + default = """504 - Server Timeout Error -- Try again later.""" pass diff --git a/scaleapi/projects.py b/scaleapi/projects.py index 87361ec..0a8a3f3 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -5,6 +5,10 @@ def __init__(self, json, client): self.type = json["type"] self._client = client + if len(json["param_history"]): + self.version = json["param_history"][-1]["version"] + self.instruction = json["param_history"][-1]["instruction"] + def __hash__(self): return hash(self.name) diff --git a/scaleapi/tasks.py b/scaleapi/tasks.py index e47e545..601fdc6 100644 --- a/scaleapi/tasks.py +++ b/scaleapi/tasks.py @@ -64,10 +64,21 @@ def __repr__(self): return f"Task({self._json})" def as_dict(self): + """Returns object details as a dictionary + + `Task.as_dict()['params']` + + Returns: + Dict with object content + """ return self._json def refresh(self): + """Refreshes the task details. + """ self._json = self._client.fetch_task(self.id).as_dict() def cancel(self): + """Cancels the task + """ self._client.cancel_task(self.id) diff --git a/setup.py b/setup.py index 595db44..712445c 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ def get_version(rel_path): if line.startswith("__version__"): delim = '"' if '"' in line else "'" return line.split(delim)[1] - raise RuntimeError("Unable to find a valid __version__ string in %s." 
% rel_path) + raise RuntimeError(f"Unable to find a valid __version__ string in {rel_path}.") setup( diff --git a/tests/test_client.py b/tests/test_client.py index 5fbd90c..f694d51 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -25,7 +25,7 @@ def test_invalidkey_fail(): client_fail = scaleapi.ScaleClient("dummy_api_key", "pytest") with pytest.raises(ScaleUnauthorized): - client_fail.list_batches(limit=1) + client_fail.batches(limit=1) def make_a_task(unique_id: str = None): @@ -256,7 +256,7 @@ def test_cancel(): def test_task_retrieval(): task = make_a_task() - task2 = client.fetch_task(task.id) + task2 = client.get_task(task.id) assert task2.status == "completed" assert task2.id == task.id assert task2.callback_url == task.callback_url @@ -281,7 +281,7 @@ def test_task_retrieval_time(): def test_task_retrieval_fail(): with pytest.raises(ScaleResourceNotFound): - client.fetch_task("fake_id_qwertyuiop") + client.get_task("fake_id_qwertyuiop") def test_tasks(): @@ -337,15 +337,15 @@ def test_list_batch(): batches.append(create_a_batch()) batch_names = {batch.name for batch in batches} - for batch in client.list_batches(limit=3): + for batch in client.batches(limit=3): assert batch.name in batch_names def test_list_batch_all(): # Get count of all batches - batchlist = client.list_batches(project=TEST_PROJECT_NAME, limit=1) + batchlist = client.batches(project=TEST_PROJECT_NAME, limit=1) total_batches = batchlist.total # Download all batches to check total count - all_batches = client.list_batches_all(project_name=TEST_PROJECT_NAME, limit=10) + all_batches = list(client.batches_all(project_name=TEST_PROJECT_NAME)) assert total_batches == len(all_batches) From 7373239bcb05f170030d5560d73972ac435c47e0 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 13:09:09 -0700 Subject: [PATCH 03/38] minor fixes --- scaleapi/__init__.py | 2 +- scaleapi/exceptions.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git 
a/scaleapi/__init__.py b/scaleapi/__init__.py index 5afacdb..81ec200 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -469,7 +469,7 @@ def batches(self, **kwargs) -> Batchlist: def batches_all( self, - project_name: str, + project_name: str = None, batch_status: BatchStatus = None, created_after: str = None, created_before: str = None, diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py index 8d3f348..1871d3d 100644 --- a/scaleapi/exceptions.py +++ b/scaleapi/exceptions.py @@ -60,6 +60,7 @@ class ScaleInternalError(ScaleException): class ScaleTimeoutError(ScaleException): - default = """504 - Server Timeout Error -- Try again later.""" + """504 - Server Timeout Error -- Try again later. + """ pass From 7cb5a177d6521e97763bbcdf1691d6324acf288c Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 15:51:53 -0700 Subject: [PATCH 04/38] readme v2 and updates to objects --- README.rst | 174 +++++++++++++++++++++++++++-------------- scaleapi/batches.py | 1 + scaleapi/exceptions.py | 1 + scaleapi/projects.py | 6 +- 4 files changed, 123 insertions(+), 59 deletions(-) diff --git a/README.rst b/README.rst index 96bebb6..fb7187c 100644 --- a/README.rst +++ b/README.rst @@ -2,6 +2,53 @@ Scale AI | Python SDK ===================== +If you use earlier versions of the SDK, please refer to `v1.0.4 documentation `_. + + +Migration Guide to v2.x +________________________ + +If you are migrating from v0.x or v1.x, this guide explains how to update your application for compatibility with v2.x. We recommend migrating as soon as possible to ensure that your application is unaffected. + +Creating New Tasks +^^^^^^^^^^^^^^^^^^ +Methods with task types such as `create_imageannotation_task, create_textcollection_task` etc. are deprecated. + +Creating a new task is now unified under the `create_task(TaskType, ...)` method. Please review `Create Task`_ section for more details. 
+ +Retrieving Tasks +^^^^^^^^^^^^^^^^ +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: `tasks_all(...)`. You can have a simpler code by replacing `tasks()` loops with pagination. + +Please refer to `List Tasks`_ for more details. + +Accessing Attributes (Task, Batch, Project) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The old `param_dict` attribute is now replaced with a method `as_dict()` to return an object's attributes as a dictionary. + +First-level attributes of Task can also be accessed with `.` annotation as `task.as_dict()["status"]` is equal to `task.status`. +Other examples are `task.type, task.params, task.response["annotations"]`. + +Task Count Summary of Batches +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Attributes of Batch `pending, completed, error, canceled` are replaced with `tasks_pending, tasks_completed, tasks_error, tasks_canceled` respectively. + +Deprecated Methods +^^^^^^^^^^^^^^^^^^ +- `fetch_task()` replaced with `get_task()` +- `list_batches()` replaced with `batches()` + +Enabled Auto-Retry +^^^^^^^^^^^^^^^^^^ +SDK now enables auto-retry in case of a TimeOut (504) or TooManyRequests (429) occurs. + +New Exceptions +^^^^^^^^^^^^^^ +New error types are introduces if you want to handle specific exception cases. +`ScaleInvalidRequest, ScaleUnauthorized, ScaleNotEnabled, ScaleResourceNotFound, ScaleDuplicateTask, ScaleTooManyRequests, ScaleInternalError` and `ScaleTimeoutError`. +All new error types are child of the existing `ScaleException` which can be used to handle all cases. + + Installation ____________ @@ -9,8 +56,6 @@ ____________ $ pip install --upgrade scaleapi -Note: We strongly suggest using `scaleapi` with Python version 2.7.9 or greater due to SSL issues with prior versions. 
- Usage _____ @@ -23,11 +68,11 @@ Tasks _____ Most of these methods will return a `scaleapi.Task` object, which will contain information -about the json response (task_id, status, etc.). +about the json response (task_id, status, params, response etc.). Any parameter available in `Scale's API documentation`__ can be passed as an argument option with the corresponding type. -__ https://docs.scale.com/reference#task-object +__ https://docs.scale.com/reference#tasks-object-overview The following endpoints for tasks are available: @@ -38,15 +83,18 @@ This method can be used for any Scale supported task type using the following fo .. code-block:: python - client.create_{{Task Type}}_task(...) + client.create_task(TaskType, ...task parameters...) Passing in the applicable values into the function definition. The applicable fields and further information for each task type can be found in `Scale's API documentation`__. -__ https://docs.scale.com/reference#general-image-annotation +__ https://docs.scale.com/reference .. code-block:: python - client.create_imageannotation_task( + from scaleapi.tasks import TaskType + + client.create_task( + TaskType.ImageAnnotation, project = 'test_project', callback_url = "http://www.example.com/callback", instruction= "Draw a box around each baby cow and big cow.", @@ -60,9 +108,9 @@ __ https://docs.scale.com/reference#general-image-annotation } } ) - -Retrieve task -^^^^^^^^^^^^^ + +Retrieve a task +^^^^^^^^^^^^^^^ Retrieve a task given its id. Check out `Scale's API documentation`__ for more information. @@ -70,37 +118,40 @@ __ https://docs.scale.com/reference#retrieve-tasks .. 
code-block :: python - task = client.fetch_task('asdfasdfasdfasdfasdfasdf') - print(task.status) // Task status ('pending', 'completed', 'error', 'canceled') - print(task.response) // If task is complete + task = client.get_task('30553edd0b6a93f8f05f0fee') + print(task.status) # Task status ('pending', 'completed', 'error', 'canceled') + print(task.response) # If task is complete List Tasks ^^^^^^^^^^ -Retrieve a list of tasks, with optional filter by start and end date/time. Paginated with `next_token`. The return value is a `scaleapi.Tasklist`, which acts as a list, but also has fields for the total number of tasks, the limit and offset, and whether or not there's more. Check out `Scale's API documentation`__ for more information. +Retrieve a list of `Task` objects, with optional filters for: `project_name, batch_name, type, status, review_status, unique_id, completed_after, completed_before, updated_after, updated_before, created_after, created_before` and `tags`. + +This method is a generator and yields tasks. It can be wrapped in a `list` statement if a Task list is needed. + +Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#list-multiple-tasks .. 
code-block :: python - - next_token = None; - counter = 0 - all_tasks =[] - while True: - tasks = client.tasks( - start_time = "2020-09-08", - end_time = "2021-01-01", - customer_review_status = "accepted", - next_token = next_token, - ) - for task in tasks: - counter += 1 - print('Downloading Task %s | %s' % (counter, task.task_id)) - all_tasks.append(task.__dict__['param_dict']) - next_token = tasks.next_token - if next_token is None: - break - print(all_tasks) + + from scaleapi.tasks import TaskReviewStatus, TaskStatus + + tasks = client.tasks_all( + project_name = "My Project", + created_after = "2020-09-08", + completed_before = "2021-04-01", + status = TaskStatus.Completed, + review_status = TaskReviewStatus.Accepted + ) + + for task in tasks: + # Download task or do something! + print(task.task_id) + + # Alternative for accessing as a Task list + task_list = list(tasks) + print(f"{len(task_list)} tasks retrieved") Cancel Task ^^^^^^^^^^^ @@ -153,6 +204,11 @@ __ https://docs.scale.com/reference#batch-status client.batch_status(batch_name = 'batch_name_01_07_2021') + # Alternative via Batch.get_status() + batch = client.get_batch('batch_name_01_07_2021') + batch.get_status() # Refreshes tasks_{status} attributes of Batch + print(batch.tasks_pending, batch.tasks_completed) + Retrieve Batch ^^^^^^^^^^^^^^ @@ -167,27 +223,29 @@ __ https://docs.scale.com/reference#batch-retrieval List Batches ^^^^^^^^^^^^ -Retrieve a list of Batches. Check out `Scale's API documentation`__ for more information. +Retrieve a list of Batches. Optional parameters are `project_name, batch_status, created_after, created_before`. + +Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#batch-list .. 
code-block :: python - next_token = None; + from scaleapi.batches import BatchStatus + + batches = client.batches_all( + batch_status=BatchStatus.Completed, + created_after = "2020-09-08" + ) + counter = 0 - all_batchs =[] - while True: - batches = client.list_batches( - status = "completed" - ) - for batch in batches: - counter += 1 - print('Downloading Batch %s | %s | %s' % (counter, batch.name, batch.param_dict['status'])) - all_batchs.append(batch.__dict__['param_dict']) - next_token = batches.next_token - if next_token is None: - break - print(all_batchs) + for batch in batches: + counter += 1 + print(f'Downloading batch {counter} | {batch.name} | {batch.project}') + + # Alternative for accessing as a Batch list + batch_list = list(batches) + print(f"{len(batch_list)} batches retrieved") Projects ________ @@ -232,7 +290,7 @@ __ https://docs.scale.com/reference#batch-list projects = client.projects() for project in projects: counter += 1 - print('Downloading project %s | %s | %s' % (counter, project['name'], project['type'])) + print(f'Downloading project {counter} | {project.name} | {project.type}') Update Project ^^^^^^^^^^^^^^ @@ -253,17 +311,19 @@ Error handling ______________ If something went wrong while making API calls, then exceptions will be raised automatically -as a `scaleapi.ScaleException` or `scaleapi.ScaleInvalidRequest` runtime error. For example: +as a `scaleapi.ScaleException` parent type and child exceptions like: `ScaleInvalidRequest, ScaleUnauthorized, ScaleNotEnabled, ScaleResourceNotFound, ScaleDuplicateTask, ScaleTooManyRequests, ScaleInternalError` and `ScaleTimeoutError`. -.. code-block:: python +For example: - try - client.create_categorization_task('Some parameters are missing.') - except scaleapi.ValidationError as e: - print(e.code) # 400 - print(e.message) # missing param X +.. 
code-block:: python + try: + client.create_task(TaskType.TextCollection, attachment='Some parameters are missing.') + except ScaleException as err: + print(err.code) # 400 + print(err.message) # Parameters is invalid, reason: "attachments" is required + Troubleshooting _______________ -If you notice any problems, please email us at support@scale.com. +If you notice any problems, please email us at support@scale.com. \ No newline at end of file diff --git a/scaleapi/batches.py b/scaleapi/batches.py index 12585fe..90447fd 100644 --- a/scaleapi/batches.py +++ b/scaleapi/batches.py @@ -14,6 +14,7 @@ def __init__(self, json, client): self.status = json["status"] self.project = json["project"] self.created_at = json["created_at"] + self.project = json["project"] self.tasks_pending = None self.tasks_completed = None diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py index 1871d3d..b06d89e 100644 --- a/scaleapi/exceptions.py +++ b/scaleapi/exceptions.py @@ -1,6 +1,7 @@ class ScaleException(Exception): def __init__(self, message, errcode=None): self.code = errcode + self.message = message if errcode: super(ScaleException, self).__init__(f" {message}") else: diff --git a/scaleapi/projects.py b/scaleapi/projects.py index 0a8a3f3..799a54b 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -6,8 +6,10 @@ def __init__(self, json, client): self._client = client if len(json["param_history"]): - self.version = json["param_history"][-1]["version"] - self.instruction = json["param_history"][-1]["instruction"] + last_params = json["param_history"][-1] + self.version = last_params["version"] + if "instruction" in last_params: + self.instruction = last_params["instruction"] def __hash__(self): return hash(self.name) From 327db4e55a4f31fa30fa431dfac346e6a955e575 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 16:04:56 -0700 Subject: [PATCH 05/38] readme updates --- README.rst | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 
deletions(-) diff --git a/README.rst b/README.rst index fb7187c..d583a8a 100644 --- a/README.rst +++ b/README.rst @@ -12,9 +12,28 @@ If you are migrating from v0.x or v1.x, this guide explains how to update your Creating New Tasks ^^^^^^^^^^^^^^^^^^ -Methods with task types such as `create_imageannotation_task, create_textcollection_task` etc. are deprecated. +Methods with task types such as ``create_imageannotation_task, create_textcollection_task`` etc. are deprecated. -Creating a new task is now unified under the `create_task(TaskType, ...)` method. Please review `Create Task`_ section for more details. +Creating a new task is now unified under the ``create_task(TaskType, ...)`` method. Please review `Create Task`_ section for more details. + + +.. code-block:: python + + # Deprecated + client.create_imageannotation_task( + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... + ) + + # New Method + from scaleapi.tasks import TaskType + client.create_task( + TaskType.ImageAnnotation, + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... + ) Retrieving Tasks ^^^^^^^^^^^^^^^^ From d04b5fa41d74bfeff4c95248f85c42e9af70221d Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 16:33:19 -0700 Subject: [PATCH 06/38] readme enhanced with samples --- README.rst | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index d583a8a..8edeabf 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ If you are migrating from v0.x or v1.x, this guide explains how to update your Creating New Tasks ^^^^^^^^^^^^^^^^^^ -Methods with task types such as ``create_imageannotation_task, create_textcollection_task`` etc. are deprecated. +Methods with task types such as ``create_imageannotation_task``, ``create_textcollection_task`` etc. are deprecated. 
Creating a new task is now unified under the ``create_task(TaskType, ...)`` method. Please review `Create Task`_ section for more details. @@ -37,35 +37,45 @@ Creating a new task is now unified under the ``create_task(TaskType, ...)`` meth Retrieving Tasks ^^^^^^^^^^^^^^^^ -A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: `tasks_all(...)`. You can have a simpler code by replacing `tasks()` loops with pagination. +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. +You can have a simpler code by replacing ``tasks()`` loops with pagination and tokens. Please refer to `List Tasks`_ for more details. Accessing Attributes (Task, Batch, Project) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The old `param_dict` attribute is now replaced with a method `as_dict()` to return an object's attributes as a dictionary. +The old ``param_dict`` attribute is now replaced with a method ``as_dict()`` to return an object's attributes as a dictionary. + +First-level attributes of Task can also be accessed with `.` annotation as the following: + +.. code-block:: python + + task.as_dict()["status"] + # is same as + task.status + +Other examples are ``task.type``, ``task.params``, ``task.response["annotations"]`` etc. -First-level attributes of Task can also be accessed with `.` annotation as `task.as_dict()["status"]` is equal to `task.status`. -Other examples are `task.type, task.params, task.response["annotations"]`. Task Count Summary of Batches ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Attributes of Batch `pending, completed, error, canceled` are replaced with `tasks_pending, tasks_completed, tasks_error, tasks_canceled` respectively. 
+Attributes of Batch ``pending``, ``completed``, ``error``, ``canceled`` are replaced with ``tasks_pending``, ``tasks_completed``, ``tasks_error``, ``tasks_canceled`` respectively. Deprecated Methods ^^^^^^^^^^^^^^^^^^ -- `fetch_task()` replaced with `get_task()` -- `list_batches()` replaced with `batches()` +- ``fetch_task()`` replaced with ``get_task()`` +- ``list_batches()`` replaced with ``batches()`` Enabled Auto-Retry ^^^^^^^^^^^^^^^^^^ -SDK now enables auto-retry in case of a TimeOut (504) or TooManyRequests (429) occurs. +SDK now enables auto-retry when a TimeOut (504) or TooManyRequests (429) error occurs. New Exceptions ^^^^^^^^^^^^^^ New error types are introduces if you want to handle specific exception cases. -`ScaleInvalidRequest, ScaleUnauthorized, ScaleNotEnabled, ScaleResourceNotFound, ScaleDuplicateTask, ScaleTooManyRequests, ScaleInternalError` and `ScaleTimeoutError`. -All new error types are child of the existing `ScaleException` which can be used to handle all cases. +``ScaleInvalidRequest``, ``ScaleUnauthorized``, ``ScaleNotEnabled``, ``ScaleResourceNotFound``, ``ScaleDuplicateTask``, ``ScaleTooManyRequests``, ``ScaleInternalError`` and ``ScaleTimeoutError``. + +All new error types are children of the existing ``ScaleException``, which can be used to handle all cases. Installation @@ -330,12 +340,25 @@ Error handling ______________ If something went wrong while making API calls, then exceptions will be raised automatically -as a `scaleapi.ScaleException` parent type and child exceptions like: `ScaleInvalidRequest, ScaleUnauthorized, ScaleNotEnabled, ScaleResourceNotFound, ScaleDuplicateTask, ScaleTooManyRequests, ScaleInternalError` and `ScaleTimeoutError`. +as a `ScaleException` parent type and child exceptions: + +- ``ScaleInvalidRequest``: 400 - Bad Request -- The request was unacceptable, often due to missing a required parameter. +- ``ScaleUnauthorized``: 401 - Unauthorized -- No valid API key provided. 
+- ``ScaleNotEnabled``: 402 - Not enabled -- Please contact sales@scaleapi.com before creating this type of task. +- ``ScaleResourceNotFound``: 404 - Not Found -- The requested resource doesn't exist. +- ``ScaleDuplicateTask``: 409 - Conflict -- The provided idempotency key or unique_id is already in use for a different request. +- ``ScaleTooManyRequests``: 429 - Too Many Requests -- Too many requests hit the API too quickly. +- ``ScaleInternalError``: 500 - Internal Server Error -- We had a problem with our server. Try again later +- ``ScaleTimeoutError``: 504 - Server Timeout Error -- Try again later. + +Check out `Scale's API documentation `_ for more details. For example: .. code-block:: python + from scaleapi.exceptions import ScaleException + try: client.create_task(TaskType.TextCollection, attachment='Some parameters are missing.') except ScaleException as err: From f42f5c17bcb3eb4c8f03ccf56dbd5189168bcb94 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 19:41:16 -0700 Subject: [PATCH 07/38] updates to gitignore, setup and manifest files --- .gitignore | 32 ++++++++++++++++++++++++++------ MANIFEST | 5 ----- setup.cfg | 3 ++- 3 files changed, 28 insertions(+), 12 deletions(-) delete mode 100644 MANIFEST diff --git a/.gitignore b/.gitignore index 9bc59cf..2d07bef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,28 @@ -*.pyc +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +/build/ /dist/ -/*.egg-info -.tox -.cache -/.vscode/ +*.egg +*.eggs +*.egg-info/ +MANIFEST + +# For Visual Studio Code +.vscode/ + +# Mac .DS_Store -/build/ \ No newline at end of file + +# Unit test / coverage reports +.[nt]ox/ +htmlcov/ +.coverage +.coverage.* +.*cache +nosetests.xml +coverage.xml +*.cover \ No newline at end of file diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index 45be37e..0000000 --- a/MANIFEST +++ /dev/null @@ -1,5 +0,0 @@ -# file GENERATED by distutils, do NOT edit 
-setup.cfg -setup.py -scaleapi/__init__.py -scaleapi/tasks.py diff --git a/setup.cfg b/setup.cfg index 5aef279..3158478 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,3 @@ [metadata] -description-file = README.rst +long_description = file: README.rst +long_description_content_type = text/x-rst From 5791fd5cca5479b72257c8fb5d9a03ce352bfc20 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 22:30:18 -0700 Subject: [PATCH 08/38] pytest updated --- tests/test_client.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/tests/test_client.py b/tests/test_client.py index f694d51..1a3a800 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -7,8 +7,12 @@ import pytest import scaleapi -from scaleapi.exceptions import (ScaleDuplicateTask, ScaleInvalidRequest, - ScaleResourceNotFound, ScaleUnauthorized) +from scaleapi.exceptions import ( + ScaleDuplicateTask, + ScaleInvalidRequest, + ScaleResourceNotFound, + ScaleUnauthorized, +) from scaleapi.tasks import TaskType TEST_PROJECT_NAME = "scaleapi-python-sdk" @@ -28,7 +32,7 @@ def test_invalidkey_fail(): client_fail.batches(limit=1) -def make_a_task(unique_id: str = None): +def make_a_task(unique_id: str = None, batch: str = None): args = { "callback_url": "http://www.example.com/callback", @@ -45,6 +49,8 @@ def make_a_task(unique_id: str = None): } if unique_id: args["unique_id"] = unique_id + if batch: + args["batch"] = batch return client.create_task(TaskType.ImageAnnotation, **args) @@ -306,6 +312,16 @@ def create_a_batch(): ) +def test_tasks_all(): + batch = create_a_batch() + tasks = [] + for _ in range(3): + tasks.append(make_a_task(batch=batch.name)) + task_ids = {task.id for task in tasks} + for task in client.tasks_all(project_name=TEST_PROJECT_NAME, batch_name=batch.name): + assert task.id in task_ids + + def test_finalize_batch(): batch = create_a_batch() batch = client.finalize_batch(batch.name) From 3e08bca21811f1df0bdbc41a607cde9180e35627 Mon 
Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 22:31:12 -0700 Subject: [PATCH 09/38] docstring updates in model files --- scaleapi/batches.py | 9 ++++++++- scaleapi/projects.py | 4 +++- scaleapi/tasks.py | 5 ++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/scaleapi/batches.py b/scaleapi/batches.py index 90447fd..c1f99c5 100644 --- a/scaleapi/batches.py +++ b/scaleapi/batches.py @@ -2,12 +2,14 @@ class BatchStatus(Enum): + """Status of Batches""" Staging = "staging" InProgress = "in_progress" Completed = "completed" -class Batch(object): +class Batch(): + """Batch class, contains Batch information""" def __init__(self, json, client): self._json = json self.name = json["name"] @@ -33,14 +35,19 @@ def __repr__(self): return f"Batch({self._json})" def as_dict(self): + """Returns all attributes as a dictionary""" return self._json def finalize(self): + """Finalizes the batch""" res = self._client.finalize_batch(self.name) self.status = res.status return res def get_status(self): + """Returns status of the batch and + updates tasks_... 
parameters + """ res = self._client.batch_status(self.name) self.status = res["status"] for stat in ["pending", "completed", "error", "canceled"]: diff --git a/scaleapi/projects.py b/scaleapi/projects.py index 799a54b..5901cf9 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -1,4 +1,5 @@ -class Project(object): +class Project(): + """Project class, containing Project information.""" def __init__(self, json, client): self._json = json self.name = json["name"] @@ -21,4 +22,5 @@ def __repr__(self): return f"Project({self._json})" def as_dict(self): + """Returns all attributes as a dictionary""" return self._json diff --git a/scaleapi/tasks.py b/scaleapi/tasks.py index 601fdc6..36dde0c 100644 --- a/scaleapi/tasks.py +++ b/scaleapi/tasks.py @@ -2,6 +2,7 @@ class TaskType(Enum): + """Task Type List""" Annotation = "annotation" Categorization = "categorization" Comparison = "comparison" @@ -29,6 +30,7 @@ class TaskType(Enum): class TaskReviewStatus(Enum): + """Customer Audit Status of Task""" Accepted = "accepted" Fixed = "fixed" Commented = "commented" @@ -36,12 +38,13 @@ class TaskReviewStatus(Enum): class TaskStatus(Enum): + """Status of Task""" Pending = "pending" Completed = "completed" Canceled = "canceled" -class Task(object): +class Task(): """Task class, containing task information.""" def __init__(self, json, client): From 107ada73c3fa548c32bfc249ec018ddbe04b288c Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 22:32:21 -0700 Subject: [PATCH 10/38] exception structure and pylint recommendations --- scaleapi/__init__.py | 61 +++++++++++++----------- scaleapi/api.py | 104 ++++++++++++++++++++--------------------- scaleapi/exceptions.py | 42 ++++++++++++----- 3 files changed, 115 insertions(+), 92 deletions(-) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 81ec200..9abda5c 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -13,6 +13,7 @@ class Paginator(list, Generic[T]): + """Paginator for list 
endpoints""" def __init__( self, docs: List[T], @@ -22,7 +23,7 @@ def __init__( has_more: bool, next_token=None, ): - super(Paginator, self).__init__(docs) + super().__init__(docs) self.docs = docs self.total = total self.limit = limit @@ -32,14 +33,16 @@ def __init__( class Tasklist(Paginator[Task]): - pass + """Tasks Paginator""" class Batchlist(Paginator[Batch]): - pass + """Batches Paginator""" -class ScaleClient(object): +class ScaleClient(): + """Main class serves as an interface for Scale API + """ def __init__(self, api_key, source=None): self.api = Api(api_key, source) warnings.simplefilter("always", DeprecationWarning) @@ -55,9 +58,11 @@ def get_task(self, task_id: str) -> Task: Task: """ endpoint = f"task/{task_id}" - return Task(self.api._get_request(endpoint), self) + return Task(self.api.get_request(endpoint), self) def fetch_task(self, task_id: str) -> Task: + """fetch_task() will be deprecated, please use get_task() method + """ warnings.warn( "fetch_task() will be deprecated, please use get_task() method " "as the alternative.", @@ -78,7 +83,7 @@ def cancel_task(self, task_id: str) -> Task: Task """ endpoint = f"task/{task_id}/cancel" - return Task(self.api._post_request(endpoint), self) + return Task(self.api.post_request(endpoint), self) def tasks(self, **kwargs) -> Tasklist: """Returns a list of your tasks. 
@@ -169,10 +174,10 @@ def tasks(self, **kwargs) -> Tasklist: for key in kwargs: if key not in allowed_kwargs: raise ScaleInvalidRequest( - f"Illegal parameter {key} for ScaleClient.tasks()", None + f"Illegal parameter {key} for ScaleClient.tasks()" ) - response = self.api._get_request("tasks", params=kwargs) + response = self.api.get_request("tasks", params=kwargs) docs = [Task(json, self) for json in response["docs"]] return Tasklist( @@ -188,7 +193,7 @@ def tasks_all( self, project_name: str, batch_name: str = None, - type: TaskType = None, + task_type: TaskType = None, status: TaskStatus = None, review_status: Union[List[TaskReviewStatus], TaskReviewStatus] = None, unique_id: Union[List[str], str] = None, @@ -214,7 +219,7 @@ def tasks_all( batch_name (str, optional): Batch Name - type (TaskType, optional): + task_type (TaskType, optional): Task type to filter i.e. `TaskType.TextCollection` status (TaskStatus, optional): @@ -283,8 +288,8 @@ def tasks_all( if status: tasks_args["status"] = status.value - if type: - tasks_args["type"] = type.value + if task_type: + tasks_args["type"] = task_type.value if review_status: if isinstance(review_status, List): value = ",".join(map(lambda x: x.value, review_status)) @@ -321,7 +326,7 @@ def create_task(self, task_type: TaskType, **kwargs) -> Task: Returns created task. """ endpoint = f"task/{task_type.value}" - taskdata = self.api._post_request(endpoint, body=kwargs) + taskdata = self.api.post_request(endpoint, body=kwargs) return Task(taskdata, self) def create_batch(self, project: str, batch_name: str, callback: str = "") -> Batch: @@ -336,13 +341,13 @@ def create_batch(self, project: str, batch_name: str, callback: str = "") -> Bat callback (str, optional): Email to notify, or URL to POST to when a batch is complete. 
- +clea Returns: Batch: Created batch object """ endpoint = "batches" payload = dict(project=project, name=batch_name, callback=callback) - batchdata = self.api._post_request(endpoint, body=payload) + batchdata = self.api.post_request(endpoint, body=payload) return Batch(batchdata, self) def finalize_batch(self, batch_name: str) -> Batch: @@ -357,7 +362,7 @@ def finalize_batch(self, batch_name: str) -> Batch: Batch """ endpoint = f"batches/{Api.quote_string(batch_name)}/finalize" - batchdata = self.api._post_request(endpoint) + batchdata = self.api.post_request(endpoint) return Batch(batchdata, self) def batch_status(self, batch_name: str) -> Dict: @@ -380,7 +385,7 @@ def batch_status(self, batch_name: str) -> Dict: """ endpoint = f"batches/{Api.quote_string(batch_name)}/status" - status_data = self.api._get_request(endpoint) + status_data = self.api.get_request(endpoint) return status_data def get_batch(self, batch_name: str) -> Batch: @@ -395,10 +400,12 @@ def get_batch(self, batch_name: str) -> Batch: Batch """ endpoint = f"batches/{Api.quote_string(batch_name)}" - batchdata = self.api._get_request(endpoint) + batchdata = self.api.get_request(endpoint) return Batch(batchdata, self) def list_batches(self, **kwargs) -> Batchlist: + """list_batches() will be deprecated, please use batches() method + """ warnings.warn( "list_batches() will be deprecated, please use batches() method " "as the alternative.", @@ -456,7 +463,7 @@ def batches(self, **kwargs) -> Batchlist: f"Illegal parameter {key} for ScaleClient.batches()" ) endpoint = "batches" - response = self.api._get_request(endpoint, params=kwargs) + response = self.api.get_request(endpoint, params=kwargs) docs = [Batch(doc, self) for doc in response["docs"]] return Batchlist( @@ -521,7 +528,7 @@ def batches_all( has_more = batches.has_more def create_project( - self, project_name: str, type: TaskType, params: Dict + self, project_name: str, task_type: TaskType, params: Dict ) -> Project: """Creates a new project. 
https://docs.scale.com/reference#project-creation @@ -530,7 +537,7 @@ def create_project( project_name (str): Project name - type (TaskType): + task_type (TaskType): Task Type i.e. `TaskType.ImageAnnotation` params (Dict): @@ -541,8 +548,8 @@ def create_project( Project: [description] """ endpoint = "projects" - payload = dict(type=type.value, name=project_name, params=params) - projectdata = self.api._post_request(endpoint, body=payload) + payload = dict(type=task_type.value, name=project_name, params=params) + projectdata = self.api.post_request(endpoint, body=payload) return Project(projectdata, self) def get_project(self, project_name: str) -> Project: @@ -557,7 +564,7 @@ def get_project(self, project_name: str) -> Project: Project """ endpoint = f"projects/{Api.quote_string(project_name)}" - projectdata = self.api._get_request(endpoint) + projectdata = self.api.get_request(endpoint) return Project(projectdata, self) def projects(self) -> List[Project]: @@ -569,7 +576,7 @@ def projects(self) -> List[Project]: List[Project] """ endpoint = "projects" - project_list = self.api._get_request(endpoint) + project_list = self.api.get_request(endpoint) return [Project(project, self) for project in project_list] def update_project(self, project_name: str, **kwargs) -> Project: @@ -590,9 +597,9 @@ def update_project(self, project_name: str, **kwargs) -> Project: for key in kwargs: if key not in allowed_kwargs: raise ScaleInvalidRequest( - f"Illegal parameter {key} for" "ScaleClient.update_project()", None, + f"Illegal parameter {key} for" "ScaleClient.update_project()" ) endpoint = f"projects/{Api.quote_string(project_name)}/setParams" - projectdata = self.api._post_request(endpoint, body=kwargs) + projectdata = self.api.post_request(endpoint, body=kwargs) return Project(projectdata, self) diff --git a/scaleapi/api.py b/scaleapi/api.py index 3a291ad..9c28164 100644 --- a/scaleapi/api.py +++ b/scaleapi/api.py @@ -2,26 +2,19 @@ import urllib.parse import requests -from 
requests.adapters import HTTPAdapter, Retry +from requests.adapters import HTTPAdapter, Response, Retry from ._version import __package_name__, __version__ -from .exceptions import ( - ScaleDuplicateTask, - ScaleException, - ScaleInternalError, - ScaleInvalidRequest, - ScaleNotEnabled, - ScaleResourceNotFound, - ScaleTimeoutError, - ScaleTooManyRequests, - ScaleUnauthorized, -) +from .exceptions import ExceptionMap, ScaleException + SCALE_ENDPOINT = "https://api.scale.com/v1" NUM_OF_RETRIES = 3 -class Api(object): +class Api(): + """Internal Api reference for handling http operations""" + def __init__(self, api_key, user_agent_extension=None): if api_key == "" or api_key is None: raise ScaleException("Please provide a valid API Key.") @@ -34,13 +27,10 @@ def __init__(self, api_key, user_agent_extension=None): "User-Agent": self._generate_useragent(user_agent_extension), } - def _request( - self, method, endpoint, headers=None, auth=None, params=None, body=None - ): - """Generic HTTP request method with error handling.""" - - url = f"{SCALE_ENDPOINT}/{endpoint}" - error_message = None + @staticmethod + def _http_request( + method, url, headers=None, auth=None, params=None, body=None + ) -> Response: https = requests.Session() retry_strategy = Retry( @@ -67,45 +57,51 @@ def _request( json=body, ) - if res.status_code == 200: - return res.json() - else: - try: - error_message = res.json().get("error", res.text) - except Exception: - error_message = res.text - - if res.status_code == 400: - raise ScaleInvalidRequest(error_message, res.status_code) - elif res.status_code == 401: - raise ScaleUnauthorized(error_message, res.status_code) - elif res.status_code == 402: - raise ScaleNotEnabled(error_message, res.status_code) - elif res.status_code == 404: - raise ScaleResourceNotFound(error_message, res.status_code) - elif res.status_code == 409: - raise ScaleDuplicateTask(error_message, res.status_code) - elif res.status_code == 429: - raise 
ScaleTooManyRequests(error_message, res.status_code) - elif res.status_code == 500: - raise ScaleInternalError(error_message, res.status_code) - elif res.status_code == 504: - raise ScaleTimeoutError(error_message, res.status_code) - else: - raise ScaleException(error_message, res.status_code) - - except (requests.exceptions.Timeout, requests.exceptions.RetryError,) as err: - raise ScaleException(err) - - def _get_request(self, endpoint, params=None): + return res + except Exception as err: + raise ScaleException(err) from err + + @staticmethod + def _raise_on_respose(res: Response): + + message = "" + try: + message = res.json().get("error", res.text) + except ValueError: + message = res.text + + try: + exception = ExceptionMap[res.status_code] + raise exception(message) + except KeyError as err: + raise ScaleException(message) from err + + def _api_request( + self, method, endpoint, headers=None, auth=None, params=None, body=None + ): + """Generic HTTP request method with error handling.""" + + url = f"{SCALE_ENDPOINT}/{endpoint}" + + res = self._http_request(method, url, headers, auth, params, body) + + json = None + if res.status_code == 200: + json = res.json() + else: + self._raise_on_respose(res) + + return json + + def get_request(self, endpoint, params=None): """Generic GET Request Wrapper""" - return self._request( + return self._api_request( "GET", endpoint, headers=self._headers, auth=self._auth, params=params ) - def _post_request(self, endpoint, body=None): + def post_request(self, endpoint, body=None): """Generic POST Request Wrapper""" - return self._request( + return self._api_request( "POST", endpoint, headers=self._headers, auth=self._auth, body=body ) diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py index b06d89e..a205981 100644 --- a/scaleapi/exceptions.py +++ b/scaleapi/exceptions.py @@ -1,11 +1,19 @@ class ScaleException(Exception): + """Generic ScaleException class""" + code = None + def __init__(self, message, errcode=None): - 
self.code = errcode + if not message: + message = type(self).__name__ self.message = message + if errcode: - super(ScaleException, self).__init__(f" {message}") + self.code = errcode + + if self.code: + super().__init__(f" {message}") else: - super(ScaleException, self).__init__(f" {message}") + super().__init__(f" {message}") class ScaleInvalidRequest(ScaleException): @@ -13,13 +21,13 @@ class ScaleInvalidRequest(ScaleException): often due to missing a required parameter. """ - pass + code = 400 class ScaleUnauthorized(ScaleException): """401 - Unauthorized -- No valid API key provided.""" - pass + code = 401 class ScaleNotEnabled(ScaleException): @@ -27,13 +35,13 @@ class ScaleNotEnabled(ScaleException): creating this type of task. """ - pass + code = 402 class ScaleResourceNotFound(ScaleException): """404 - Not Found -- The requested resource doesn't exist.""" - pass + code = 404 class ScaleDuplicateTask(ScaleException): @@ -41,7 +49,7 @@ class ScaleDuplicateTask(ScaleException): already in use for a different request. """ - pass + code = 409 class ScaleTooManyRequests(ScaleException): @@ -49,7 +57,7 @@ class ScaleTooManyRequests(ScaleException): too quickly. """ - pass + code = 429 class ScaleInternalError(ScaleException): @@ -57,11 +65,23 @@ class ScaleInternalError(ScaleException): Try again later. """ - pass + code = 500 class ScaleTimeoutError(ScaleException): """504 - Server Timeout Error -- Try again later. 
""" - pass + code = 504 + + +ExceptionMap = { + ScaleInvalidRequest.code: ScaleInvalidRequest, + ScaleUnauthorized.code: ScaleUnauthorized, + ScaleNotEnabled.code: ScaleNotEnabled, + ScaleResourceNotFound.code: ScaleResourceNotFound, + ScaleDuplicateTask.code: ScaleDuplicateTask, + ScaleTooManyRequests.code: ScaleTooManyRequests, + ScaleInternalError.code: ScaleInternalError, + ScaleTimeoutError.code: ScaleTimeoutError, +} From 49f19f8e81f49aa01194c1aef23b2a8ac5bf82ae Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 22:32:49 -0700 Subject: [PATCH 11/38] pylint configuration --- .pylintrc | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..faa124d --- /dev/null +++ b/.pylintrc @@ -0,0 +1,8 @@ +[MASTER] +disable= + C0114, + R0903, + R0914, + R0913, + R0902, + C0103, \ No newline at end of file From 8bc75edee14c1362f1b813ee682cf9c984c01693 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 29 Mar 2021 22:35:29 -0700 Subject: [PATCH 12/38] re-formatting by black --- scaleapi/__init__.py | 4 +++- scaleapi/api.py | 3 +-- scaleapi/batches.py | 4 +++- scaleapi/exceptions.py | 1 + scaleapi/projects.py | 3 ++- scaleapi/tasks.py | 5 ++++- 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 9abda5c..6fd0141 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -14,6 +14,7 @@ class Paginator(list, Generic[T]): """Paginator for list endpoints""" + def __init__( self, docs: List[T], @@ -40,9 +41,10 @@ class Batchlist(Paginator[Batch]): """Batches Paginator""" -class ScaleClient(): +class ScaleClient: """Main class serves as an interface for Scale API """ + def __init__(self, api_key, source=None): self.api = Api(api_key, source) warnings.simplefilter("always", DeprecationWarning) diff --git a/scaleapi/api.py b/scaleapi/api.py index 9c28164..914d50b 100644 --- a/scaleapi/api.py +++ 
b/scaleapi/api.py @@ -7,12 +7,11 @@ from ._version import __package_name__, __version__ from .exceptions import ExceptionMap, ScaleException - SCALE_ENDPOINT = "https://api.scale.com/v1" NUM_OF_RETRIES = 3 -class Api(): +class Api: """Internal Api reference for handling http operations""" def __init__(self, api_key, user_agent_extension=None): diff --git a/scaleapi/batches.py b/scaleapi/batches.py index c1f99c5..636e1ea 100644 --- a/scaleapi/batches.py +++ b/scaleapi/batches.py @@ -3,13 +3,15 @@ class BatchStatus(Enum): """Status of Batches""" + Staging = "staging" InProgress = "in_progress" Completed = "completed" -class Batch(): +class Batch: """Batch class, contains Batch information""" + def __init__(self, json, client): self._json = json self.name = json["name"] diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py index a205981..ab1e2b9 100644 --- a/scaleapi/exceptions.py +++ b/scaleapi/exceptions.py @@ -1,5 +1,6 @@ class ScaleException(Exception): """Generic ScaleException class""" + code = None def __init__(self, message, errcode=None): diff --git a/scaleapi/projects.py b/scaleapi/projects.py index 5901cf9..486300a 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -1,5 +1,6 @@ -class Project(): +class Project: """Project class, containing Project information.""" + def __init__(self, json, client): self._json = json self.name = json["name"] diff --git a/scaleapi/tasks.py b/scaleapi/tasks.py index 36dde0c..1412041 100644 --- a/scaleapi/tasks.py +++ b/scaleapi/tasks.py @@ -3,6 +3,7 @@ class TaskType(Enum): """Task Type List""" + Annotation = "annotation" Categorization = "categorization" Comparison = "comparison" @@ -31,6 +32,7 @@ class TaskType(Enum): class TaskReviewStatus(Enum): """Customer Audit Status of Task""" + Accepted = "accepted" Fixed = "fixed" Commented = "commented" @@ -39,12 +41,13 @@ class TaskReviewStatus(Enum): class TaskStatus(Enum): """Status of Task""" + Pending = "pending" Completed = "completed" Canceled = "canceled" 
-class Task(): +class Task: """Task class, containing task information.""" def __init__(self, json, client): From 04a6f016ab9d8d1bff5232fafaba428314d988e4 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 12:23:52 -0700 Subject: [PATCH 13/38] fixed setup to add urllib3 requirement --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 712445c..da0489c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ except ImportError: from distutils.core import setup -install_requires = ["requests>=2.25.0"] +install_requires = ["requests>=2.25.0", "urllib3>=1.26.0"] def read(rel_path): From 7ae04ebe2be1511826fb4f955c88cd3217cc4d68 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 12:58:26 -0700 Subject: [PATCH 14/38] fixed docstring --- scaleapi/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 6fd0141..7d4b008 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -343,7 +343,7 @@ def create_batch(self, project: str, batch_name: str, callback: str = "") -> Bat callback (str, optional): Email to notify, or URL to POST to when a batch is complete. 
-clea + Returns: Batch: Created batch object """ @@ -384,7 +384,6 @@ def batch_status(self, batch_name: str) -> Dict: completed (optional): # of tasks in completed stage canceled (optional): # of tasks in canceled stage } - """ endpoint = f"batches/{Api.quote_string(batch_name)}/status" status_data = self.api.get_request(endpoint) From ee7cafc68da1055fbbd2fc0dacfb270200f9a62a Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 13:40:01 -0700 Subject: [PATCH 15/38] updates to project model, update_project method --- README.rst | 2 +- scaleapi/__init__.py | 13 +++++-------- scaleapi/projects.py | 11 +++++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index 8edeabf..d585dda 100644 --- a/README.rst +++ b/README.rst @@ -332,7 +332,7 @@ __ https://docs.scale.com/reference#project-update-parameters data = client.update_project( project_name='test_project', - pathc = false, + patch = false, instruction='update: Please label all the stuff', ) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 7d4b008..1f3e67a 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -405,10 +405,10 @@ def get_batch(self, batch_name: str) -> Batch: return Batch(batchdata, self) def list_batches(self, **kwargs) -> Batchlist: - """list_batches() will be deprecated, please use batches() method + """list_batches will be deprecated, please use batches() method """ warnings.warn( - "list_batches() will be deprecated, please use batches() method " + "list_batches will be deprecated, please use batches() method " "as the alternative.", DeprecationWarning, stacklevel=2, @@ -591,15 +591,12 @@ def update_project(self, project_name: str, **kwargs) -> Project: project_name (str): Project's name + **kwargs: + Project parameters to be set. 
+ Returns: Project """ - allowed_kwargs = {"patch", "instruction"} - for key in kwargs: - if key not in allowed_kwargs: - raise ScaleInvalidRequest( - f"Illegal parameter {key} for" "ScaleClient.update_project()" - ) endpoint = f"projects/{Api.quote_string(project_name)}/setParams" projectdata = self.api.post_request(endpoint, body=kwargs) diff --git a/scaleapi/projects.py b/scaleapi/projects.py index 486300a..8a0536f 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -6,12 +6,15 @@ def __init__(self, json, client): self.name = json["name"] self.type = json["type"] self._client = client + self.params = None + self.version = None + self.instruction = None if len(json["param_history"]): - last_params = json["param_history"][-1] - self.version = last_params["version"] - if "instruction" in last_params: - self.instruction = last_params["instruction"] + self.params = json["param_history"][-1] + self.version = self.params["version"] + if "instruction" in self.params: + self.instruction = self.params["instruction"] def __hash__(self): return hash(self.name) From c7f45218a85cfc5ea7501c59e49b65d8f0e692ca Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 16:20:46 -0700 Subject: [PATCH 16/38] updates to readme and migration guide --- README.rst | 74 +------------------------------------- migration_guide.rst | 87 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 73 deletions(-) create mode 100644 migration_guide.rst diff --git a/README.rst b/README.rst index d585dda..a4c3e86 100644 --- a/README.rst +++ b/README.rst @@ -4,79 +4,7 @@ Scale AI | Python SDK If you use earlier versions of the SDK, please refer to `v1.0.4 documentation `_. - -Migration Guide to v2.x -________________________ - -If you are migrating from v0.x or v1.x, this guide explains how to update your application for compatibility with v2.x. We recommend migrating as soon as possible to ensure that your application is unaffected. 
- -Creating New Tasks -^^^^^^^^^^^^^^^^^^ -Methods with task types such as ``create_imageannotation_task``, ``create_textcollection_task`` etc. are deprecated. - -Creating a new task is now unified under the ``create_task(TaskType, ...)`` method. Please review `Create Task`_ section for more details. - - -.. code-block:: python - - # Deprecated - client.create_imageannotation_task( - project = 'test_project', - instruction= "Draw a box around each baby cow and big cow.", - ... - ) - - # New Method - from scaleapi.tasks import TaskType - client.create_task( - TaskType.ImageAnnotation, - project = 'test_project', - instruction= "Draw a box around each baby cow and big cow.", - ... - ) - -Retrieving Tasks -^^^^^^^^^^^^^^^^ -A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. -You can have a simpler code by replacing ``tasks()`` loops with pagination and tokens. - -Please refer to `List Tasks`_ for more details. - -Accessing Attributes (Task, Batch, Project) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The old ``param_dict`` attribute is now replaced with a method ``as_dict()`` to return an object's attributes as a dictionary. - -First-level attributes of Task can also be accessed with `.` annotation as the following: - -.. code-block:: python - - task.as_dict()["status"] - # is same as - task.status - -Other examples are ``task.type``, ``task.params``, ``task.response["annotations"]`` etc. - - -Task Count Summary of Batches -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Attributes of Batch ``pending``, ``completed``, ``error``, ``canceled`` are replaced with ``tasks_pending``, ``tasks_completed``, ``tasks_error``, ``tasks_canceled`` respectively. 
- -Deprecated Methods -^^^^^^^^^^^^^^^^^^ -- ``fetch_task()`` replaced with ``get_task()`` -- ``list_batches()`` replaced with ``batches()`` - -Enabled Auto-Retry -^^^^^^^^^^^^^^^^^^ -SDK now enabled auto-retry in case of a TimeOut (504) or TooManyRequests (429) occurs. - -New Exceptions -^^^^^^^^^^^^^^ -New error types are introduces if you want to handle specific exception cases. -``ScaleInvalidRequest``, ``ScaleUnauthorized``, ``ScaleNotEnabled``, ``ScaleResourceNotFound``, ``ScaleDuplicateTask``, ``ScaleTooManyRequests``, ``ScaleInternalError`` and ``ScaleTimeoutError``. - -All new error types are child of the existing ``ScaleException`` which can be used to handle all cases. - +If you are migrating from earlier versions to v2, please refer to `Migration Guide to v2 `_. Installation ____________ diff --git a/migration_guide.rst b/migration_guide.rst new file mode 100644 index 0000000..96a6418 --- /dev/null +++ b/migration_guide.rst @@ -0,0 +1,87 @@ +===================== +Migration Guide to v2 +===================== + +If you are migrating from v0 or v1, this guide explains how to update your application for compatibility with v2. We recommend migrating as soon as possible to ensure that your application is unaffected. + +Creating New Tasks +__________________ +Methods with task types such as ``create_imageannotation_task``, ``create_textcollection_task`` etc. are deprecated. + +Creating a new task is now unified under the ``create_task(TaskType, ...)`` method. Please review `Create Task `_ section for more details. + + +.. code-block:: python + + # Deprecated + client.create_imageannotation_task( + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... + ) + + # New Method + from scaleapi.tasks import TaskType + client.create_task( + TaskType.ImageAnnotation, + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... 
+ ) + +Retrieving Tasks +________________ +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. +You can have a simpler code by replacing ``tasks()`` loops with pagination and tokens. + +Please refer to `List Tasks `_ for more details. + +Accessing Attributes (Task, Batch, Project) +__________________________________________________ +The old ``param_dict`` attribute is now replaced with a method ``as_dict()`` to return an object's attributes as a dictionary. + +First-level attributes of Task are still accessible with `.` annotation as the following: + +.. code-block:: python + + task.status # same as task.as_dict()["status"] + task.params["geometries"] # same as task.as_dict()["params"]["geometries"] + task.response["annotations"] # same as task.as_dict()["response"]["annotations"] + +Accessing ``task.params`` child objects at task level is **deprecated**. Instead of ``task.attribute``, you should use ``task.params["attribute"]`` for accessing objects under ``params``. + +.. code-block:: python + + task.params["geometries"] # Migrate from => task.geometries + task.params["attachment"] # Migrate from => task.attachment + + +Task Counts Summary of Batch +____________________________ +Attributes of Batch ``pending``, ``completed``, ``error``, ``canceled`` are replaced with ``tasks_pending``, ``tasks_completed``, ``tasks_error``, ``tasks_canceled`` respectively. + +.. code-block:: python + + # NEW Attributes # DEPRECATED Attributes + + batch.tasks_pending # batch.pending + batch.tasks_completed # batch.completed + batch.tasks_error # batch.error + batch.tasks_canceled # batch.canceled + +Deprecated Methods +__________________ +- ``fetch_task()`` replaced with ``get_task()`` +- ``list_batches()`` replaced with ``batches()`` + +Enabled Auto-Retry +__________________ +SDK now enabled auto-retry in case of a ``TimeOut(504)`` or ``TooManyRequests(429)`` error occurs. 
+ +New Exceptions +______________ +New error types are introduced if you want to handle specific exception cases. +``ScaleInvalidRequest``, ``ScaleUnauthorized``, ``ScaleNotEnabled``, ``ScaleResourceNotFound``, ``ScaleDuplicateTask``, ``ScaleTooManyRequests``, ``ScaleInternalError`` and ``ScaleTimeoutError``. + +All new error types are child of the existing ``ScaleException`` which can be used to handle all cases. +Please review `Error handling `_ section for more details. \ No newline at end of file From ccc4dff7a9ad33be6c18714b524d0617b328ea7f Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 16:22:08 -0700 Subject: [PATCH 17/38] typo in migration guide --- migration_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration_guide.rst b/migration_guide.rst index 96a6418..d8b4400 100644 --- a/migration_guide.rst +++ b/migration_guide.rst @@ -76,7 +76,7 @@ __________________ Enabled Auto-Retry __________________ -SDK now enabled auto-retry in case of a ``TimeOut(504)`` or ``TooManyRequests(429)`` error occurs. +SDK now supports auto-retry in case of a ``TimeOut(504)`` or ``TooManyRequests(429)`` error occurs. New Exceptions ______________ From cde7cd11c230dfc00258af3f5366470753506bc6 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 16:45:18 -0700 Subject: [PATCH 18/38] updates to migration guide --- migration_guide.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/migration_guide.rst b/migration_guide.rst index d8b4400..351fda6 100644 --- a/migration_guide.rst +++ b/migration_guide.rst @@ -32,7 +32,7 @@ Creating a new task is now unified under the ``create_task(TaskType, ...)`` meth Retrieving Tasks ________________ A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. -You can have a simpler code by replacing ``tasks()`` loops with pagination and tokens.
+You can have a simpler code by replacing ``tasks()`` loops with single ``tasks_all()`` call. Please refer to `List Tasks `_ for more details. @@ -56,9 +56,9 @@ Accessing ``task.params`` child objects at task level is **deprecated**. Instead task.params["attachment"] # Migrate from => task.attachment -Task Counts Summary of Batch -____________________________ -Attributes of Batch ``pending``, ``completed``, ``error``, ``canceled`` are replaced with ``tasks_pending``, ``tasks_completed``, ``tasks_error``, ``tasks_canceled`` respectively. +Task Counts as Summary of Batch +_______________________________ +Attributes of Batch ``pending``, ``completed``, ``error``, ``canceled`` are replaced with ``tasks_pending``, ``tasks_completed``, ``tasks_error``, ``tasks_canceled`` respectively to avoid confusion. .. code-block:: python @@ -84,4 +84,5 @@ New error types are introduces if you want to handle specific exception cases. ``ScaleInvalidRequest``, ``ScaleUnauthorized``, ``ScaleNotEnabled``, ``ScaleResourceNotFound``, ``ScaleDuplicateTask``, ``ScaleTooManyRequests``, ``ScaleInternalError`` and ``ScaleTimeoutError``. All new error types are child of the existing ``ScaleException`` which can be used to handle all cases. + Please review `Error handling `_ section for more details. 
\ No newline at end of file From 76913140cc2f5c4affd2a25929d025b4a3cb2ef0 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 21:51:39 -0700 Subject: [PATCH 19/38] pre-commit configuration --- .gitignore | 2 +- .pylintrc | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 2d07bef..3e29bf8 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,4 @@ htmlcov/ .*cache nosetests.xml coverage.xml -*.cover \ No newline at end of file +*.cover diff --git a/.pylintrc b/.pylintrc index faa124d..8c3132a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,8 +1,8 @@ [MASTER] disable= - C0114, - R0903, - R0914, - R0913, - R0902, - C0103, \ No newline at end of file + missing-module-docstring, + too-few-public-methods, + too-many-locals, + too-many-arguments, + too-many-instance-attributes, + invalid-name, From 5478aa4f6a691c6d0ca7cc6e6bc7c53a3fe36436 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 21:54:31 -0700 Subject: [PATCH 20/38] readme whitespace changes --- README.rst | 34 +++++++++++++++++----------------- migration_guide.rst | 10 +++++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/README.rst b/README.rst index a4c3e86..b0f1b50 100644 --- a/README.rst +++ b/README.rst @@ -49,7 +49,7 @@ __ https://docs.scale.com/reference .. code-block:: python from scaleapi.tasks import TaskType - + client.create_task( TaskType.ImageAnnotation, project = 'test_project', @@ -65,7 +65,7 @@ __ https://docs.scale.com/reference } } ) - + Retrieve a task ^^^^^^^^^^^^^^^ @@ -82,7 +82,7 @@ __ https://docs.scale.com/reference#retrieve-tasks List Tasks ^^^^^^^^^^ -Retrieve a list of `Task` objects, with optional filters for: `project_name, batch_name, type, status, review_status, unique_id, completed_after, completed_before, updated_after, updated_before, created_after, created_before` and `tags`. 
+Retrieve a list of `Task` objects, with optional filters for: `project_name, batch_name, type, status, review_status, unique_id, completed_after, completed_before, updated_after, updated_before, created_after, created_before` and `tags`. This method is a generator and yields tasks. It can be wrapped in a `list` statement if a Task list is needed. @@ -91,7 +91,7 @@ Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#list-multiple-tasks .. code-block :: python - + from scaleapi.tasks import TaskReviewStatus, TaskStatus tasks = client.tasks_all( @@ -101,13 +101,13 @@ __ https://docs.scale.com/reference#list-multiple-tasks status = TaskStatus.Completed, review_status = TaskReviewStatus.Accepted ) - + for task in tasks: # Download task or do something! print(task.task_id) - + # Alternative for accessing as a Task list - task_list = list(tasks) + task_list = list(tasks) print(f"{len(task_list))} tasks retrieved") Cancel Task @@ -180,7 +180,7 @@ __ https://docs.scale.com/reference#batch-retrieval List Batches ^^^^^^^^^^^^ -Retrieve a list of Batches. Optional parameters are `project_name, batch_status, created_after, created_before`. +Retrieve a list of Batches. Optional parameters are `project_name, batch_status, created_after, created_before`. Check out `Scale's API documentation`__ for more information. @@ -189,20 +189,20 @@ __ https://docs.scale.com/reference#batch-list .. 
code-block :: python from scaleapi.batches import BatchStatus - + batches = client.batches_all( batch_status=BatchStatus.Completed, created_after = "2020-09-08" - ) - + ) + counter = 0 for batch in batches: counter += 1 print(f'Downloading batch {counter} | {batch.name} | {batch.project}') # Alternative for accessing as a Batch list - batch_list = list(batches) - print(f"{len(batch_list))} batches retrieved") + batch_list = list(batches) + print(f"{len(batch_list))} batches retrieved") Projects ________ @@ -236,7 +236,7 @@ __ https://docs.scale.com/reference#project-retrieval List Projects ^^^^^^^^^^^^^ -This function does not take any arguments. Retrieve a list of every Project. +This function does not take any arguments. Retrieve a list of every Project. Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#batch-list @@ -268,7 +268,7 @@ Error handling ______________ If something went wrong while making API calls, then exceptions will be raised automatically -as a `ScaleException` parent type and child exceptions: +as a `ScaleException` parent type and child exceptions: - ``ScaleInvalidRequest``: 400 - Bad Request -- The request was unacceptable, often due to missing a required parameter. - ``ScaleUnauthorized``: 401 - Unauthorized -- No valid API key provided. @@ -292,8 +292,8 @@ For example: except ScaleException as err: print(err.code) # 400 print(err.message) # Parameters is invalid, reason: "attachments" is required - + Troubleshooting _______________ -If you notice any problems, please email us at support@scale.com. \ No newline at end of file +If you notice any problems, please email us at support@scale.com. 
diff --git a/migration_guide.rst b/migration_guide.rst index 351fda6..57a0b14 100644 --- a/migration_guide.rst +++ b/migration_guide.rst @@ -31,8 +31,8 @@ Creating a new task is now unified under the ``create_task(TaskType, ...)`` meth Retrieving Tasks ________________ -A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. -You can have a simpler code by replacing ``tasks()`` loops with single ``tasks_all()`` call. +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. +You can have a simpler code by replacing ``tasks()`` loops with single ``tasks_all()`` call. Please refer to `List Tasks `_ for more details. @@ -40,14 +40,14 @@ Accessing Attributes (Task, Batch, Project) __________________________________________________ The old ``param_dict`` attribute is now replaced with a method ``as_dict()`` to return an object's attributes as a dictionary. -First-level attributes of Task are still accessible with `.` annotation as the following: +First-level attributes of Task are still accessible with `.` annotation as the following: .. code-block:: python task.status # same as task.as_dict()["status"] task.params["geometries"] # same as task.as_dict()["params"]["geometries"] task.response["annotations"] # same as task.as_dict()["response"]["annotations"] - + Accessing ``task.params`` child objects at task level is **deprecated**. Instead of ``task.attribute``, you should use ``task.params["attribute"]`` for accessing objects under ``params``. .. code-block:: python @@ -85,4 +85,4 @@ New error types are introduces if you want to handle specific exception cases. All new error types are child of the existing ``ScaleException`` which can be used to handle all cases. -Please review `Error handling `_ section for more details. 
\ No newline at end of file +Please review `Error handling `_ section for more details. From 39283f776f859e9e4e1ee58d7f84391e92ea18fe Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 22:35:58 -0700 Subject: [PATCH 21/38] pre-commit formatting updates --- .pre-commit-config.yaml | 42 +++++++++++++++++++++++++++++++++++++++++ scaleapi/__init__.py | 13 +++++++------ scaleapi/exceptions.py | 3 +-- scaleapi/tasks.py | 6 ++---- setup.py | 2 ++ tests/test_client.py | 1 + 6 files changed, 55 insertions(+), 12 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7a1a32a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,42 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +default_language_version: + python: python3.6 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files +- repo: https://github.com/pycqa/isort + rev: 5.8.0 + hooks: + - id: isort + name: isort + files: scaleapi/ +- repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + args: [--max-complexity=10, --max-line-length=88, --max-doc-length=72] +- repo: https://github.com/pycqa/pylint + rev: pylint-2.7.3 + hooks: + - id: pylint + files: scaleapi/ + + +# - repo: local +# hooks: +# - id: pylint +# name: pylint +# entry: pylint +# language: system +# types: [python] +# files: scaleapi/ diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 1f3e67a..4c871de 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -42,8 +42,7 @@ class Batchlist(Paginator[Batch]): class ScaleClient: - """Main class serves as an interface for Scale API - """ + """Main class serves as an interface for Scale API""" def 
__init__(self, api_key, source=None): self.api = Api(api_key, source) @@ -63,8 +62,9 @@ def get_task(self, task_id: str) -> Task: return Task(self.api.get_request(endpoint), self) def fetch_task(self, task_id: str) -> Task: - """fetch_task() will be deprecated, please use get_task() method - """ + """fetch_task() will be deprecated, + please use get_task() method""" + warnings.warn( "fetch_task() will be deprecated, please use get_task() method " "as the alternative.", @@ -405,8 +405,9 @@ def get_batch(self, batch_name: str) -> Batch: return Batch(batchdata, self) def list_batches(self, **kwargs) -> Batchlist: - """list_batches will be deprecated, please use batches() method - """ + """list_batches will be deprecated, + please use batches() method""" + warnings.warn( "list_batches will be deprecated, please use batches() method " "as the alternative.", diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py index ab1e2b9..a603d2a 100644 --- a/scaleapi/exceptions.py +++ b/scaleapi/exceptions.py @@ -70,8 +70,7 @@ class ScaleInternalError(ScaleException): class ScaleTimeoutError(ScaleException): - """504 - Server Timeout Error -- Try again later. - """ + """504 - Server Timeout Error -- Try again later.""" code = 504 diff --git a/scaleapi/tasks.py b/scaleapi/tasks.py index 1412041..7292386 100644 --- a/scaleapi/tasks.py +++ b/scaleapi/tasks.py @@ -80,11 +80,9 @@ def as_dict(self): return self._json def refresh(self): - """Refreshes the task details. 
- """ + """Refreshes the task details.""" self._json = self._client.fetch_task(self.id).as_dict() def cancel(self): - """Cancels the task - """ + """Cancels the task""" self._client.cancel_task(self.id) diff --git a/setup.py b/setup.py index da0489c..9c5799e 100644 --- a/setup.py +++ b/setup.py @@ -9,12 +9,14 @@ def read(rel_path): + """Read lines from given file""" here = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(here, rel_path), "r") as fp: return fp.read() def get_version(rel_path): + """Read __version__ from given file""" for line in read(rel_path).splitlines(): if line.startswith("__version__"): delim = '"' if '"' in line else "'" diff --git a/tests/test_client.py b/tests/test_client.py index 1a3a800..1d932ff 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -6,6 +6,7 @@ from datetime import datetime import pytest + import scaleapi from scaleapi.exceptions import ( ScaleDuplicateTask, From 7d8c2315aebf5bd5726b061d123ea627a5b59767 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Tue, 30 Mar 2021 23:20:12 -0700 Subject: [PATCH 22/38] pre-commit settings updated --- .pre-commit-config.yaml | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a1a32a..9cac726 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,18 +25,16 @@ repos: hooks: - id: flake8 args: [--max-complexity=10, --max-line-length=88, --max-doc-length=72] -- repo: https://github.com/pycqa/pylint - rev: pylint-2.7.3 +- repo: local hooks: - id: pylint + name: pylint + entry: pylint + language: python + types: [python] files: scaleapi/ - - -# - repo: local -# hooks: -# - id: pylint -# name: pylint -# entry: pylint -# language: system -# types: [python] -# files: scaleapi/ + additional_dependencies: + - 'pylint>=2.7.4' + - 'requests>=2.25.0' + - 'urllib3>=1.26.0' + language_version: python3.6 From c785f562169124a836d8a42f4dadbfc955d08e10 Mon Sep 17 
00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 16:42:19 -0700 Subject: [PATCH 23/38] global http parameters --- .pre-commit-config.yaml | 6 ++---- scaleapi/api.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9cac726..648b353 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,3 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks default_language_version: python: python3.6 @@ -15,7 +13,7 @@ repos: hooks: - id: isort name: isort - files: scaleapi/ + args: ["--profile", "black"] - repo: https://github.com/psf/black rev: 20.8b1 hooks: @@ -32,9 +30,9 @@ repos: entry: pylint language: python types: [python] - files: scaleapi/ additional_dependencies: - 'pylint>=2.7.4' - 'requests>=2.25.0' - 'urllib3>=1.26.0' + - 'pytest>=6.2.2' language_version: python3.6 diff --git a/scaleapi/api.py b/scaleapi/api.py index 914d50b..cdbe693 100644 --- a/scaleapi/api.py +++ b/scaleapi/api.py @@ -8,7 +8,12 @@ from .exceptions import ExceptionMap, ScaleException SCALE_ENDPOINT = "https://api.scale.com/v1" -NUM_OF_RETRIES = 3 + +# Parameters for HTTP retry +HTTP_TOTAL_RETRIES = 3 # Number of total retries +HTTP_RETRY_BACKOFF_FACTOR = 2 # Wait 1, 2, 4 seconds between retries +HTTP_STATUS_FORCE_LIST = [429, 504] # Status codes to force retry +HTTP_RETRY_ALLOWED_METHODS = frozenset({"GET", "POST"}) class Api: @@ -33,10 +38,10 @@ def _http_request( https = requests.Session() retry_strategy = Retry( - total=NUM_OF_RETRIES, - backoff_factor=2, # Will wait 1, 2, 4 seconds between retries - status_forcelist=[429, 504], - allowed_methods=["GET", "POST"], + total=HTTP_TOTAL_RETRIES, + backoff_factor=HTTP_RETRY_BACKOFF_FACTOR, + status_forcelist=HTTP_STATUS_FORCE_LIST, + allowed_methods=HTTP_RETRY_ALLOWED_METHODS, raise_on_status=False, ) From c9957788383d38f47b8f6b55e5dede0042d7a4e5 Mon Sep 17 00:00:00 2001 From: 
Fatih Kurtoglu Date: Wed, 31 Mar 2021 16:43:55 -0700 Subject: [PATCH 24/38] minor updates to pytest and batch --- scaleapi/batches.py | 1 - tests/test_client.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/scaleapi/batches.py b/scaleapi/batches.py index 636e1ea..d0cb73a 100644 --- a/scaleapi/batches.py +++ b/scaleapi/batches.py @@ -25,7 +25,6 @@ def __init__(self, json, client): self.tasks_error = None self.tasks_canceled = None self._client = client - self.get_status() def __hash__(self): return hash(self.name) diff --git a/tests/test_client.py b/tests/test_client.py index 1d932ff..ad3e89d 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,4 +1,4 @@ -# coding: utf-8 +# pylint: disable=missing-function-docstring import os import time @@ -21,10 +21,10 @@ try: test_api_key = os.environ["SCALE_TEST_API_KEY"] client = scaleapi.ScaleClient(test_api_key, "pytest") -except KeyError: +except KeyError as err: raise Exception( "Please set the environment variable SCALE_TEST_API_KEY to run tests." - ) + ) from err def test_invalidkey_fail(): From 67a31160689a18f815850d144599f5e9b428cfd6 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 20:14:38 -0700 Subject: [PATCH 25/38] update pytest --- .pre-commit-config.yaml | 1 + tests/test_client.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 648b353..b5987b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,6 +30,7 @@ repos: entry: pylint language: python types: [python] + files: scaleapi/ additional_dependencies: - 'pylint>=2.7.4' - 'requests>=2.25.0' diff --git a/tests/test_client.py b/tests/test_client.py index ad3e89d..91a8482 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -26,6 +26,13 @@ "Please set the environment variable SCALE_TEST_API_KEY to run tests." 
) from err +try: + project = client.get_project(TEST_PROJECT_NAME) +except ScaleResourceNotFound: + client.create_project( + project_name=TEST_PROJECT_NAME, task_type=TaskType.ImageAnnotation + ) + def test_invalidkey_fail(): client_fail = scaleapi.ScaleClient("dummy_api_key", "pytest") From 00cac123e6b460b1e60ff1f29ecacdd76a1093b6 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 20:15:10 -0700 Subject: [PATCH 26/38] make project params optional in creation --- scaleapi/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 4c871de..63d8ce8 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -530,7 +530,7 @@ def batches_all( has_more = batches.has_more def create_project( - self, project_name: str, task_type: TaskType, params: Dict + self, project_name: str, task_type: TaskType, params: Dict = None ) -> Project: """Creates a new project. https://docs.scale.com/reference#project-creation From 697f9eea528490139812692af113e57e793cd30f Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 20:38:07 -0700 Subject: [PATCH 27/38] pre-commit updates --- .pre-commit-config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b5987b5..bc66f79 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,6 @@ default_language_version: python: python3.6 +default_stages: [commit] repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -8,6 +9,7 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-added-large-files + stages: [-re-c] - repo: https://github.com/pycqa/isort rev: 5.8.0 hooks: @@ -37,3 +39,12 @@ repos: - 'urllib3>=1.26.0' - 'pytest>=6.2.2' language_version: python3.6 +- repo: local + hooks: + - id: pytest-check + name: pytest-check + entry: pytest + language: system + stages: [push] + pass_filenames: false + always_run: true From 
5df8782b3e44bfe30b7f14fa2bce5a3fffd57b01 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 20:39:38 -0700 Subject: [PATCH 28/38] changes to pre-commit --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bc66f79..04baa2c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,8 +43,8 @@ repos: hooks: - id: pytest-check name: pytest-check - entry: pytest - language: system + entry: -m pytest + language: python stages: [push] pass_filenames: false always_run: true From 1dbe25dae524874881ee99da929229d6e8c60d61 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 20:40:46 -0700 Subject: [PATCH 29/38] pre-commit updates --- .pre-commit-config.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04baa2c..2e63965 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,12 +39,3 @@ repos: - 'urllib3>=1.26.0' - 'pytest>=6.2.2' language_version: python3.6 -- repo: local - hooks: - - id: pytest-check - name: pytest-check - entry: -m pytest - language: python - stages: [push] - pass_filenames: false - always_run: true From aa52f5ceb7e977fafa88db0a48e8115c61c0c5a3 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 21:59:17 -0700 Subject: [PATCH 30/38] updated readme --- README.rst | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index b0f1b50..a03e84d 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,17 @@ -===================== +********************* Scale AI | Python SDK -===================== +********************* If you use earlier versions of the SDK, please refer to `v1.0.4 documentation `_. -If you are migrating from earlier versions to v2, please refer to `Migration Guide to v2 `_. 
+If you are migrating from earlier versions to v2, please refer to `Migration Guide to v2 `_. + +.. image:: https://pepy.tech/badge/scaleapi/month + :alt: Downloads +.. image:: https://img.shields.io/pypi/pyversions/scaleapi.svg + :alt: Supported Versions +.. image:: https://img.shields.io/github/contributors/scaleapi/scaleapi-python-client.svg + :alt: Contributors Installation ____________ From afdbd02e4ede8cc7dda17319c94a3d0f40a5c851 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 22:01:25 -0700 Subject: [PATCH 31/38] readme update --- README.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index a03e84d..f2298e8 100644 --- a/README.rst +++ b/README.rst @@ -6,11 +6,13 @@ If you use earlier versions of the SDK, please refer to `v1.0.4 documentation `_. -.. image:: https://pepy.tech/badge/scaleapi/month +|pic1| |pic2| |pic3| + +.. |pic1| image:: https://pepy.tech/badge/scaleapi/month :alt: Downloads -.. image:: https://img.shields.io/pypi/pyversions/scaleapi.svg +.. |pic2| image:: https://img.shields.io/pypi/pyversions/scaleapi.svg :alt: Supported Versions -.. image:: https://img.shields.io/github/contributors/scaleapi/scaleapi-python-client.svg +.. |pic3| image:: https://img.shields.io/github/contributors/scaleapi/scaleapi-python-client.svg :alt: Contributors Installation From 9067831a630a68383e7d3646a05364f928c728d8 Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 22:12:00 -0700 Subject: [PATCH 32/38] readme updated with icons --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index f2298e8..01e8efe 100644 --- a/README.rst +++ b/README.rst @@ -10,10 +10,13 @@ If you are migrating from earlier versions to v2, please refer to `Migration Gu .. |pic1| image:: https://pepy.tech/badge/scaleapi/month :alt: Downloads + :target: https://pepy.tech/project/scaleapi .. 
|pic2| image:: https://img.shields.io/pypi/pyversions/scaleapi.svg :alt: Supported Versions + :target: https://pypi.org/project/scaleapi .. |pic3| image:: https://img.shields.io/github/contributors/scaleapi/scaleapi-python-client.svg :alt: Contributors + :target: https://github.com/scaleapi/scaleapi-python-client/graphs/contributors Installation ____________ From 65f216cdc320484975741ab2f55a7d324dbde9be Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Wed, 31 Mar 2021 22:27:15 -0700 Subject: [PATCH 33/38] automated publish to pypi --- publish.sh | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 publish.sh diff --git a/publish.sh b/publish.sh new file mode 100755 index 0000000..c30ed51 --- /dev/null +++ b/publish.sh @@ -0,0 +1,69 @@ +#!/bin/bash +echo "##### STARTING BUILD and PUBLISH #####" +echo + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +cd "${DIR}" || exit 1 + +BRANCH_NAME=$(git branch 2>/dev/null | grep '^*' | tr -d ' *') +echo "Active Git Branch: ${BRANCH_NAME}" # release-1.0.5 + +# IFS='-' read -ra strarr <<< "$BRANCH_NAME" +# BRANCH_PREFIX="${strarr[0]}" # release +# BRANCH_VERSION="${strarr[1]}" # 1.0.5 + +while IFS= read -r line; do + if [[ $line == __version__* ]]; + then + IFS=' = ' read -ra strarr <<< "$line" + PKG_VERSION=$( sed -e 's/^"//' -e 's/"$//' <<< "${strarr[1]}" ) + echo "SDK Package Version: ${PKG_VERSION}" + break + fi +done < "${DIR}/scaleapi/_version.py" + +if [ "$BRANCH_NAME" != "release-${PKG_VERSION}" ]; +then + echo "ERROR: You need to be in 'release-${PKG_VERSION}' git branch to publish this version (${PKG_VERSION})." + exit 1 +fi + +if [ "$1" == "runtest" ]; +then + echo "Validating environment variable [SCALE_TEST_API_KEY] for pytest..." + if [[ -z "${SCALE_TEST_API_KEY}" ]]; then + echo "Test key not found. Please assign 'SCALE_TEST_API_KEY=...' as your test environment key." + exit 1 + fi + + + if ! 
python -m pytest; then echo "ERROR: pytest failed."; exit; fi + echo "pytest is successful!" +fi + +# Clean-up build and dist folders +rm -rf build/ dist/ + +# Build package +echo "Building package..." + +if ! python3 setup.py sdist bdist_wheel > /dev/null 2>&1; then echo "ERROR: Package building failed."; exit 1; fi + + +if ! pip show twine > /dev/null 2>&1; +then + echo "WARN: 'twine' package is not found, installing..."; + pip install twine +fi + +# Twine Validation +echo "Validating package..." + +if ! twine check --strict dist/* ; then echo "ERROR: Twine check failed."; exit 1; fi + +# Twine Upload to Pypi +echo "Uploading package..." + +if ! twine upload dist/*; then echo "ERROR: Twine upload failed."; exit 1; fi + +exit 0; From 3b7938a1fb507ebf5a9503393673e20abed3a5ab Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Thu, 1 Apr 2021 01:05:04 -0700 Subject: [PATCH 34/38] version update guide --- publish.sh | 19 +++++++++++++++---- pypi_update_guide.md | 39 ++++++++++++--------------------------- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/publish.sh b/publish.sh index c30ed51..60ad021 100755 --- a/publish.sh +++ b/publish.sh @@ -1,6 +1,17 @@ #!/bin/bash echo "##### STARTING BUILD and PUBLISH #####" -echo + +VERSION_FILE="scaleapi/_version.py" + +staged_files=$(git diff --cached --name-only --diff-filter=ACMR ${VERSION_FILE}) +changed_files=$(git diff --name-only --diff-filter=ACMR ${VERSION_FILE}) + +if [[ "$staged_files" == "$VERSION_FILE" || "$changed_files" == "$VERSION_FILE" ]]; +then + echo "ERROR: You have uncommitted changes in version file: ${VERSION_FILE}" + echo " Please commit and push your changes before publishing." 
+ exit +fi DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" cd "${DIR}" || exit 1 @@ -20,11 +31,11 @@ while IFS= read -r line; do echo "SDK Package Version: ${PKG_VERSION}" break fi -done < "${DIR}/scaleapi/_version.py" +done < "${DIR}/${VERSION_FILE}" -if [ "$BRANCH_NAME" != "release-${PKG_VERSION}" ]; +if [ "$BRANCH_NAME" != "master" ]; then - echo "ERROR: You need to be in 'release-${PKG_VERSION}' git branch to publish this version (${PKG_VERSION})." + echo "ERROR: You need to be in 'master' git branch to publish this version (${PKG_VERSION})." exit 1 fi diff --git a/pypi_update_guide.md b/pypi_update_guide.md index 000329f..deb378d 100644 --- a/pypi_update_guide.md +++ b/pypi_update_guide.md @@ -2,50 +2,35 @@ _Creating and deploying a new package version is easy_ ### Prerequisites -1. Ensure you're on the latest master +1. Ensure you're on the latest `master` branch -2. Ensure you have a PyPI account created and are added as a Collaborator +2. Ensure you have access to a PyPI account that is a maintainer of [scaleapi](https://pypi.org/project/scaleapi/) on PyPI ### Deployment Steps: **Step 0: Critical - Bump Project Version** -In `_version.py`, you need to specify a new project version. +Ensure `_version.py` has an updated project version. If not, please increment the project version, commit and push the changes. We use [semantic versioning](https://packaging.python.org/guides/distributing-packages-using-setuptools/#semantic-versioning-preferred). If you are adding a meaningful feature, bump the minor version. If you are fixing a bug, bump the incremental version. -**Step 1: Remove Previous Versions** - -Clear out any previously packages and files in the `dist` and `build/lib` folders - -**Step 2: Create a Source Distribution** - -``` -python3 setup.py sdist -``` - -**Step 3: Create `wheel`** - -You should also create a wheel for your project. A wheel is a built package that can be installed without needing to go through the “build” process. 
Installing wheels is substantially faster for the end user than installing from a source distribution +**Step 1: Run Publish Script** ``` -python3 setup.py bdist_wheel +./publish.sh ``` -**Step 4: Install Twine** - -Twine is what is used to manage PyPI pacakges +If you want to run test cases via `pytest` before publishing, add the _optional_ `runtest` arg to the script. +You need to set your own test key as `SCALE_TEST_API_KEY` environment variable before running. ``` -pip3 install twine +SCALE_TEST_API_KEY="{apikey}|{userid}|test" ./publish.sh runtest ``` -**Step 5: Upload distribution to PyPI** +**Step 2: Check out the PyPI page to ensure all looks good** -``` -twine upload dist/* -``` +[https://pypi.org/project/scaleapi/](https://pypi.org/project/scaleapi/) -**Step 6: Check out the PyPI page to ensure all looks good** +**Step 3: Create a New Release** -[https://pypi.org/project/scaleapi/](https://pypi.org/project/scaleapi/) +Create a [new release](https://github.com/scaleapi/scaleapi-python-client/releases/new) on GitHub with a matching version tag _(i.e. v2.0.1)_. Please provide a summary about new features and fixed bugs in the Release Notes. 
From a105982f52a8d059b18748208408730a0a2d807e Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Thu, 1 Apr 2021 14:36:12 -0700 Subject: [PATCH 35/38] updated documentation --- .pre-commit-config.yaml | 4 +- README.rst | 2 +- docs/dev_requirements.txt | 6 ++ docs/developer_guide.md | 102 ++++++++++++++++++ docs/migration_guide.md | 88 +++++++++++++++ .../pypi_update_guide.md | 9 +- migration_guide.rst | 88 --------------- publish.sh | 9 +- setup.cfg | 9 ++ 9 files changed, 221 insertions(+), 96 deletions(-) create mode 100644 docs/dev_requirements.txt create mode 100644 docs/developer_guide.md create mode 100644 docs/migration_guide.md rename pypi_update_guide.md => docs/pypi_update_guide.md (91%) delete mode 100644 migration_guide.rst diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2e63965..3bb7b03 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,8 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-added-large-files - stages: [-re-c] + - id: check-yaml + - id: check-case-conflict - repo: https://github.com/pycqa/isort rev: 5.8.0 hooks: @@ -24,7 +25,6 @@ repos: rev: 3.8.4 hooks: - id: flake8 - args: [--max-complexity=10, --max-line-length=88, --max-doc-length=72] - repo: local hooks: - id: pylint diff --git a/README.rst b/README.rst index 01e8efe..50b6424 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Scale AI | Python SDK If you use earlier versions of the SDK, please refer to `v1.0.4 documentation `_. -If you are migrating from earlier versions to v2, please refer to `Migration Guide to v2 `_. +If you are migrating from earlier versions to v2, please refer to `Migration Guide to v2 `_. 
|pic1| |pic2| |pic3| diff --git a/docs/dev_requirements.txt b/docs/dev_requirements.txt new file mode 100644 index 0000000..8a5d25e --- /dev/null +++ b/docs/dev_requirements.txt @@ -0,0 +1,6 @@ +black>=19.10b0 +flake8>=3.8.4 +pre-commit==2.11.1 +isort>=5.7.0 +pytest>=6.2.2 +pylint>=2.7.2 diff --git a/docs/developer_guide.md b/docs/developer_guide.md new file mode 100644 index 0000000..a204e04 --- /dev/null +++ b/docs/developer_guide.md @@ -0,0 +1,102 @@ +# Developer Guide for Python SDK + +### 1. Clone repo + +Clone git repo into your local machine. + +```bash +$ git clone git@github.com:scaleapi/scaleapi-python-client.git +``` + +### 2. Install required packages + +If you use a virtual environment (via venv or conda), please activate it before installing the following packages. + +_Python SDK v2+ supports only Python 3.6+_ + +```bash +$ pip install -r docs/dev_requirements.txt +``` +### 3. Setup pre-commit + +Ensure pre-commit[1] is installed: +```bash +$ pre-commit --version +# pre-commit 2.11.1 +``` + +Configure pre-commit for the repo: +```bash +pre-commit install +``` +Now `pre-commit` will run automatically on `git commit`! + +### 4. (Optional) VS Code Settings + +Press `Cmd+Shift+P` to open Command Palette on VSCode to find **Preferences: Open Settings (JSON)**. + +If you want to make those settings only apply to current workspace (not VS Code general), choose **Preferences: Open Workspace Settings (JSON)** + +- Enables `pylint`[2] and `flake8`[3] as linters together +- Auto-formats python files on save according to `black` + +Append the following lines to the JSON file: +```json +"python.linting.enabled": true, +"python.linting.pylintEnabled": true, +"python.linting.flake8Enabled": true, +"python.formatting.provider": "black", +"[python]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "ms-python.python" + }, +``` + +### 5. Running pre-commit Tests Manually + +You can run the following command to run the pre-commit linters for all files, without a commit. 
It provides a report for issues as well as fixes formatting. + +```bash +$ pre-commit run --all-files + +Trim Trailing Whitespace.................................................Passed +Fix End of Files.........................................................Passed +Check for added large files..............................................Passed +Check Yaml...............................................................Passed +Check for case conflicts.................................................Passed +isort....................................................................Passed +black....................................................................Passed +flake8...................................................................Passed +pylint...................................................................Passed +``` + +### 6. Running pytest Test Cases + +Before pushing your code changes, you can run `pytest` to test existing cases. You can add new test cases if you're adding a new method or functionality to be tested. + +In order to run `pytest` you need to set environment variable `SCALE_TEST_API_KEY` as your Scale user's test key. + +```bash +$ SCALE_TEST_API_KEY="{apikey}|{userid}|test" python3 -m pytest -v + +================================ test session starts ================================ +platform darwin -- Python 3.6.12, pytest-6.2.2, py-1.10.0, pluggy-0.13.1 +cachedir: .pytest_cache +rootdir: /Users/git/scaleapi-python-client +collected 31 items + +tests/test_client.py::test_invalidkey_fail PASSED [ 3%] +tests/test_client.py::test_uniquekey_fail PASSED [ 6%] +tests/test_client.py::test_categorize_ok PASSED [ 9%] +tests/test_client.py::test_categorize_fail PASSED [ 12%] +tests/test_client.py::test_transcription_ok PASSED [ 16%] +......... +``` + +#### 7. Deployment and Publishing of a new version + +Please refer to [Deployment and Publishing Guide](pypi_update_guide.md) for details. 
+_____ +[1] pre-commit configuration is available in `.pre-commit-config.yaml` +[2] Pylint configuration is available in `.pylintrc` +[3] flake8 configuration is available in `setup.cfg` diff --git a/docs/migration_guide.md b/docs/migration_guide.md new file mode 100644 index 0000000..81bd627 --- /dev/null +++ b/docs/migration_guide.md @@ -0,0 +1,88 @@ + +# SDK Migration Guide to v2 + + +If you are migrating from v0 or v1, this guide explains how to update your application for compatibility with v2. We recommend migrating as soon as possible to ensure that your application is unaffected. + +### Creating New Tasks + +Methods with task types such as `create_imageannotation_task`, `create_textcollection_task` etc. are ~~**deprecated**~~. + +Creating a new task is now unified under the `create_task(TaskType, ...)` method. Please review the [Create Task](../README.rst#create-task) section for more details. + + +```python +# Deprecated Old Method +client.create_imageannotation_task( + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... +) + +# New Method +from scaleapi.tasks import TaskType + +client.create_task( + TaskType.ImageAnnotation, + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... +) +``` + +### Retrieving Tasks + +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method `tasks_all(...)` handles pagination and tokens. +You can simplify your code by replacing `tasks()` loops with a single `tasks_all()` call. + +Please refer to [List Tasks](../README.rst#list-tasks) for more details. + +### Accessing Attributes (Task, Batch, Project) + +The old `param_dict` attribute is now replaced with a method `as_dict()` to return an object's attributes as a dictionary. 
+ +First-level attributes of Task are still accessible with `.` notation as follows: + +```python +task.status # same as task.as_dict()["status"] +task.params["geometries"] # same as task.as_dict()["params"]["geometries"] +task.response["annotations"] # same as task.as_dict()["response"]["annotations"] +``` + +Accessing `task.params` child objects directly at task level is ~~**deprecated**~~. Instead of `task.attribute`, you should use `task.params["attribute"]` for accessing objects under `params`. + +```python +task.params["geometries"] # DEPRECATED access => task.geometries +task.params["attachment"] # DEPRECATED access => task.attachment +``` + +### Task Counts as Summary of Batch + +Attributes of Batch `pending`, `completed`, `error`, `canceled` are replaced with `tasks_pending`, `tasks_completed`, `tasks_error`, `tasks_canceled` respectively to avoid confusion. + +```python +# NEW Attributes # DEPRECATED Attributes + +batch.tasks_pending # batch.pending +batch.tasks_completed # batch.completed +batch.tasks_error # batch.error +batch.tasks_canceled # batch.canceled +``` + +### Deprecated Methods + +- `fetch_task()` replaced with `get_task()` +- `list_batches()` replaced with `batches()` + +### Enabled Auto-Retry + +SDK now supports auto-retry in case a `TimeOut(504)` or `TooManyRequests(429)` error occurs. + +### New Exceptions + +New error types are introduced if you want to handle specific exception cases. +`ScaleInvalidRequest`, `ScaleUnauthorized`, `ScaleNotEnabled`, `ScaleResourceNotFound`, `ScaleDuplicateTask`, `ScaleTooManyRequests`, `ScaleInternalError` and `ScaleTimeoutError`. + +All new error types are children of the existing `ScaleException` which can be used to handle all cases. + +Please review [Error handling](../README.rst#error-handling) section for more details. 
diff --git a/pypi_update_guide.md b/docs/pypi_update_guide.md similarity index 91% rename from pypi_update_guide.md rename to docs/pypi_update_guide.md index deb378d..a89d4dc 100644 --- a/pypi_update_guide.md +++ b/docs/pypi_update_guide.md @@ -1,4 +1,6 @@ -_Creating and deploying a new package version is easy_ +# Deployment and Publishing Guide for Python SDK + +_Creating and deploying a new package version is easy!_ ### Prerequisites @@ -16,14 +18,15 @@ We use [semantic versioning](https://packaging.python.org/guides/distributing-pa **Step 1: Run Publish Script** -``` +```bash ./publish.sh ``` If you want to run test cases via `pytest` before publishing, add the _optional_ `runtest` arg to the script. You need to set your own test key as `SCALE_TEST_API_KEY` environment variable before running. -``` + +```bash SCALE_TEST_API_KEY="{apikey}|{userid}|test" ./publish.sh runtest ``` diff --git a/migration_guide.rst b/migration_guide.rst deleted file mode 100644 index 57a0b14..0000000 --- a/migration_guide.rst +++ /dev/null @@ -1,88 +0,0 @@ -===================== -Migration Guide to v2 -===================== - -If you are migrating from v0 or v1, this guide explains how to update your application for compatibility with v2. We recommend migrating as soon as possible to ensure that your application is unaffected. - -Creating New Tasks -__________________ -Methods with task types such as ``create_imageannotation_task``, ``create_textcollection_task`` etc. are deprecated. - -Creating a new task is now unified under the ``create_task(TaskType, ...)`` method. Please review `Create Task `_ section for more details. - - -.. code-block:: python - - # Deprecated - client.create_imageannotation_task( - project = 'test_project', - instruction= "Draw a box around each baby cow and big cow.", - ... 
- ) - - # New Method - from scaleapi.tasks import TaskType - client.create_task( - TaskType.ImageAnnotation, - project = 'test_project', - instruction= "Draw a box around each baby cow and big cow.", - ... - ) - -Retrieving Tasks -________________ -A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method handles pagination and tokens: ``tasks_all(...)``. -You can have a simpler code by replacing ``tasks()`` loops with single ``tasks_all()`` call. - -Please refer to `List Tasks `_ for more details. - -Accessing Attributes (Task, Batch, Project) -__________________________________________________ -The old ``param_dict`` attribute is now replaced with a method ``as_dict()`` to return an object's attributes as a dictionary. - -First-level attributes of Task are still accessible with `.` annotation as the following: - -.. code-block:: python - - task.status # same as task.as_dict()["status"] - task.params["geometries"] # same as task.as_dict()["params"]["geometries"] - task.response["annotations"] # same as task.as_dict()["response"]["annotations"] - -Accessing ``task.params`` child objects at task level is **deprecated**. Instead of ``task.attribute``, you should use ``task.params["attribute"]`` for accessing objects under ``params``. - -.. code-block:: python - - task.params["geometries"] # Migrate from => task.geometries - task.params["attachment"] # Migrate from => task.attachment - - -Task Counts as Summary of Batch -_______________________________ -Attributes of Batch ``pending``, ``completed``, ``error``, ``canceled`` are replaced with ``tasks_pending``, ``tasks_completed``, ``tasks_error``, ``tasks_canceled`` respectively to avoid confusion. - -.. 
code-block:: python - - # NEW Attributes # DEPRECATED Attributes - - batch.tasks_pending # batch.pending - batch.tasks_completed # batch.completed - batch.tasks_error # batch.error - batch.tasks_canceled # batch.canceled - -Deprecated Methods -__________________ -- ``fetch_task()`` replaced with ``get_task()`` -- ``list_batches()`` replaced with ``batches()`` - -Enabled Auto-Retry -__________________ -SDK now supports auto-retry in case of a ``TimeOut(504)`` or ``TooManyRequests(429)`` error occurs. - -New Exceptions -______________ -New error types are introduces if you want to handle specific exception cases. -``ScaleInvalidRequest``, ``ScaleUnauthorized``, ``ScaleNotEnabled``, ``ScaleResourceNotFound``, ``ScaleDuplicateTask``, ``ScaleTooManyRequests``, ``ScaleInternalError`` and ``ScaleTimeoutError``. - -All new error types are child of the existing ``ScaleException`` which can be used to handle all cases. - -Please review `Error handling `_ section for more details. diff --git a/publish.sh b/publish.sh index 60ad021..4fbceca 100755 --- a/publish.sh +++ b/publish.sh @@ -41,13 +41,18 @@ fi if [ "$1" == "runtest" ]; then - echo "Validating environment variable [SCALE_TEST_API_KEY] for pytest..." + echo "Validating environment for pytest..." + if ! pip show pytest > /dev/null 2>&1; + then + echo "WARN: 'pytest' package is not found, installing..."; + pip install pytest + fi + if [[ -z "${SCALE_TEST_API_KEY}" ]]; then echo "Test key not found. Please assign 'SCALE_TEST_API_KEY=...' as your test environment key." exit 1 fi - if ! python -m pytest; then echo "ERROR: pytest failed."; exit; fi echo "pytest is successful!" 
fi diff --git a/setup.cfg b/setup.cfg index 3158478..69b2789 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,12 @@ [metadata] long_description = file: README.rst long_description_content_type = text/x-rst + +[flake8] +# Recommend matching the black line length (default 88) +max-line-length = 88 +max-doc-length = 72 +max-complexity = 10 +extend-ignore = + # See https://github.com/PyCQA/pycodestyle/issues/373 + E203, From cf018aaecfa8df211b940288819d69a58d0c9baa Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Thu, 1 Apr 2021 14:58:25 -0700 Subject: [PATCH 36/38] Updated developer_guide --- docs/developer_guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/developer_guide.md b/docs/developer_guide.md index a204e04..50f44c6 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -98,5 +98,7 @@ tests/test_client.py::test_transcription_ok PASSED [ Please refer to [Deployment and Publishing Guide](pypi_update_guide.md) for details. _____ [1] pre-commit configuration is available in `.pre-commit-config.yaml` + [2] Pylint configuration is available in `.pylintrc` + [3] flake8 configuration is available in `setup.cfg` From 2118ffd0a6183541a6cd7a5edc04ec8287e0c0ef Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 5 Apr 2021 11:38:22 -0700 Subject: [PATCH 37/38] method updates --- scaleapi/__init__.py | 49 +++++++++++++++----------------------------- tests/test_client.py | 10 ++++----- 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index 63d8ce8..7d80431 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -1,4 +1,3 @@ -import warnings from typing import Dict, Generator, Generic, List, TypeVar, Union from scaleapi.batches import Batch, BatchStatus @@ -46,7 +45,6 @@ class ScaleClient: def __init__(self, api_key, source=None): self.api = Api(api_key, source) - warnings.simplefilter("always", DeprecationWarning) def get_task(self, task_id: str) -> Task: """Fetches a 
task. @@ -61,18 +59,6 @@ def get_task(self, task_id: str) -> Task: endpoint = f"task/{task_id}" return Task(self.api.get_request(endpoint), self) - def fetch_task(self, task_id: str) -> Task: - """fetch_task() will be deprecated, - please use get_task() method""" - - warnings.warn( - "fetch_task() will be deprecated, please use get_task() method " - "as the alternative.", - DeprecationWarning, - stacklevel=2, - ) - return self.get_task(task_id) - def cancel_task(self, task_id: str) -> Task: """Cancels a task and returns the associated task. Raises a ScaleException if it has already been canceled. @@ -191,7 +177,7 @@ def tasks(self, **kwargs) -> Tasklist: response.get("next_token"), ) - def tasks_all( + def get_tasks( self, project_name: str, batch_name: str = None, @@ -207,12 +193,12 @@ def tasks_all( created_before: str = None, tags: Union[List[str], str] = None, ) -> Generator[Task, None, None]: - """Retrieve all tasks as a generator function, with the + """Retrieve all tasks as a `generator` method, with the given parameters. This methods handles pagination of tasks() method. In order to retrieve results as a list, please use: - `tasks = list(tasks_all(...))` + `task_list = list(get_tasks(...))` Args: project_name (str): @@ -404,18 +390,6 @@ def get_batch(self, batch_name: str) -> Batch: batchdata = self.api.get_request(endpoint) return Batch(batchdata, self) - def list_batches(self, **kwargs) -> Batchlist: - """list_batches will be deprecated, - please use batches() method""" - - warnings.warn( - "list_batches will be deprecated, please use batches() method " - "as the alternative.", - DeprecationWarning, - stacklevel=2, - ) - return self.batches(**kwargs) - def batches(self, **kwargs) -> Batchlist: """This is a paged endpoint for all of your batches. 
Pagination is based off limit and offset parameters, @@ -476,18 +450,18 @@ def batches(self, **kwargs) -> Batchlist: response["has_more"], ) - def batches_all( + def get_batches( self, project_name: str = None, batch_status: BatchStatus = None, created_after: str = None, created_before: str = None, ) -> Generator[Batch, None, None]: - """Generator method to yield all batches with the given + """`Generator` method to yield all batches with the given parameters. In order to retrieve results as a list, please use: - `batches = list(batches_all(...))` + `batches_list = list(get_batches(...))` Args: project_name (str): @@ -569,6 +543,17 @@ def get_project(self, project_name: str) -> Project: projectdata = self.api.get_request(endpoint) return Project(projectdata, self) + def get_projects(self) -> List[Project]: + """Returns all projects. + Refer to Projects API Reference: + https://docs.scale.com/reference#list-all-projects + Same as `projects()` method. + + Returns: + List[Project] + """ + return self.projects() + def projects(self) -> List[Project]: """Returns all projects. 
Refer to Projects API Reference: diff --git a/tests/test_client.py b/tests/test_client.py index 91a8482..c2af8f7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -320,13 +320,13 @@ def create_a_batch(): ) -def test_tasks_all(): +def test_get_tasks(): batch = create_a_batch() tasks = [] for _ in range(3): tasks.append(make_a_task(batch=batch.name)) task_ids = {task.id for task in tasks} - for task in client.tasks_all(project_name=TEST_PROJECT_NAME, batch_name=batch.name): + for task in client.get_tasks(project_name=TEST_PROJECT_NAME, batch_name=batch.name): assert task.id in task_ids @@ -355,7 +355,7 @@ def test_get_batch(): assert batch2.status == "staging" -def test_list_batch(): +def test_batches(): batches = [] for _ in range(3): batches.append(create_a_batch()) @@ -365,11 +365,11 @@ def test_list_batch(): assert batch.name in batch_names -def test_list_batch_all(): +def test_get_batches(): # Get count of all batches batchlist = client.batches(project=TEST_PROJECT_NAME, limit=1) total_batches = batchlist.total # Download all batches to check total count - all_batches = list(client.batches_all(project_name=TEST_PROJECT_NAME)) + all_batches = list(client.get_batches(project_name=TEST_PROJECT_NAME)) assert total_batches == len(all_batches) From 4ad0951d120f2397c6452e41b290f655b866876a Mon Sep 17 00:00:00 2001 From: Fatih Kurtoglu Date: Mon, 5 Apr 2021 11:39:23 -0700 Subject: [PATCH 38/38] updated documentation --- README.rst | 43 +++++++++++++++++++++++++++++------------ docs/migration_guide.md | 6 +++--- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index 50b6424..95cf508 100644 --- a/README.rst +++ b/README.rst @@ -37,7 +37,7 @@ Tasks _____ Most of these methods will return a `scaleapi.Task` object, which will contain information -about the json response (task_id, status, params, response etc.). +about the json response (task_id, status, params, response, etc.). 
Any parameter available in `Scale's API documentation`__ can be passed as an argument option with the corresponding type. @@ -94,9 +94,19 @@ __ https://docs.scale.com/reference#retrieve-tasks List Tasks ^^^^^^^^^^ -Retrieve a list of `Task` objects, with optional filters for: `project_name, batch_name, type, status, review_status, unique_id, completed_after, completed_before, updated_after, updated_before, created_after, created_before` and `tags`. +Retrieve a list of `Task` objects, with filters for: ``project_name``, ``batch_name``, ``type``, ``status``, +``review_status``, ``unique_id``, ``completed_after``, ``completed_before``, ``updated_after``, ``updated_before``, +``created_after``, ``created_before`` and ``tags``. -This method is a generator and yields tasks. It can be wrapped in a `list` statement if a Task list is needed. +``get_tasks()`` is a **generator** method and yields ``Task`` objects. + +`A generator is another type of function, returns an iterable that you can loop over like a list. +However, unlike lists, generators do not store the content in the memory. +That helps you to process a large number of objects without increasing memory usage.` + +If you will iterate through the tasks and process them once, using a generator is the most efficient method. +However, if you need to process the list of tasks multiple times, you can wrap the generator in a ``list(...)`` +statement, which returns a list of Tasks by loading them into the memory. Check out `Scale's API documentation`__ for more information. 
@@ -106,7 +116,7 @@ __ https://docs.scale.com/reference#list-multiple-tasks from scaleapi.tasks import TaskReviewStatus, TaskStatus - tasks = client.tasks_all( + tasks = client.get_tasks( project_name = "My Project", created_after = "2020-09-08", completed_before = "2021-04-01", @@ -114,18 +124,19 @@ __ https://docs.scale.com/reference#list-multiple-tasks review_status = TaskReviewStatus.Accepted ) + # Iterating through the generator for task in tasks: # Download task or do something! print(task.task_id) - # Alternative for accessing as a Task list + # For retrieving results as a Task list task_list = list(tasks) print(f"{len(task_list))} tasks retrieved") Cancel Task ^^^^^^^^^^^ -Cancel a task given its id if work has not started on the task (task status is `Queued` in the UI). Check out `Scale's API documentation`__ for more information. +Cancel a task given its id if work has not started on the task (task status is ``Queued`` in the UI). Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#cancel-task @@ -178,8 +189,8 @@ __ https://docs.scale.com/reference#batch-status batch.get_status() # Refreshes tasks_{status} attributes of Batch print(batch.tasks_pending, batch.tasks_completed) -Retrieve Batch -^^^^^^^^^^^^^^ +Retrieve A Batch +^^^^^^^^^^^^^^^^ Retrieve a single Batch. Check out `Scale's API documentation`__ for more information. @@ -192,7 +203,15 @@ __ https://docs.scale.com/reference#batch-retrieval List Batches ^^^^^^^^^^^^ -Retrieve a list of Batches. Optional parameters are `project_name, batch_status, created_after, created_before`. +Retrieve a list of Batches. Optional parameters are ``project_name``, ``batch_status``, ``created_after`` and ``created_before``. + +``get_batches()`` is a **generator** method and yields ``Batch`` objects. + +`A generator is another type of function, returns an iterable that you can loop over like a list. 
+However, unlike lists, generators do not store the content in the memory. +That helps you to process a large number of objects without increasing memory usage.` + +When wrapped in a ``list(...)`` statement, it returns a list of Batches by loading them into the memory. Check out `Scale's API documentation`__ for more information. @@ -202,7 +221,7 @@ __ https://docs.scale.com/reference#batch-list from scaleapi.batches import BatchStatus - batches = client.batches_all( + batches = client.get_batches( batch_status=BatchStatus.Completed, created_after = "2020-09-08" ) @@ -259,7 +278,7 @@ __ https://docs.scale.com/reference#batch-list projects = client.projects() for project in projects: counter += 1 - print(f'Downloading project {counter} | {project.name} | { project.type}') + print(f'Downloading project {counter} | {project.name} | {project.type}') Update Project ^^^^^^^^^^^^^^ @@ -303,7 +322,7 @@ For example: client.create_task(TaskType.TextCollection, attachment='Some parameters are missing.') except ScaleException as err: print(err.code) # 400 - print(err.message) # Parameters is invalid, reason: "attachments" is required + print(err.message) # Parameter is invalid, reason: "attachments" is required Troubleshooting _______________ diff --git a/docs/migration_guide.md b/docs/migration_guide.md index 81bd627..300f19c 100644 --- a/docs/migration_guide.md +++ b/docs/migration_guide.md @@ -32,8 +32,8 @@ client.create_task( ### Retrieving Tasks -A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method `tasks_all(...)` handles pagination and tokens. -You can have a simpler code by replacing `tasks()` loops with a single `tasks_all()` call. +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method `get_tasks(...)` handles pagination and tokens. +You can have a simpler code by replacing `tasks()` loops with a single `get_tasks()` call. 
Please refer to [List Tasks](../README.rst#list-tasks) for more details. @@ -72,7 +72,7 @@ batch.tasks_canceled # batch.canceled ### Deprecated Methods - `fetch_task()` replaced with `get_task()` -- `list_batches()` replaced with `batches()` +- `list_batches()` replaced with `get_batches()` ### Enabled Auto-Retry