diff --git a/examples/run_all.sh b/examples/run_all.sh old mode 100755 new mode 100644 diff --git a/rosette/api.py b/rosette/api.py index 7fa91e1..cb5dc0c 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -27,7 +27,7 @@ from socket import gethostbyname, gaierror from datetime import datetime -_ACCEPTABLE_SERVER_VERSION = "0.5" +_BINDING_VERSION = "0.7" _GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08]) N_RETRIES = 3 HTTP_CONNECTION = None @@ -516,6 +516,23 @@ def info(self): r = _get_http(url, headers=headers) return self.__finish_result(r, "info") + def checkVersion(self): + """Issues a special "info" request to the L{EndpointCaller}'s specific endpoint. + @return: A dictionary containing server version as well as version check""" + if self.suburl is not None: + self.checker() + url = self.service_url + '/' + self.suburl + "/info?clientVersion=" + _BINDING_VERSION + else: + url = self.service_url + "/info?clientVersion=" + _BINDING_VERSION + if self.debug: + url = add_query(url, "debug", "true") + self.logger.info('info: ' + url) + headers = {'Accept': 'application/json'} + if self.user_key is not None: + headers["user_key"] = self.user_key + r = _post_http(url, None, headers=headers) + return self.__finish_result(r, "info") + def ping(self): """Issues a "ping" request to the L{EndpointCaller}'s (server-wide) endpoint. @return: A dictionary if OK. If the server cannot be reached, @@ -619,10 +636,10 @@ def check_version(self): if self.version_checked: return True op = EndpointCaller(self, None) - result = op.info() + result = op.checkVersion() version = ".".join(result["version"].split(".")[0:2]) - if version != _ACCEPTABLE_SERVER_VERSION: - raise RosetteException("incompatibleVersion", "The server version is not " + _ACCEPTABLE_SERVER_VERSION, + if result['versionChecked'] is False: + raise RosetteException("incompatibleVersion", "The server version is not compatible with binding version " + _BINDING_VERSION, version) self.version_checked = True return True diff --git a/rosette/api.py.orig b/rosette/api.py.orig deleted file mode 100644 index 682be0f..0000000 --- a/rosette/api.py.orig +++ /dev/null @@ -1,752 +0,0 @@ -#!/usr/bin/env python - -""" -Python client for the Rosette API. - -Copyright (c) 2014-2015 Basis Technology Corporation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from io import BytesIO -import base64 -import gzip -import json -import logging -import sys -<<<<<<< HEAD -import pprint -import time -from socket import gethostbyname, gaierror -from datetime import datetime -======= ->>>>>>> rcb-100-python-test-call - -_ACCEPTABLE_SERVER_VERSION = "0.5" -_GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08]) -N_RETRIES = 3 -HTTP_CONNECTION = None -REUSE_CONNECTION = True -CONNECTION_TYPE = "" -CONNECTION_START = datetime.now() -CONNECTION_REFRESH_DURATION = 86400 -N_RETRIES = 3 - -_IsPy3 = sys.version_info[0] == 3 - - -try: - import urlparse - import urllib -except ImportError: - import urllib.parse as urlparse - import urllib.parse as urllib -try: - import httplib -except ImportError: - import http.client as httplib - -if _IsPy3: - _GZIP_SIGNATURE = _GZIP_BYTEARRAY -else: - _GZIP_SIGNATURE = str(_GZIP_BYTEARRAY) - - -class _ReturnObject: - def __init__(self, js, code): - self._json = js - self.status_code = code - - def json(self): - return self._json - - -def _my_loads(obj): - if _IsPy3: - return json.loads(obj.decode("utf-8")) # if py3, need chars. - else: - return json.loads(obj) - - -def _retrying_request(op, url, data, headers): - global HTTP_CONNECTION - global REUSE_CONNECTION - global CONNECTION_TYPE - global CONNECTION_START - global CONNECTION_REFRESH_DURATION - - timeDelta = datetime.now() - CONNECTION_START - totalTime = timeDelta.days * 86400 + timeDelta.seconds - parsed = urlparse.urlparse(url) - if parsed.scheme != CONNECTION_TYPE: - totalTime = CONNECTION_REFRESH_DURATION - - if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION: - parsed = urlparse.urlparse(url) - loc = parsed.netloc - CONNECTION_TYPE = parsed.scheme - CONNECTION_START = datetime.now() - if parsed.scheme == "https": - HTTP_CONNECTION = httplib.HTTPSConnection(loc) - else: - HTTP_CONNECTION = httplib.HTTPConnection(loc) - - message = None - code = "unknownError" - rdata = None - for i in range(N_RETRIES + 1): - # Try to connect with the Rosette API server - # 500 errors will store a message and code - try: - HTTP_CONNECTION.request(op, url, data, headers) - response = HTTP_CONNECTION.getresponse() - status = response.status - rdata = response.read() - if status < 500: - if not REUSE_CONNECTION: - HTTP_CONNECTION.close() - return rdata, status - if rdata is not None: - try: - the_json = _my_loads(rdata) - if "message" in the_json: - message = the_json["message"] - if "code" in the_json: - code = the_json["code"] - except: - pass - # If there are issues connecting to the API server, - # try to regenerate the connection as long as there are - # still retries left. - # A short sleep delay occurs (similar to google reconnect) - # if the problem was a temporal one. - except (httplib.BadStatusLine, gaierror) as e: - totalTime = CONNECTION_REFRESH_DURATION - if i == N_RETRIES - 1: - raise RosetteException("ConnectionError", "Unable to establish connection to the Rosette API server", url) - else: - if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION: - time.sleep(min(5 * (i + 1) * (i + 1), 300)) - parsed = urlparse.urlparse(url) - loc = parsed.netloc - CONNECTION_TYPE = parsed.scheme - CONNECTION_START = datetime.now() - if parsed.scheme == "https": - HTTP_CONNECTION = httplib.HTTPSConnection(loc) - else: - HTTP_CONNECTION = httplib.HTTPConnection(loc) - - # Do not wait to retry -- the model is that a bunch of dynamically-routed - # resources has failed -- Retry means some other set of servelets and their - # underlings will be called up, and maybe they'll do better. - # This will not help with a persistent or impassible delay situation, - # but the former case is thought to be more likely. - - if not REUSE_CONNECTION: - HTTP_CONNECTION.close() - - if message is None: - message = "A retryable network operation has not succeeded after " + str(N_RETRIES) + " attempts" - - raise RosetteException(code, message, url) - - -def _get_http(url, headers): - (rdata, status) = _retrying_request("GET", url, None, headers) - return _ReturnObject(_my_loads(rdata), status) - - -def _post_http(url, data, headers): - if data is None: - json_data = "" - else: - json_data = json.dumps(data) - - (rdata, status) = _retrying_request("POST", url, json_data, headers) - - if len(rdata) > 3 and rdata[0:3] == _GZIP_SIGNATURE: - buf = BytesIO(rdata) - rdata = gzip.GzipFile(fileobj=buf).read() - - return _ReturnObject(_my_loads(rdata), status) - - -def add_query(orig_url, key, value): - parts = urlparse.urlsplit(orig_url) - queries = urlparse.parse_qsl(parts[3]) - queries.append((key, value)) - qs = urllib.urlencode(queries) - return urlparse.urlunsplit((parts[0], parts[1], parts[2], qs, parts[4])) - - -class RosetteException(Exception): - """Exception thrown by all Rosette API operations for errors local and remote. - - TBD. Right now, the only valid operation is conversion to __str__. - """ - - def __init__(self, status, message, response_message): - self.status = status - self.message = message - self.response_message = response_message - - def __str__(self): - sst = self.status - if not (isinstance(sst, str)): - sst = repr(sst) - return sst + ": " + self.message + ":\n " + self.response_message - - -class _PseudoEnum: - def __init__(self): - pass - - @classmethod - def validate(cls, value, name): - values = [] - for (k, v) in vars(cls).items(): - if not k.startswith("__"): - values += [v] - - # this is still needed to make sure that the parameter NAMES are known. - # If python didn't allow setting unknown values, this would be a language error. - if value not in values: - raise RosetteException("unknownVariable", "The value supplied for " + name + - " is not one of " + ", ".join(values) + ".", repr(value)) - - -class DataFormat(_PseudoEnum): - """Data Format, as much as it is known.""" - SIMPLE = "text/plain" - """The data is unstructured text, supplied as a possibly-unicode string.""" - JSON = "application/json" - """To be supplied. The API uses JSON internally, but that is not what this refers to.""" - HTML = "text/html" - """The data is a 'loose' HTML page; that is, it may not be HTML-compliant, or may even not - really be HTML. The data must be a narrow (single-byte) string, not a python Unicode string, - perhaps read from a file. (Of course, it can be UTF-8 encoded).""" - XHTML = "application/xhtml+xml" - """The data is a compliant XHTML page. The data must be a narrow (single-byte) string, not a - python Unicode string, perhaps read from a file. (Of course, it can be UTF-8 encoded).""" - UNSPECIFIED = "application/octet-stream" - """The data is of unknown format, it may be a binary data type (the contents of a binary file), - or may not. It will be sent as is and identified and analyzed by the server.""" - - -class InputUnit(_PseudoEnum): - """Elements are used in the L{DocumentParameters} class to specify whether textual data - is to be treated as one sentence or possibly many.""" - DOC = "doc" - """The data is a whole document; it may or may not contain multiple sentences.""" - SENTENCE = "sentence" - """The data is a single sentence.""" - - -class MorphologyOutput(_PseudoEnum): - LEMMAS = "lemmas" - PARTS_OF_SPEECH = "parts-of-speech" - COMPOUND_COMPONENTS = "compound-components" - HAN_READINGS = "han-readings" - COMPLETE = "complete" - - -class _DocumentParamSetBase(object): - def __init__(self, repertoire): - self.__params = {} - for k in repertoire: - self.__params[k] = None - - def __setitem__(self, key, val): - if key not in self.__params: - raise RosetteException("badKey", "Unknown Rosette parameter key", repr(key)) - self.__params[key] = val - - def __getitem__(self, key): - if key not in self.__params: - raise RosetteException("badKey", "Unknown Rosette parameter key", repr(key)) - return self.__params[key] - - def validate(self): - pass - - def serialize(self): - self.validate() - v = {} - for (key, val) in self.__params.items(): - if val is None: - pass - else: - v[key] = val - return v - - -def _byteify(s): # py 3 only - l = len(s) - b = bytearray(l) - for ix in range(l): - oc = ord(s[ix]) - assert (oc < 256) - b[ix] = oc - return b - - -class DocumentParameters(_DocumentParamSetBase): - """Parameter object for all operations requiring input other than - translated_name. - Four fields, C{content}, C{contentType}, C{unit}, and C{inputUri}, are set via - the subscript operator, e.g., C{params["content"]}, or the - convenience instance methods L{DocumentParameters.load_document_file} - and L{DocumentParameters.load_document_string}. The unit size and - data format are defaulted to L{InputUnit.DOC} and L{DataFormat.SIMPLE}. - - Using subscripts instead of instance variables facilitates diagnosis. - - If the field C{contentUri} is set to the URL of a web page (only - protocols C{http, https, ftp, ftps} are accepted), the server will - fetch the content from that web page. In this case, neither C{content} - nor C{contentType} may be set. - """ - - def __init__(self): - """Create a L{DocumentParameters} object. Default data format - is L{DataFormat.SIMPLE}, unit is L{InputUnit.DOC}.""" - _DocumentParamSetBase.__init__(self, ("content", "contentUri", "contentType", "unit", "language")) - self["unit"] = InputUnit.DOC # default - - def validate(self): - """Internal. Do not use.""" - if self["content"] is None: - if self["contentUri"] is None: - raise RosetteException("badArgument", "Must supply one of Content or ContentUri", "bad arguments") - else: # self["content"] not None - if self["contentUri"] is not None: - raise RosetteException("badArgument", "Cannot supply both Content and ContentUri", "bad arguments") - - def serialize(self): - """Internal. Do not use.""" - self.validate() - slz = super(DocumentParameters, self).serialize() - if self["contentType"] is None and self["contentUri"] is None: - slz["contentType"] = DataFormat.SIMPLE - elif self["contentType"] in (DataFormat.HTML, DataFormat.XHTML, DataFormat.UNSPECIFIED): - content = slz["content"] - if _IsPy3 and isinstance(content, str): - content = _byteify(content) - - encoded = base64.b64encode(content) - if _IsPy3: - encoded = encoded.decode("utf-8") # if py3, need chars. - slz["content"] = encoded - return slz - - def load_document_file(self, path, data_type=DataFormat.UNSPECIFIED): - """Loads a file into the object. - The file will be read as bytes; the appropriate conversion will - be determined by the server. The document unit size remains - by default L{InputUnit.DOC}. - @parameter path: Pathname of a file acceptable to the C{open} function. - @parameter data_type: One of L{DataFormat.HTML}, L{DataFormat.XHTML}, or L{DataFormat.UNSPECIFIED}. - No other types are acceptable at this time, although HTML is broad enough to include text strings - without markup. - If the data type is unknown, or describes a binary file, use the default (L{DataFormat.UNSPECIFIED}). - @type data_type: L{DataFormat} - """ - if data_type not in (DataFormat.HTML, DataFormat.XHTML, DataFormat.UNSPECIFIED): - raise RosetteException("badArgument", "Must supply one of HTML, XHTML, or UNSPECIFIED", data_type) - self.load_document_string(open(path, "rb").read(), data_type) - - def load_document_string(self, s, data_type): - """Loads a string into the object. - The string will be taken as bytes or as Unicode dependent upon - its native python type and the data type asked for; if the - type is HTML or XHTML, bytes, not python Unicode, are expected, - the encoding to be determined by the server. - The document unit size remains (by default) L{InputUnit.DOC}. - @parameter s: A string, possibly a unicode-string, to be loaded - for subsequent analysis, as per the C{data_type}. - @parameter data_type: The data type of the string, as per L{DataFormat}. - @type data_type: L{DataFormat} - """ - self["content"] = s - self["contentType"] = data_type - self["unit"] = InputUnit.DOC - - -class NameTranslationParameters(_DocumentParamSetBase): - """Parameter object for C{translated_name} endpoint. - The following values may be set by the indexing (i.e.,C{ parms["name"]}) operator. The values are all - strings (when not C{None}). - All are optional except C{name} and C{targetLanguage}. Scripts are in - ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for - more description of these terms, as well as the content of the return result. - - C{name} The name to be translated. - - C{targetLangauge} The language into which the name is to be translated. - - C{entityType} The entity type (TBD) of the name. - - C{sourceLanguageOfOrigin} The language of origin of the name. - - C{sourceLanguageOfUse} The language of use of the name. - - C{sourceScript} The script in which the name is supplied. - - C{targetScript} The script into which the name should be translated. - - C{targetScheme} The transliteration scheme by which the translated name should be rendered. - """ - - def __init__(self): - _DocumentParamSetBase.__init__(self, ("name", "targetLanguage", "entityType", "sourceLanguageOfOrigin", - "sourceLanguageOfUse", "sourceScript", "targetScript", "targetScheme")) - - def validate(self): - """Internal. Do not use.""" - for n in ("name", "targetLanguage"): # required - if self[n] is None: - raise RosetteException("missingParameter", "Required Name Translation parameter not supplied", repr(n)) - - -class NameMatchingParameters(_DocumentParamSetBase): - """Parameter object for C{matched_name} endpoint. - All are required. - - C{name1} The name to be matched, a C{name} object. - - C{name2} The name to be matched, a C{name} object. - - The C{name} object contains these fields: - - C{text} Text of the name, required. - - C{language} Language of the name in ISO639 three-letter code, optional. - - C{script} The ISO15924 code of the name, optional. - - C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional. - """ - - def __init__(self): - _DocumentParamSetBase.__init__(self, ("name1", "name2")) - - def validate(self): - """Internal. Do not use.""" - for n in ("name1", "name2"): # required - if self[n] is None: - raise RosetteException("missingParameter", "Required Name Matching parameter not supplied", repr(n)) - - -class EndpointCaller: - """L{EndpointCaller} objects are invoked via their instance methods to obtain results - from the Rosette server described by the L{API} object from which they - are created. Each L{EndpointCaller} object communicates with a specific endpoint - of the Rosette server, specified at its creation. Use the specific - instance methods of the L{API} object to create L{EndpointCaller} objects bound to - corresponding endpoints. - - Use L{EndpointCaller.ping} to ping, and L{EndpointCaller.info} to retrieve server info. - For all other types of requests, use L{EndpointCaller.call}, which accepts - an argument specifying the data to be processed and certain metadata. - - The results of all operations are returned as python dictionaries, whose - keys and values correspond exactly to those of the corresponding - JSON return value described in the Rosette web service documentation. - """ - - def __init__(self, api, suburl): - """This method should not be invoked by the user. Creation is reserved - for internal use by API objects.""" - - self.service_url = api.service_url - self.user_key = api.user_key - self.logger = api.logger - self.useMultipart = api.useMultipart - self.checker = lambda: api.check_version() - self.suburl = suburl - self.debug = api.debug - - def __finish_result(self, r, ename): - code = r.status_code - the_json = r.json() - if code == 200: - return the_json - else: - if 'message' in the_json: - msg = the_json['message'] - else: - msg = the_json['code'] # punt if can't get real message - if self.suburl is None: - complaint_url = "Top level info" - else: - complaint_url = ename + " " + self.suburl - - if "code" in the_json: - server_code = the_json["code"] - else: - server_code = "unknownError" - - raise RosetteException(server_code, - complaint_url + " : failed to communicate with Rosette", - msg) - - def _set_use_multipart(self, value): - self.useMultipart = value - - def info(self): - """Issues an "info" request to the L{EndpointCaller}'s specific endpoint. - @return: A dictionary telling server version and other - identifying data.""" - if self.suburl is not None: - self.checker() - url = self.service_url + '/' + self.suburl + "/info" - else: - url = self.service_url + "/info" - if self.debug: - url = add_query(url, "debug", "true") - self.logger.info('info: ' + url) - headers = {'Accept': 'application/json'} - if self.user_key is not None: - headers["user_key"] = self.user_key - r = _get_http(url, headers=headers) - return self.__finish_result(r, "info") - - def ping(self): - """Issues a "ping" request to the L{EndpointCaller}'s (server-wide) endpoint. - @return: A dictionary if OK. If the server cannot be reached, - or is not the right server or some other error occurs, it will be - signalled.""" - - url = self.service_url + '/ping' - if self.debug: - url = add_query(url, "debug", "true") - self.logger.info('Ping: ' + url) - headers = {'Accept': 'application/json'} - if self.user_key is not None: - headers["user_key"] = self.user_key - r = _get_http(url, headers=headers) - return self.__finish_result(r, "ping") - - def call(self, parameters): - """Invokes the endpoint to which this L{EndpointCaller} is bound. - Passes data and metadata specified by C{parameters} to the server - endpoint to which this L{EndpointCaller} object is bound. For all - endpoints except C{translated_name} and C{matched_name}, it must be a L{DocumentParameters} - object or a string; for C{translated_name}, it must be an L{NameTranslationParameters} object; - for C{matched_name}, it must be an L{NameMatchingParameters} object. - - In all cases, the result is returned as a python dictionary - conforming to the JSON object described in the endpoint's entry - in the Rosette web service documentation. - - @param parameters: An object specifying the data, - and possible metadata, to be processed by the endpoint. See the - details for those object types. - @type parameters: For C{translated_name}, L{NameTranslationParameters}, otherwise L{DocumentParameters} or L{str} - @return: A python dictionary expressing the result of the invocation. - """ - - if not isinstance(parameters, _DocumentParamSetBase): - if self.suburl != "matched-name" and self.suburl != "translated-name": - text = parameters - parameters = DocumentParameters() - parameters['content'] = text - else: - raise RosetteException("incompatible", "Text-only input only works for DocumentParameter endpoints", - self.suburl) - - self.checker() - - if self.useMultipart and (parameters['contentType'] != DataFormat.SIMPLE): - raise RosetteException("incompatible", "Multipart requires contentType SIMPLE", - repr(parameters['contentType'])) - url = self.service_url + '/' + self.suburl - if self.debug: - url = add_query(url, "debug", "true") - self.logger.info('operate: ' + url) - params_to_serialize = parameters.serialize() - headers = {'Accept': "application/json", 'Accept-Encoding': "gzip"} - if self.user_key is not None: - headers["user_key"] = self.user_key - headers['Content-Type'] = "application/json" - r = _post_http(url, params_to_serialize, headers) -<<<<<<< HEAD - # pprint.pprint(headers) - # pprint.pprint(url) - # pprint.pprint(params_to_serialize) -======= ->>>>>>> rcb-100-python-test-call - return self.__finish_result(r, "operate") - - -class API: - """ - Rosette Python Client Binding API; representation of a Rosette server. - Call instance methods upon this object to obtain L{EndpointCaller} objects - which can communicate with particular Rosette server endpoints. - """ - def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1', retries=3, reuse_connection=True, refresh_duration=86400, debug=False): - """ Create an L{API} object. - @param user_key: (Optional; required for servers requiring authentication.) An authentication string to be sent - as user_key with all requests. The default Rosette server requires authentication. - to the server. - @param service_url: (Optional) The root URL (string) of the Rosette service to which this L{API} object will be - bound. The default is that of Basis Technology's public Rosette server. - """ - self.user_key = user_key - self.service_url = service_url - self.logger = logging.getLogger('rosette.api') - self.logger.info('Initialized on ' + self.service_url) - self.debug = debug - self.useMultipart = False - self.version_checked = False - - global N_RETRIES - global REUSE_CONNECTION - global CONNECTION_REFRESH_DURATION - - if (retries < 1): - retries = 1 - if (refresh_duration < 60): - refresh_duration = 60 - N_RETRIES = retries - REUSE_CONNECTION = reuse_connection - CONNECTION_REFRESH_DURATION = refresh_duration - - def check_version(self): - if self.version_checked: - return True - op = EndpointCaller(self, None) - result = op.info() - version = ".".join(result["version"].split(".")[0:2]) - if version != _ACCEPTABLE_SERVER_VERSION: - raise RosetteException("incompatibleVersion", "The server version is not " + _ACCEPTABLE_SERVER_VERSION, - version) - self.version_checked = True - return True - - def _set_use_multipart(self, value): - self.useMultipart = value - - def ping(self): - """ - Create a ping L{EndpointCaller} for the server and ping it. - @return: A python dictionary including the ping message of the L{API} - """ - return EndpointCaller(self, None).ping() - - def info(self): - """ - Create a ping L{EndpointCaller} for the server and ping it. - @return: A python dictionary including the ping message of the L{API} - """ - return EndpointCaller(self, None).info() - - def language(self, parameters): - """ - Create an L{EndpointCaller} for language identification and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the language identifier. - @type parameters: L{DocumentParameters} or L{str} - @return: A python dictionary containing the results of language - identification.""" - return EndpointCaller(self, "language").call(parameters) - - def sentences(self, parameters): - """ - Create an L{EndpointCaller} to break a text into sentences and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the sentence identifier. - @type parameters: L{DocumentParameters} or L{str} - @return: A python dictionary containing the results of sentence identification.""" - return EndpointCaller(self, "sentences").call(parameters) - - def tokens(self, parameters): - """ - Create an L{EndpointCaller} to break a text into tokens and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the tokens identifier. - @type parameters: L{DocumentParameters} or L{str} - @return: A python dictionary containing the results of tokenization.""" - return EndpointCaller(self, "tokens").call(parameters) - - def morphology(self, parameters, facet=MorphologyOutput.COMPLETE): - """ - Create an L{EndpointCaller} to returns a specific facet - of the morphological analyses of texts to which it is applied and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the morphology analyzer. - @type parameters: L{DocumentParameters} or L{str} - @param facet: The facet desired, to be returned by the created L{EndpointCaller}. - @type facet: An element of L{MorphologyOutput}. - @return: A python dictionary containing the results of morphological analysis.""" - return EndpointCaller(self, "morphology/" + facet).call(parameters) - - def entities(self, parameters, linked=False): - """ - Create an L{EndpointCaller} to identify named entities found in the texts - to which it is applied and call it. Linked entity information is optional, and - its need must be specified at the time the operator is created. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the entity identifier. - @type parameters: L{DocumentParameters} or L{str} - @param linked: Specifies whether or not linked entity information will - be wanted. - @type linked: Boolean - @return: A python dictionary containing the results of entity extraction.""" - if linked: - return EndpointCaller(self, "entities/linked").call(parameters) - else: - return EndpointCaller(self, "entities").call(parameters) - - def categories(self, parameters): - """ - Create an L{EndpointCaller} to identify the category of the text to which - it is applied and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the category identifier. - @type parameters: L{DocumentParameters} or L{str} - @return: A python dictionary containing the results of categorization.""" - return EndpointCaller(self, "categories").call(parameters) - - def sentiment(self, parameters): - """ - Create an L{EndpointCaller} to identify the sentiment of the text to - which it is applied and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the sentiment identifier. - @type parameters: L{DocumentParameters} or L{str} - @return: A python dictionary containing the results of sentiment identification.""" - """Create an L{EndpointCaller} to identify sentiments of the texts - to which is applied. - @return: An L{EndpointCaller} object which can return sentiments - of texts to which it is applied.""" - return EndpointCaller(self, "sentiment").call(parameters) - - def translated_name(self, parameters): - """ - Create an L{EndpointCaller} to perform name analysis and translation - upon the name to which it is applied and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the name translator. - @type parameters: L{NameTranslationParameters} - @return: A python dictionary containing the results of name translation.""" - return EndpointCaller(self, "translated-name").call(parameters) - - def matched_name(self, parameters): - """ - Create an L{EndpointCaller} to perform name matching and call it. - @param parameters: An object specifying the data, - and possible metadata, to be processed by the name matcher. - @type parameters: L{NameMatchingParameters} - @return: A python dictionary containing the results of name matching.""" - return EndpointCaller(self, "matched-name").call(parameters) diff --git a/tests/mock-data/response/checkVersion.json b/tests/mock-data/response/checkVersion.json new file mode 100644 index 0000000..bc284e6 --- /dev/null +++ b/tests/mock-data/response/checkVersion.json @@ -0,0 +1,7 @@ +{ + "buildNumber": "6bafb29d", + "buildTime": "2015.05.08_12:31:26", + "name": "Rosette API", + "version": "0.4.0", + "versionChecked": false +} diff --git a/tests/mock-data/response/checkVersion.status b/tests/mock-data/response/checkVersion.status new file mode 100644 index 0000000..08839f6 --- /dev/null +++ b/tests/mock-data/response/checkVersion.status @@ -0,0 +1 @@ +200 diff --git a/tests/mock-data/response/info.json b/tests/mock-data/response/info.json index 4fd8d08..2d30960 100644 --- a/tests/mock-data/response/info.json +++ b/tests/mock-data/response/info.json @@ -2,5 +2,6 @@ "buildNumber": "6bafb29d", "buildTime": "2015.05.08_12:31:26", "name": "Rosette API", - "version": "0.5.0" -} \ No newline at end of file + "version": "0.5.0", + "versionChecked": true +} diff --git a/tests/mock-data/response/ping.json b/tests/mock-data/response/ping.json index e035af5..e114494 100644 --- a/tests/mock-data/response/ping.json +++ b/tests/mock-data/response/ping.json @@ -1,4 +1,4 @@ { - "message":"Rosette API at your service", - "time":1433962008758 + "message":"Rosette API at your service", + "time":1433962008758 } diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index 657fa1b..e85a3d0 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -125,6 +125,7 @@ def test_info(): result = test.api.info() assert result["buildNumber"] == "6bafb29d" assert result["name"] == "Rosette API" + assert result["versionChecked"] is True # Test that retrying request retries the correct number of times @@ -170,6 +171,8 @@ def call_endpoint(input_filename, expected_status_filename, expected_output_file body = info_file.read() httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", body=body, status=200, content_type="application/json") + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", + body=body, status=200, content_type="application/json") error_expected = False # Create an instance of the app, feeding the filename to be stored as the user key so the response will be correct @@ -240,6 +243,8 @@ def test_debug(): body = info_file.read() httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", body=body, status=200, content_type="application/json") + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", + body=body, status=200, content_type="application/json") api = API("0123456789", debug=True) @@ -276,6 +281,8 @@ def test_just_text(): body = info_file.read() httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", body=body, status=200, content_type="application/json") + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", + body=body, status=200, content_type="application/json") api = API("0123456789") diff --git a/tests/test_rosette_api.py.orig b/tests/test_rosette_api.py.orig deleted file mode 100644 index 5958bef..0000000 --- a/tests/test_rosette_api.py.orig +++ /dev/null @@ -1,285 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Copyright (c) 2014-2015 Basis Technology Corporation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -# To run tests, run `py.test test_rosette_api.py` - -import glob -import httpretty -import json -import os -import pytest -import re -import sys -try: - from StringIO import StringIO as streamIO -except ImportError: - from io import BytesIO as streamIO -import gzip -from rosette.api import API, DocumentParameters, NameTranslationParameters, NameMatchingParameters, RosetteException - -_IsPy3 = sys.version_info[0] == 3 - -request_file_dir = os.path.dirname(__file__) + "/mock-data/request/" -response_file_dir = os.path.dirname(__file__) + "/mock-data/response/" - -# Define the regex pattern of file names. Example: eng-doc-categories.json -filename_pattern = re.compile("(\w+-\w+-([a-z_-]+))[.]json") - - -def get_file_content(filename): - with open(filename, "r") as f: - s = f.read() - if len(s) > 200: - out = streamIO() - f1 = gzip.GzipFile(fileobj=out, mode="w") - if _IsPy3: - f1.write(bytes(s, 'UTF-8')) - else: - f1.write(s) - f1.close() - s = out.getvalue() - return s - - -# Run through all files in the mock-data directory, extract endpoint, and create a list of tuples of the form -# (input filename, output status filename, output data filename, endpoint) as the elements -def categorize_reqs(): - files = [] - # Loop through all file names in the mock-data/request directory - for full_filename in glob.glob(request_file_dir + "*.json"): - filename = os.path.basename(full_filename) - # Extract the endpoint (the part after the first two "-" but before .json) - endpoint = "/" + filename_pattern.match(filename).group(2).replace("_", "/") - # Add (input, output status, output json, endpoint) to list of files - files.append((filename_pattern.match(filename).group(1), - response_file_dir + filename.replace("json", "status"), - response_file_dir + filename, - endpoint)) - return files - - -class RosetteTest: - def __init__(self, filename=None): - self.url = "https://api.rosette.com/rest/v1" - # Set user key as filename as a workaround - tests don"t require user key - # Filename is necessary to get the correct response in the mocked test - self.api = API(service_url=self.url, user_key=filename) - # Default to DocumentParameters as self.params - self.params = DocumentParameters() - if filename is not None: - # Name matching endpoint requires NameMatchingParameters - if "matched-name" in filename: - self.params = NameMatchingParameters() - # Name translation requires NameTranslationParameters - elif "translated-name" in filename: - self.params = NameTranslationParameters() - # Find and load contents of request file into parameters - with open(request_file_dir + filename + ".json", "r") as inp_file: - params_dict = json.loads(inp_file.read()) - for key in params_dict: - self.params[key] = params_dict[key] - - -# Setup for tests - register urls with HTTPretty and compile a list of all necessary information about each file -# in mock-data/request so that tests can be run -docs_list = categorize_reqs() - - -# Test that pinging the API is working properly -@httpretty.activate -def test_ping(): - with open(response_file_dir + "ping.json", "r") as ping_file: - body = ping_file.read() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/ping", - body=body, status=200, content_type="application/json") - - test = RosetteTest(None) - result = test.api.ping() - assert result["message"] == "Rosette API at your service" - - -# Test that getting the info about the API is being called correctly -@httpretty.activate -def test_info(): - with open(response_file_dir + "info.json", "r") as info_file: - body = info_file.read() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=body, status=200, content_type="application/json") - - test = RosetteTest(None) - result = test.api.info() - assert result["buildNumber"] == "6bafb29d" - assert result["name"] == "Rosette API" - - -# Test that retrying request retries the correct number of times -@httpretty.activate -def test_retryNum(): - with open(response_file_dir + "info.json", "r") as info_file: - body = info_file.read() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=body, status=500, content_type="application/json") - test = API(service_url='https://api.rosette.com/rest/v1', user_key=None, retries=5) - try: - result = test.info() - assert False - except RosetteException as e: - assert e.message == "A retryable network operation has not succeeded after 5 attempts" - assert e.status == "unknownError" - - -# Test that retrying request throws the right error -@httpretty.activate -def test_retry500(): - with open(response_file_dir + "info.json", "r") as info_file: - body = {'message': 'We had a problem with our server. Try again later.', 'code': 'Internal Server Error'} - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=json.dumps(body), status=500, content_type="application/json") - test = RosetteTest(None) - try: - result = test.api.info() - assert False - except RosetteException as e: - assert e.message == "We had a problem with our server. Try again later." - assert e.status == "Internal Server Error" - - -@httpretty.activate -def call_endpoint(input_filename, expected_status_filename, expected_output_filename, rest_endpoint): - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1" + rest_endpoint, - status=get_file_content(expected_status_filename), - body=get_file_content(expected_output_filename), - content_type="application/json") - # need to mock /info call too because the api will call it implicitly - with open(response_file_dir + "info.json", "r") as info_file: - body = info_file.read() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=body, status=200, content_type="application/json") - - error_expected = False - # Create an instance of the app, feeding the filename to be stored as the user key so the response will be correct - test = RosetteTest(input_filename) - # Open the expected response file and store the data - with open(expected_output_filename, "r") as expected_file: - expected_result = json.loads(expected_file.read()) - # Check to see if this particular request should throw an exception for an unsupported language - if "code" in expected_result: - if expected_result["code"] == "unsupportedLanguage": - error_expected = True - functions = {"/categories": test.api.categories, - "/entities": test.api.entities, - "/entities/linked": test.api.entities, # (test.params, True) - "/language": test.api.language, - "/matched-name": test.api.matched_name, - "/morphology/complete": test.api.morphology, - "/sentiment": test.api.sentiment, - "/translated-name": test.api.translated_name} - - # If the request is expected to throw an exception, try complete the operation and pass the test only if it fails - if error_expected: - try: - functions[rest_endpoint](test.params) - assert False - except RosetteException as e: - assert True - return - - # Otherwise, actually complete the operation and check that it got the correct result - # entities/linked must be handled separately because they require two arguments - if "entities/linked" not in rest_endpoint: - result = functions[rest_endpoint](test.params) - else: - result = functions[rest_endpoint](test.params, True) - assert result == expected_result - - -# Test all other endpoints -# docs_list is the list of information from documents in the mock-data/request directory above -# @pytest.mark.parametrize means that it will call the below test for each tuple -# in the docs_list feeding the elements of the tuple as arguments to the test -@pytest.mark.parametrize("input_filename, expected_status_filename, expected_output_filename, rest_endpoint", docs_list) -def test_all(input_filename, expected_status_filename, expected_output_filename, rest_endpoint): - # @httpretty and @pytest cannot co-exist, so separate the function definition - call_endpoint(input_filename, expected_status_filename, expected_output_filename, rest_endpoint) - - -<<<<<<< HEAD -# Test that debug flag is working properly -@httpretty.activate -def test_debug(): - # Doesn't really matter what it returns for this test, so just making sure it catches all of them -======= -# Test using text only input -# To call entities: should work -# To call matched-name and translated-name: should throw errors -@httpretty.activate -def test_just_text(): ->>>>>>> rcb-100-python-test-call - endpoints = ["categories", "entities", "entities/linked", "language", "matched-name", "morphology-complete", - "sentiment", "translated-name"] - expected_status_filename = response_file_dir + "eng-sentence-entities.status" - expected_output_filename = response_file_dir + "eng-sentence-entities.json" - for rest_endpoint in endpoints: - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/" + rest_endpoint, - status=get_file_content(expected_status_filename), - body=get_file_content(expected_output_filename), - content_type="application/json") - - with open(expected_output_filename, "r") as expected_file: - expected_result = json.loads(expected_file.read()) - - # need to mock /info call too because the api will call it implicitly - with open(response_file_dir + "info.json", "r") as info_file: - body = info_file.read() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=body, status=200, content_type="application/json") - -<<<<<<< HEAD - api = API("0123456789", debug=True) - - content = "He also acknowledged the ongoing U.S. conflicts in Iraq and Afghanistan, noting that he is the \"commander in chief of a country that is responsible for ending a war and working in another theater to confront a ruthless adversary that directly threatens the American people\" and U.S. allies." - - params = DocumentParameters() - params.__setitem__("content", content) - api.entities(params) - - # Check that the most recent querystring had debug=true - assert httpretty.last_request().querystring == {'debug': ['true']} -======= - api = API("0123456789") - - content = "He also acknowledged the ongoing U.S. conflicts in Iraq and Afghanistan, noting that he is the \"commander in chief of a country that is responsible for ending a war and working in another theater to confront a ruthless adversary that directly threatens the American people\" and U.S. allies." - - result = api.entities(content) - # Check that it work for entities - assert result == expected_result - - # Check that it throws the correct error for matched-name - try: - api.matched_name(content) - assert False - except RosetteException as e: - assert e.status == "incompatible" - - # Check that it throws the correct error for translated-name - try: - api.translated_name(content) - assert False - except RosetteException as e: - assert e.status == "incompatible" ->>>>>>> rcb-100-python-test-call