diff --git a/.gitignore b/.gitignore index ea16fe4..be7f8d9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ ehthumbs.db Thumbs.db +# VS Code +.vscode + # Jetbrains **/.idea/* !**/.idea/runConfigurations/ @@ -23,6 +26,7 @@ __pycache__/ # Distribution / packaging .Python +doc/ env/ build/ develop-eggs/ @@ -58,6 +62,7 @@ htmlcov/ nosetests.xml coverage.xml *,cover +.scannerwork/ # Translations *.mo @@ -71,3 +76,9 @@ docs/_build/ # PyBuilder target/ + +settings.json +*.orig + +# Sonar +.scannerwork/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index dd2b691..0000000 --- a/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -language: python -install: - - pip install tox -script: - - tox - -notifications: - slack: - rooms: - - secure: 4FRaTAAiYyeUvgw2RhmblgbNiJO4wmd34OBgWcwURjP9oVmFfSwR9r1LNCdUGxrPOghexSY2DjXIuvIrfTfi/xYbhHb3Kw7PEAyB8IuBMlKtY4NSFou62S2VhYpxyg58T+C7P2zi0eDnDE06pwTCoGPaimxMZQY91yQ0yPYDPVXbwe5SjEgamzlwGBxlS/0A6w1iCPHg27/iO2hXtdW3oLS2I0F/Q8Q95RBkX9hpg6yqHlTV7jRbSqvQ9OFBqk/tXMHQvhoPDGgCgQDuykJuaAYx7g9d0YL0eEYYOh9B/TJ/kNOwdRFBu5kuQ2/nFS5Z0S3Y3UIhdYjUmm9gSMnwIbYnrW22EqDJLoT9Zi3Gv7Prg/8/fSkWsof7BJTMSuXUqO1AxDGKIxFv9uSF1daZoY+AC1ooU1xDu1nNvWVYPlkwEdDxxmHpFkGT3ESTZYccPovQl8Z5K0I1BBAVdJKDzm07lE6VHbxkKcvK6gG0TN3uLxnSlQtjkfJ+aVMq1kxeVsB9lEsKs9oezsKzzbftMm525aXPg+OAv+31CUFWxvT/p4ps8Q+AV6aZpoPHkpK8VryyNirUeZ/m4m4ebDHhD9vcN+JqE9gzshT+0U3g19SvLiUMQtbuZ2BUvrq2hh2LEGs03AFZaNg9AEUVA1PQRhV5NILyoS/lbiBYJPT39Sg= -# - secure: 
D4VxkkZlj7uaaFbqEBITkJCusVeii436N8X6GijuosUSaee9lqGYUF5ZS9lV6VGMKs719IfSJsCc2v/N4nc9Y/8AKgXd7AWHUwaRR+MC6rLwv9xqH8ZlkTPUKHUXkUQe1f9042PcMxzYa9r0+uKniM9l915Yx0PLaawJrWe3ZVig/uBhn5FueLhtUACzLJcjNcri/BKEgmP2+EgUsupUhujd0MsQd8xN5YIIv6VM5oD0XWXZCnBqHoZK+Qq7LfwDnTmUk7juqTOAjefF6v8IJTDELvSdK9QW7f9x7h0ICabIrI+Gl4IbNLJYEnELBi+X7da2YJUTgUoQrlBB4z+T8KybD8myqV/pJc1n0xrk84g1MtYQ3ahZn7eD3DTjIvK+ML8qAAGwxibF8VsV+GSoo5FcB6e3vV+glHODlwszstVs11PpL1grurVdoa5HPhUbm1jtQixOwO7h8GgGpwO20PuMLfyOfTjfoMcJ3ISjGNHRmUwcCH9avcaHqMLPLHht0z+8inVlDZeTcN+Wi8tk1YXwuQYUkTW46iFmoZNGdygRjTu43h9hCqdG7vcoKrR8oDbMT8X/sfs+z/UaBbuYBr9dT8rrdKJoa9115PaYpCs529DLrw1iGmzr5CkAXDElc1y2F352nwVS+dX7/OIQz1/dbT1ozWQ9Blx6ez5XN84= -# - secure: g/eOC2QCn7oUW234u/P1kmh9OASO9t5g6iC+DflNdnedHnpw8IEG9CsLAnNynOY0KvC/2/Q6BAIFpT/qO8BPdaC2eQnd9LDurOXExxvjKZgTujhqgzijJGzVPSL8//hafj0Ec2+iJZFo/DYMPqUpYDsiKxX53+SMz9/GN/PKVKsI7bzuaOm04xXktU0A76pVfaG0N/qFSVHI64SShACoUXRpTzyUbmksRgzk7FB162R+TH1soPsR3vPh8c2SSWC9msrDc4iljcBhSFTdjL68z+srjDPYMSoOcFXXFNP+dmL+Q0veL/E4e40e7CWIU1O3grOcEcCkaSoZVSpGduNnCst8h6MpgauPtrgwHk4zGMoSl+L6al+nFo/3h2dXeebrQ0tY/hRfZi4Q8xwqG9083TBqi71fTpoFZ7sNtrY4Kdtl5Oa2CFUo7lVn1JB9qQwSa0eai3Whv0RyRqqQe77aDUj0dfD2R3Q61rX5OF/f++W0XtNwHQubzmj8HD/cFneShIQsbl9KgYXoR6HiXcbBiNdmmZjSrzkPYh7vlsujYrz6Cg2msKybWJ/FfOz0tS1cjlCtiCMOTExN6tEF8YCp9l+s+5RLe61pgFPy4Snr9pEjuTS5DYliTyZMY2ZZC1clBQtgE8E2qCG1QSzqnqqiGxj2K4zmLPpB4y1XpW8e3yk= - diff --git a/CI.Jenkinsfile b/CI.Jenkinsfile new file mode 100644 index 0000000..8704189 --- /dev/null +++ b/CI.Jenkinsfile @@ -0,0 +1,65 @@ + + +def versions = [3.9, 3.10, 3.11, 3.12, 3.13] + +def runSonnarForPythonVersion(sourceDir, ver){ + mySonarOpts="-Dsonar.sources=/source -Dsonar.host.url=${env.SONAR_HOST_URL} -Dsonar.login=${env.SONAR_AUTH_TOKEN}" + if("${env.CHANGE_ID}" != "null"){ + mySonarOpts = "$mySonarOpts -Dsonar.pullrequest.key=${env.CHANGE_ID} -Dsonar.pullrequest.branch=${env.BRANCH_NAME}" + } else { + mySonarOpts = "$mySonarOpts -Dsonar.branch.name=${env.BRANCH_NAME}" + } + if 
("${env.CHANGE_BRANCH}" != "null") { + mySonarOpts="$mySonarOpts -Dsonar.pullrequest.base=${env.CHANGE_TARGET} -Dsonar.pullrequest.branch=${env.CHANGE_BRANCH}" + } + + // Only run Sonar once. + // Check for new versions at https://binaries.sonarsource.com/?prefix=Distribution/sonar-scanner-cli/ + sonarScannerVersion="6.2.1.4610-linux-x64" + if(ver == 3.13) { + sonarExec="cd /root/ && \ + wget -q https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${sonarScannerVersion}.zip && \ + unzip -q sonar-scanner-cli-${sonarScannerVersion}.zip && \ + cd /source && \ + /root/sonar-scanner-${sonarScannerVersion}/bin/sonar-scanner ${mySonarOpts}" + } else { + sonarExec="echo Skipping Sonar for this version." + } + + sh "docker run \ + --pull always \ + --rm --volume ${sourceDir}:/source \ + python:${ver}-slim \ + bash -c \"apt-get update && \ + apt-get install -y wget unzip && \ + pip3 install tox && \ + cd /source && \ + tox && \ + ${sonarExec} && \ + echo && \ + echo [INFO] Re-permission files for cleanup. && \ + chown -R 9960:9960 /source\"" +} + +node ("docker-light") { + def sourceDir = pwd() + try { + stage("Clean up") { + step([$class: 'WsCleanup']) + } + stage("Checkout Code") { + checkout scm + } + stage("Build & Test") { + withSonarQubeEnv { + + versions.each { ver -> + runSonnarForPythonVersion(sourceDir, ver) + } + } + } + } catch (e) { + currentBuild.result = "FAILED" + throw e + } +} diff --git a/DEVELOPER.md b/DEVELOPER.md new file mode 100644 index 0000000..dfa4aa0 --- /dev/null +++ b/DEVELOPER.md @@ -0,0 +1,77 @@ +## Developer Information + +#### Sonar Scanning +* Uncomment the `sonar.branch.name` line in `sonar-project.properties` and adjust the value to match your branch name. +* Install the `coverage` module in to your virtual environment. 
+ ``` + virtualenv -p python3 ~/venvs/python-binding-development + source ~/venvs/python-binding-development/bin/activate + pip install --upgrade pip + pip install coverage + ``` +* Generate the coverage data. + ``` + coverage run --source=rosette -m pytest + ``` +* Check the results locally + ``` + coverage report + ``` +* Generate the XML coverage report + ``` + coverage xml + ``` +* Push the results to Sonar + ``` + sonar_host=https://sonar.basistech.net + sonar_token= # Generate a token at https://sonar.basistech.net/account/security/ + + docker run \ + --rm \ + -e SONAR_HOST_URL="${sonar_host}" \ + -e SONAR_LOGIN="${sonar_token}" \ + -v "$(pwd):/usr/src" \ + sonarsource/sonar-scanner-cli + + ``` + +### Testing +To test changes you have made to the binding, you can use a pre-configured Docker environment. This environment will: +- Compile the binding within the container. +- Install the binding within the container. +- Execute one or more example files using the installed binding. +- The example files can be executed against a Cloud release or an Enterprise release. +- If a test suite exists, it will also be executed. + +``` +git clone git@github.com:rosette-api/python.git +cd python +# Modify the binding... +docker run -e API_KEY=$API_KEY -v $(pwd):/source rosette/docker-python +``` + +Optional parameters for the `docker run` execution are: + +- `-e ALT_URL=` + - For testing against an Enterprise environment or the staging environment. +- `-e FILENAME=` + - For testing a single example file instead of all the example files. + +To alter the behavior of the pre-configured Docker environment, you can see the Dockerfile source and entry-point +script [here](https://github.com/RosetteTextAnalytics/rapid-development-tools/tree/master/binding-dockerfiles). + +### Documentation Generation +The existing README for documentation generation is [here](docs/README.md). +The next time the API documentation is touched, please refresh the README and migrate it here. 
+ +### Examples README +There's an old [Docker README](examples/docker) in the examples directory that might be a candidate for removal. + +### Building A Release +See the [instructions](https://github.com/RosetteTextAnalytics/rapid-development-tools/tree/master/publish) + +### TODOs +- Inconsistent references with `rosette_api` and `rosette-api` +- Doc generation README cleanup? +- Example Docker file still needed? +- `docker-compose.yaml` still needed? diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..58a6d77 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,35 @@ +properties([ + pipelineTriggers([[$class: "SCMTrigger", scmpoll_spec: "H/15 * * * *"]]) +]) + +node ("docker-light") { + def SOURCEDIR = pwd() + try { + stage("Clean up") { + step([$class: 'WsCleanup']) + } + stage("Checkout Code") { + checkout scm + } + stage("Test with Docker") { + echo "${env.ALT_URL}" + def useUrl = ("${env.ALT_URL}" == "null") ? "${env.BINDING_TEST_URL}" : "${env.ALT_URL}" + withEnv(["API_KEY=${env.ROSETTE_API_KEY}", "ALT_URL=${useUrl}"]) { + sh "docker pull rosette/docker-python" + sh "docker run --rm -e API_KEY=${API_KEY} -e ALT_URL=${ALT_URL} -v ${SOURCEDIR}:/source rosette/docker-python" + } + } + slack(true) + } catch (e) { + currentBuild.result = "FAILED" + slack(false) + throw e + } +} + +def slack(boolean success) { + def color = success ? "#00FF00" : "#FF0000" + def status = success ? 
"SUCCESSFUL" : "FAILED" + def message = status + ": Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})" + slackSend(color: color, channel: "#rapid", message: message) +} diff --git a/Jenkinsfile.examples b/Jenkinsfile.examples new file mode 100644 index 0000000..887147a --- /dev/null +++ b/Jenkinsfile.examples @@ -0,0 +1,38 @@ +node { + def SOURCEDIR = pwd() + def TEST_CONTAINER = 'examples/python-test' + def DOCKERFILE_DIR = './examples/docker' + try { + stage("Clean up") { + step([$class: 'WsCleanup']) + } + stage("Checkout Code") { + checkout scm + } + stage("Build Dockerfile") { + dir ("${DOCKERFILE_DIR}") { + sh "docker build --no-cache -t ${TEST_CONTAINER} ." + } + } + stage("Run Examples") { + echo "${env.ALT_URL}" + def useUrl = ("${env.ALT_URL}" == "null") ? "${env.BINDING_TEST_URL}" : "${env.ALT_URL}" + withEnv(["API_KEY=${env.ROSETTE_API_KEY}", "ALT_URL=${useUrl}"]) { + sh "docker run --rm -e API_KEY=${API_KEY} -e ALT_URL=${ALT_URL} -v ${SOURCEDIR}:/source ${TEST_CONTAINER}" + } + } + postToTeams(true) + } catch (e) { + currentBuild.result = "FAILED" + postToTeams(false) + throw e + } +} + +def postToTeams(boolean success) { + def webhookUrl = "${env.TEAMS_PNC_JENKINS_WEBHOOK_URL}" + def color = success ? "#00FF00" : "#FF0000" + def status = success ? "SUCCESSFUL" : "FAILED" + def message = "*" + status + ":* '${env.JOB_NAME}' - [${env.BUILD_NUMBER}] - ${env.BUILD_URL}" + office365ConnectorSend(webhookUrl: webhookUrl, color: color, message: message, status: status) +} diff --git a/LICENSE.txt b/LICENSE.txt index 8e1eca0..5723e02 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2014-2016 Basis Technology Corporation. +Copyright (c) 2014-2019 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/README.md b/README.md index 075fbf1..bc34f47 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,48 @@ -[![Build Status](https://travis-ci.org/rosette-api/python.svg?branch=master)](https://travis-ci.org/rosette-api/python) + + + + + Babel Street Logo + + -# This is the Python client binding for Rosette API. +# Analytics by Babel Street -Installation ------------- +[![PyPI version](https://badge.fury.io/py/rosette-api.svg)](https://badge.fury.io/py/rosette-api) +[![Python Versions](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions)](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions) -The Python binding requires Python 2.6 or greater and is available through pip: +Our product is a full text processing pipeline from data preparation to extracting the most relevant information and +analysis utilizing precise, focused AI that has built-in human understanding. Text Analytics provides foundational +linguistic analysis for identifying languages and relating words. The result is enriched and normalized text for +high-speed search and processing without translation. -`pip install rosette_api` - -Basic Usage ------------ - -```python -# 1. Set utf-8 encoding. -# -*- coding: utf-8 -*- - -# 2. Imports from rosette.api. -from rosette.api import API, DocumentParameters, MorphologyOutput - -# 3. Create API object. -api = API("[your_api-key]") - -# 4. Create parameters object -params = DocumentParameters() - -# 5. Set parameters. -params["content"] = "The quick brown fox jumped over the lazy dog. Yes he did." - -# 6. Make a call. 
-result = api.morphology(params) - -# result is a Python dictionary that contains - -{u'tokens': [u'The', u'quick', u'brown', u'fox', u'jumped', u'over', u'the', u'lazy', u'dog', u'.', u'Yes', u'he', u'did', u'.'], u'posTags': [u'DET', u'ADJ', u'ADJ', u'NOUN', u'VERB', u'ADP', u'DET', u'ADJ', u'NOUN', u'PUNCT', u'VERB', u'PRON', u'VERB', u'PUNCT'], u'compoundComponents': [None, None, None, None, None, None, None, None, None, None, None, None, None, None], u'lemmas': [u'the', u'quick', u'brown', u'fox', u'jump', u'over', u'the', u'lazy', u'dog', u'.', u'yes', u'he', u'do', u'.'], u'hanReadings': [None, None, None, None, None, None, None, None, None, None, None, None, None, None]} -``` +Text Analytics extracts events and entities — people, organizations, and places — from unstructured text and adds the +structure of associating those entities into events that deliver only the necessary information for near real-time +decision making. Accompanying tools shorten the process of training AI models to recognize domain-specific events. -The samples use the following procedure: +The product delivers a multitude of ways to sharpen and expand search results. Semantic similarity expands search +beyond keywords to words with the same meaning, even in other languages. Sentiment analysis and topic extraction help +filter results to what’s relevant. -1. If the application reads text in, set encoding to utf-8 in the first line of the script. +## Analytics API Access +- Analytics Cloud [Sign Up](https://developer.babelstreet.com/signup) -2. Import the `rosette.api` packages that your application needs. The `rosette.api` packages include - * `API` - * `DocumentParameters` - * `NameSimilarityParameters` - * `NameTranslationParameters` - * `MorphologyOutput` - * `DataFormat` +## Quick Start -3. Create an `API` object with the `user_key` parameter. - -4. 
Create a parameters object for your request input: - - | Parameter | Endpoint | - | ----|----| - | `NameSimilarityParameters` | for `/name-similarity` | - | `NameTranslationParameters` | for `/translated-name` | - | `DocumentParameters` | for all other endpoints | - - -5. Set the parameters required for your operation: "`content`" or "`contentUri`" for `DocumentParameters`; -"`name`" and "`targetLanguage`" for `NameTranslationParameters`; "`name1.text`" and "`name2.text`" for - `NameSimilarityParameters`; Other parameters are optional. - -6. Invoke the `API` method for the endpoint you are calling. The methods are - * `entities(linked)` where `linked` is `False` for entity extraction and `True` for entity linking. - * `categories()` - * `sentiment()` - * `language()` - * `morphology(tag)` where tag is a member of `MorphologyOutput`: `LEMMAS`, `PARTS_OF_SPEECH`, `COMPOUND_COMPONENTS`, `HAN_READINGS`, or `COMPLETE`. An empty tag is equivalent to `COMPLETE`. - * `sentences()` - * `tokens()` - * `relationships()` - * `name_translation()` - * `name_similarity()` - * `matched_name()` *deprecated - * `translated_name()` *deprecated - -7. The API will return a dictionary with the results. - -See [examples](examples) for more request samples. - -API Documentation ------------------ +#### Installation +`pip install rosette_api` -See [documentation](http://rosette-api.github.io/python) +#### Examples +View small example programs for each Analytics endpoint +in the [examples](https://github.com/rosette-api/python/tree/develop/examples) directory. 
-Additional Information ----------------------- +#### Documentation & Support +- [Binding API](https://rosette-api.github.io/python/) +- [Analytics Platform API](https://docs.babelstreet.com/API/en/index-en.html) +- [Binding Release Notes](https://github.com/rosette-api/python/wiki/Release-Notes) +- [Analytics Platform Release Notes](https://docs.babelstreet.com/Release/en/rosette-cloud.html) +- [Support](https://babelstreet.my.site.com/support/s/) +- [Binding License: Apache 2.0](https://github.com/rosette-api/python/blob/develop/LICENSE.txt) -Visit [Rosette API site](https://developer.rosette.com) +## Binding Developer Information +If you are modifying the binding code, please refer to the [developer README](https://github.com/rosette-api/python/tree/develop/DEVELOPER.md) file. diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1ca7e96 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +application: + image: rosette/docker-python + environment: + - API_KEY=$API_KEY + - HTTP_PROXY=http://squid:3128 + - HTTPS_PROXY=https://squid:3128 + volumes: + - .:/source + links: + - proxy:squid + +proxy: + image: datadog/squid + ports: + - 3128:3128 diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 62796d0..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -FROM ubuntu:14.04 -MAINTAINER Fiona Hasanaj - -ENV DEBIAN_FRONTEND noninteractive -RUN locale-gen en_US.UTF-8 && /usr/sbin/update-locale LANG=en_US.UTF-8 -ENV LANG en_US.UTF-8 - -# proper init to handle signal propagation and zombie reaping -ADD https://github.com/krallin/tini/releases/download/v0.8.4/tini /tini -RUN chmod +x /tini -ENTRYPOINT ["/tini", "--"] - -RUN apt-get update && \ - apt-get -y install \ - wget \ - curl \ - libssl-dev \ - libffi-dev \ - python-pip \ - python-software-properties \ - software-properties-common && \ - add-apt-repository -y ppa:fkrull/deadsnakes && \ - apt-get update && \ - apt-get -y install \ - 
python2.6 \ - python2.7 \ - python3.3 \ - python3.4 \ - python3.5 \ - git\ - pypy && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -RUN mkdir /install && \ - wget -O /install/pypy3-2.4-linux_x86_64-portable.tar.bz2 \ - "https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3-2.4-linux_x86_64-portable.tar.bz2" && \ - tar jxf /install/pypy3-*.tar.bz2 -C /install && \ - rm /install/pypy3-*.tar.bz2 && \ - ln -s /install/pypy3-*/bin/pypy3 /usr/local/bin/pypy3 - -RUN pip install -U pip && pip install tox -RUN pip install --upgrade autopep8 - -# copy over the necessary files -COPY run_python.sh /python-dev/run_python.sh -RUN chmod 755 /python-dev/run_python.sh -COPY tox.ini /python-dev/tox.ini -WORKDIR /python-dev - -# allow interactive bash inside docker container -CMD ./run_python.sh $API_KEY $FILENAME $ALT_URL $GIT_USERNAME $VERSION - -VOLUME ["/source"] diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index d87f8d7..0000000 --- a/docker/README.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -# Docker Image for Python Examples ---- -### Summary -To simplify the running of the Python examples, the Dockerfile will build an image where the examples can be tested against the development source. - -### Basic Usage -Build the docker image, e.g. `docker build -t basistech/python:1.1 .` - -Run an example as `docker run -e API_KEY=api-key -v "path-to-local-python-dir:/source" basistech/python:1.1` - -To test against a specific source file, add `-e FILENAME=filename` before the `-v`, to test against an alternate url, add `-e ALT_URL=alternate_url`, and optionally if you would like to regenerate gh-pages from the changes made to the development source you can add `-e GIT_USERNAME=git-username -e VERSION=version` before the `-v`. In order to push the gh-pages to git remember to mount .ssh and .gitconfig to the root dir `-v path-to-.ssh-dir:/root/.ssh -v path-to-.gitconfig:/root/.gitconfig`. 
\ No newline at end of file diff --git a/docker/run_python.sh b/docker/run_python.sh deleted file mode 100644 index f0571b9..0000000 --- a/docker/run_python.sh +++ /dev/null @@ -1,143 +0,0 @@ -#!/bin/bash - -retcode=0 -ping_url="https://api.rosette.com/rest/v1" - -#------------------ Functions ---------------------------------------------------- -#Gets called when the user doesn't provide any args -function HELP { - echo -e "\nusage: --key API_KEY [--FILENAME filename] [--url ALT_URL]" - echo " API_KEY - Rosette API key (required)" - echo " FILENAME - Python source file (optional)" - echo " ALT_URL - Alternate service URL (optional)" - echo " GIT_USERNAME - Git username where you would like to push regenerated gh-pages (optional)" - echo " VERSION - Build version (optional)" - echo "Compiles and runs the source file(s) using the local development source." - exit 1 -} - -#Checks if Rosette API key is valid -function checkAPI { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" | grep -o "forbidden") - if [ ! -z $match ]; then - echo -e "\nInvalid Rosette API Key" - exit 1 - fi -} - -function cleanURL() { - # strip the trailing slash off of the alt_url if necessary - if [ ! 
-z "${ALT_URL}" ]; then - case ${ALT_URL} in - */) ALT_URL=${ALT_URL::-1} - echo "Slash detected" - ;; - esac - ping_url=${ALT_URL} - fi -} - -function validateURL() { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" | grep -o "Rosette API") - if [ "${match}" = "" ]; then - echo -e "\n${ping_url} server not responding\n" - exit 1 - fi -} - -function runExample() { - echo -e "\n---------- ${1} start -------------" - result="" - if [ -z ${ALT_URL} ]; then - result="$(python ${1} --key ${API_KEY} 2>&1 )" - else - result="$(python ${1} --key ${API_KEY} --url ${ALT_URL} 2>&1 )" - fi - echo "${result}" - echo -e "\n---------- ${1} end -------------" - if [[ "${result}" == *"Exception"* ]]; then - echo "Exception found" - retcode=1 - elif [[ "$result" == *"processingFailure"* ]]; then - retcode=1 - elif [[ "$result" == *"AttributeError"* ]]; then - retcode=1 - elif [[ "$result" == *"ImportError"* ]]; then - retcode=1 - fi -} -#------------------ Functions End ------------------------------------------------ - -#Gets API_KEY, FILENAME and ALT_URL if present -while getopts ":API_KEY:FILENAME:ALT_URL:GIT_USERNAME:VERSION" arg; do - case "${arg}" in - API_KEY) - API_KEY=${OPTARG} - usage - ;; - ALT_URL) - ALT_URL=${OPTARG} - usage - ;; - FILENAME) - FILENAME=${OPTARG} - usage - ;; - GIT_USERNAME) - GIT_USERNAME=${OPTARG} - usage - ;; - VERSION) - VERSION={OPTARG} - usage - ;; - esac -done - -cleanURL - -validateURL - - -#Copy the mounted content in /source to current WORKDIR -cp -r -n /source/* . - -#Run the examples -if [ ! -z ${API_KEY} ]; then - checkAPI - #Prerequisite - python /python-dev/setup.py install - cd /python-dev/examples - if [ ! -z ${FILENAME} ]; then - runExample ${FILENAME} - else - for file in *.py; do - runExample ${file} - done - fi -else - HELP -fi - -#Run unit tests -cd /python-dev -tox - -#Generate gh-pages and push them to git account (if git username is provided) -if [ ! -z ${GIT_USERNAME} ] && [ ! 
-z ${VERSION} ]; then - #clone python git repo - cd / - git clone git@github.com:${GIT_USERNAME}/python.git - cd python - git checkout origin/gh-pages -b gh-pages - git branch -d develop - #generate gh-pages and set ouput dir to git repo (gh-pages branch) - cd /python-dev - .tox/py27/bin/epydoc -v --no-private --no-frames --css epydoc.css -o /python rosette/*.py - cd /python - git add . - git commit -a -m "publish python apidocs ${VERSION}" - git push -fi - -exit ${retcode} diff --git a/docker/tox.ini b/docker/tox.ini deleted file mode 100644 index 5955b11..0000000 --- a/docker/tox.ini +++ /dev/null @@ -1,18 +0,0 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - -[tox] -skipsdist = True -envlist = py26, py27, py33, py34 - -[testenv] -commands = - py.test {toxinidir}/tests -s --pep8 -deps = - pytest - pytest-pep8 - httpretty - epydoc - requests diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d8f4f61 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python3 -msphinx +SPHINXPROJ = PythonBinding +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..b9f2584 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,23 @@ +## Generating documentation with Sphinx + +The best tutorial for generating documentation can be found [here](http://gisellezeno.com/tutorials/sphinx-for-python-documentation.html) + +Files in the source directory are generally static and should not need to be updated. If another python module is created in `../rosette`, then the source may need to be regenerated using + +`sphinx-apidoc -f -o source/ ../rosette/` + +This will overwrite the *.rst files, which may then require some editing to provide the desired look. Edits to date: +1. index.rst: Changed the `Welcome ...` title to `Python Binding` +1. index.rst: Added minor summary, "This is the API documentation for the Babel Street Analytics API Python Binding. For examples and usage, please refer to our `API Guide `_." +1. conf.py: removed blank line at end of file +1. conf.py: added Babel Street logo +1. conf.py: blank project (let logo handle it) +1. conf.py: added version (auto updated by publish) +1. conf.py: added author +1. conf.py: enabled `sys.path.insert(0, os.path.abspath('../..'))` + +To change the logo, edit conf.py, `html_logo` + +To generate the HTML, run `make html`. The output will be written to `build/html`. This is the step that is run by the `publish.sh` script when publishing the Python binding. Note that the version, which is noted in `conf.py`, is not displayed anywhere, but is updated during the publish phase. 
+ +You can view the generated html locally, by navigating to `docs/build/html` and opening `index.html` \ No newline at end of file diff --git a/docs/source/_static/README.md b/docs/source/_static/README.md new file mode 100644 index 0000000..6fa98c9 --- /dev/null +++ b/docs/source/_static/README.md @@ -0,0 +1 @@ +### Place static sphinx files here if needed \ No newline at end of file diff --git a/docs/source/_templates/README.md b/docs/source/_templates/README.md new file mode 100644 index 0000000..055e18a --- /dev/null +++ b/docs/source/_templates/README.md @@ -0,0 +1,3 @@ +### Place sphinx templates here (if desired) + +Default is alabaster \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..2114fa5 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Python Binding documentation build configuration file, created by +# sphinx-quickstart on Fri Jul 28 09:16:12 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = ['sphinx.ext.autodoc'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = '' +copyright = '2024, Babel Street' +author = 'Babel Street' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '1.31.0' +# The full version, including alpha/beta/rc tags. +release = '1.31.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# These patterns also affect html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. 
+# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] +html_logo = 'logo-400x113.png' +html_favicon = 'favicon-16x16.png' + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +html_sidebars = { + '**': [ + 'about.html', + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + 'donate.html', + ] +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'PythonBindingdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'PythonBinding.tex', 'Python Binding Documentation', + 'Basis Technology', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [ + (master_doc, 'pythonbinding', 'Python Binding Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'PythonBinding', 'Python Binding Documentation', + author, 'PythonBinding', 'One line description of project.', + 'Miscellaneous'), +] diff --git a/docs/source/favicon-16x16.png b/docs/source/favicon-16x16.png new file mode 100644 index 0000000..2750b93 Binary files /dev/null and b/docs/source/favicon-16x16.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..9436fde --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,22 @@ +.. Python Binding documentation master file, created by + sphinx-quickstart on Fri Jul 28 09:16:12 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +Python Binding +========================================== +This is the API documentation for the Babel Street Analytics API Python Binding. For examples and usage, please refer to our `API Guide `_. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/logo-400x113.png b/docs/source/logo-400x113.png new file mode 100644 index 0000000..b411943 Binary files /dev/null and b/docs/source/logo-400x113.png differ diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..7a6d415 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +rosette +======= + +.. 
toctree:: + :maxdepth: 4 + + rosette diff --git a/docs/source/rosette.rst b/docs/source/rosette.rst new file mode 100644 index 0000000..a6f70fb --- /dev/null +++ b/docs/source/rosette.rst @@ -0,0 +1,22 @@ +rosette package +=============== + +Submodules +---------- + +rosette\.api module +------------------- + +.. automodule:: rosette.api + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: rosette + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/README.md b/examples/README.md index 5413e59..80a19b0 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,42 +1,50 @@ -Python Examples -================== - -These examples are scripts that can be run independently to demonstrate the Rosette API functionality. - -Prerequisite: Either run `pip install rosette_api` or run `python setup.py install` in the python top level folder. - -Alternatively, you can run all the examples with the command line: -`find -maxdepth 1 -name "*.py" -exec tox -- {} --key api-key --url alternate_url \;` - -You can now run your desired _endpoint_.py file to see it in action. -For example, run `python/examples/categories.py` if you want to see the categories -functionality demonstrated. - -All files require you to input your Rosette API User Key after --key to run. -For example: `python ping.py --key 1234567890` -All also allow you to input your own service URL if desired. -For example: `python ping.py --key 1234567890 --service_url http://www.myurl.com` -Some (specified below) allow an additional input of either a file (.html or .txt) or a URL with `--file` or `--url` - -Each example, when run, prints its output to the console. 
- -| File Name | What it does | -| ------------- |------------- | -| categories.py | Gets the category of a document at a URL | -| entities.py | Gets the entities from a piece of text | -| entities_linked.py | Gets the linked (to Wikipedia) entities from a piece of text | -| info.py | Gets information about Rosette API | -| language.py | Gets the language of a piece of text | -| matched-name.py | Gets the similarity score of two names | -| morphology_complete.py | Gets the complete morphological analysis of a piece of text| -| morphology_compound-components.py | Gets the de-compounded words from a piece of text | -| morphology_han-readings.py | Gets the Chinese words from a piece of text | -| morphology_lemmas.py | Gets the lemmas of words from a piece of text | -| morphology_parts-of-speech.py | Gets the part-of-speech tags for words in a piece of text | -| ping.py | Pings the Rosette API to check for reachability | -| relationships.py | Gets the relationships between entities from a piece of text | -| sentences.py | Gets the sentences from a piece of text | -| sentiment.py | Gets the sentiment of a local file | -| tokens.py | Gets the tokens (words) from a piece of text | -| translated-name.py | Translates a name from one language to another | - +## Endpoint Examples + +Each example file demonstrates one of the capabilities of the Babel Street Analytics Platform. + +Here are some methods for running the examples. Each example will also accept an optional `--url` parameter for +overriding the default URL. + +A note on prerequisites. Analytics API only supports TLS 1.2 so ensure your toolchain also supports it. 
+ +#### Virtualenv/Latest Release +``` +git clone git@github.com:rosette-api/python.git +cd python/examples +python -m venv analytics_venv +source analytics_venv/bin/activate +pip install rosette_api +python ping.py -k $API_KEY +``` + +#### Virtualenv/Local Source +``` +git clone git@github.com:rosette-api/python.git +cd python +python -m venv analytics_venv +source analytics_venv/bin/activate +python setup.py install +cd examples +python ping.py -k $API_KEY +``` + +#### Docker/Latest Release +``` +git clone git@github.com:rosette-api/python.git +cd python/examples +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim +cd /source +pip install rosette_api +python ping.py -k $API_KEY +``` + +#### Docker/Local Source +``` +git clone git@github.com:rosette-api/python.git +cd python +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim +cd /source +python setup.py install +cd examples +python ping.py -k $API_KEY +``` diff --git a/examples/address_similarity.py b/examples/address_similarity.py new file mode 100644 index 0000000..2175817 --- /dev/null +++ b/examples/address_similarity.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get match score (similarity) of two addresses. 
+""" + +import argparse +import json +import os + +from rosette.api import API, AddressSimilarityParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + params = AddressSimilarityParameters() + params["address1"] = {"houseNumber": "1600", "road": "Pennsylvania Ave NW", "city": "Washington", "state": "DC", "postCode": "20500"} + params["address2"] = "160 Pennsilvana Avenue, Washington, D.C., 20500" + #params["parameters"] = {"houseNumberAddressFieldWeight": "0.9"} + + try: + return api.address_similarity(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/categories.py b/examples/categories.py index 4731edb..6f09c75 100644 --- a/examples/categories.py +++ b/examples/categories.py @@ -1,33 +1,48 @@ # -*- coding: utf-8 -*- """ -Example code to call Rosette API to get the category of a document (at a given URL). +Example code to call Analytics API to get the category of a document (at a given URL). 
""" import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): - categories_url_data = "http://www.onlocationvacations.com/2015/03/05/the-new-ghostbusters-movie-begins-filming-in-boston-in-june/" - url = categories_url_data + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + categories_text_data = "If you are a fan of the British television series Downton Abbey and you are planning to be in New York anytime before April 2nd, there is a perfect stop for you while in town." # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#categorization + + # api.set_option('singleLabel', 'true') + # api.set_option('scoreThreshold',- 0.20) + params = DocumentParameters() - # Use a URL to input data instead of a string - params["contentUri"] = url - return api.categories(params) + params["content"] = categories_text_data + try: + return api.categories(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API 
Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/docker/Dockerfile b/examples/docker/Dockerfile deleted file mode 100644 index e9eea7d..0000000 --- a/examples/docker/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM python:2.7.11 -MAINTAINER Fiona Hasanaj -ENV MAINTENANCE_DATE 03.28.2016 - -# install necessary software -RUN apt-get -y update && apt-get install -y vim && apt-get install -y git && pip install rosette_api - -COPY run_python.sh /python/examples/run_python.sh -RUN chmod 755 /python/examples/run_python.sh -WORKDIR /python/examples - -# allow interactive bash inside docker container -CMD ./run_python.sh $API_KEY $FILENAME $ALT_URL - -VOLUME ["/source"] diff --git a/examples/docker/README.md b/examples/docker/README.md deleted file mode 100644 index 8335d53..0000000 --- a/examples/docker/README.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -# Docker Image for Python Examples ---- -### Summary -To simplify the running of the Python examples, the Dockerfile will build an image and install the latest rosette-api library. - -### Basic Usage -Build the docker image, e.g. 
`docker build -t basistech/python:1.1 .` - -Run an example as `docker run -e API_KEY=api-key -v "path-to-example-source:/source" basistech/python:1.1` - -To test against a specific source file, add `-e FILENAME=filename` before the `-v` - -Also, to test against an alternate url, add `-e ALT_URL=alternate_url` before the `-v` \ No newline at end of file diff --git a/examples/docker/run_python.sh b/examples/docker/run_python.sh deleted file mode 100644 index 503443d..0000000 --- a/examples/docker/run_python.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -retcode=0 -ping_url="https://api.rosette.com/rest/v1" - -#------------------ Functions ---------------------------------------------------- - -#Gets called when the user doesn't provide any args -function HELP { - echo -e "\nusage: source_file.py --key API_KEY [--url ALT_URL]" - echo " API_KEY - Rosette API key (required)" - echo " FILENAME - Python source file (optional)" - echo " ALT_URL - Alternate service URL (optional)" - echo "Compiles and runs the source file(s) using the published rosette-api" - exit 1 -} - -#Checks if Rosette API key is valid -function checkAPI() { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" | grep -o "forbidden") - if [ ! -z $match ]; then - echo -e "\nInvalid Rosette API Key" - exit 1 - fi -} - -function cleanURL() { - # strip the trailing slash off of the alt_url if necessary - if [ ! 
-z "${ALT_URL}" ]; then - case ${ALT_URL} in - */) ALT_URL=${ALT_URL::-1} - echo "Slash detected" - ;; - esac - ping_url=${ALT_URL} - fi -} - -function validateURL() { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" -H "user_key: ${API_KEY}" | grep -o "Rosette API") - if [ "${match}" = "" ]; then - echo -e "\n${ping_url} server not responding\n" - exit 1 - fi -} - -function runExample() { - echo -e "\n---------- ${1} start -------------" - result="" - if [ -z ${ALT_URL} ]; then - result="$(python ${1} --key ${API_KEY} 2>&1 )" - else - result="$(python ${1} --key ${API_KEY} --url ${ALT_URL} 2>&1 )" - fi - echo "${result}" - echo -e "\n---------- ${1} end -------------" - if [[ "${result}" == *"Exception"* ]]; then - echo "Exception found" - retcode=1 - elif [[ "$result" == *"processingFailure"* ]]; then - retcode=1 - elif [[ "$result" == *"AttributeError"* ]]; then - retcode=1 - elif [[ "$result" == *"ImportError"* ]]; then - retcode=1 - fi -} - -#------------------ Functions End ------------------------------------------------ - -#Gets API_KEY, FILENAME and ALT_URL if present -while getopts ":API_KEY:FILENAME:ALT_URL" arg; do - case "${arg}" in - API_KEY) - API_KEY=${OPTARG} - usage - ;; - FILENAME) - FILENAME=${OPTARG} - usage - ;; - ALT_URL) - ALT_URL=${OPTARG} - usage - ;; - esac -done - -cleanURL - -validateURL - -#Copy the examples from the mounted content in /source to current WORKDIR -cp /source/examples/*.* . - -#Run the examples -if [ ! -z ${API_KEY} ]; then - checkAPI - if [ ! -z ${FILENAME} ]; then - runExample ${FILENAME} - else - for file in *.py; do - runExample ${file} - done - fi -else - HELP -fi - -exit ${retcode} diff --git a/examples/entities.py b/examples/entities.py index 530df49..beba9de 100644 --- a/examples/entities.py +++ b/examples/entities.py @@ -1,29 +1,46 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get entities from a piece of text. 
+Example code to call Analytics API to get entities from a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) - entities_text_data = "Bill Murray will appear in new Ghostbusters film: Dr. Peter Venkman was spotted filming a cameo in Boston this… http://dlvr.it/BnsFfS" + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#entity-extraction-and-linking + + # api.set_option('calculateSalience','true') + # api.set_option('linkEntities','false') + # api.set_option('useIndocServer', True) + + entities_text_data = "The Securities and Exchange Commission today announced the leadership of the agency’s trial unit. Bridget Fitzpatrick has been named Chief Litigation Counsel of the SEC and David Gottesman will continue to serve as the agency’s Deputy Chief Litigation Counsel. Since December 2016, Ms. Fitzpatrick and Mr. Gottesman have served as Co-Acting Chief Litigation Counsel. In that role, they were jointly responsible for supervising the trial unit at the agency’s Washington D.C. headquarters as well as coordinating with litigators in the SEC’s 11 regional offices around the country." 
params = DocumentParameters() params["content"] = entities_text_data - return api.entities(params) # entity linking is turned off -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') + try: + return api.entities(params) + except RosetteException as exception: + print(exception) + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/entities_linked.py b/examples/entities_linked.py deleted file mode 100644 index acbd4f2..0000000 --- a/examples/entities_linked.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Example code to call Rosette API to get linked (against Wikipedia) entities from a piece of text. -""" - -import argparse -import json -import os - -from rosette.api import API, DocumentParameters - - -def run(key, altUrl='https://api.rosette.com/rest/v1/'): - # Create an API instance - api = API(user_key=key, service_url=altUrl) - - entities_linked_text_data = "Last month director Paul Feig announced the movie will have an all-star female cast including Kristen Wiig, Melissa McCarthy, Leslie Jones and Kate McKinnon." 
- params = DocumentParameters() - params["content"] = entities_linked_text_data - params["genre"] = "social-media" - return api.entities(params, True) # entity linking is turned on - - -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') - -if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) diff --git a/examples/events.py b/examples/events.py new file mode 100644 index 0000000..79ffc3e --- /dev/null +++ b/examples/events.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get events from a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + events_text_data = "I am looking for flights to Super Bowl 2022 in Inglewood, LA." 
+ params = DocumentParameters() + params["content"] = events_text_data + + try: + return api.events(params) + except RosetteException as exception: + print(exception) + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/events_negation.py b/examples/events_negation.py new file mode 100644 index 0000000..becc731 --- /dev/null +++ b/examples/events_negation.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get events, based on a set negation option, from a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Double negative, meaning that the event should be skipped with "IGNORE" or "ONLY_NEGATIVE" + # and recognized under "BOTH" or "ONLY_POSITIVE" + events_text_data = "Sam didn't not take a flight to Boston." 
+ params = DocumentParameters() + params["content"] = events_text_data + api.set_option('negation', 'ONLY_POSITIVE') + + + try: + return api.events(params) + except RosetteException as exception: + print(exception) + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/info.py b/examples/info.py index a4c0bd5..8fba621 100644 --- a/examples/info.py +++ b/examples/info.py @@ -1,28 +1,34 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get information such as version and build +Example code to call Analytics API to get information such as version and build """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) - return api.info() + try: + return api.info() + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = 
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/language.py b/examples/language.py index 98f140e..e4fa87a 100644 --- a/examples/language.py +++ b/examples/language.py @@ -1,32 +1,38 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to determine the language of a piece of text. +Example code to call Analytics API to determine the language of a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) language_data = "Por favor Señorita, says the man." 
params = DocumentParameters() - params["content"] = language_data - return api.language(params) + api.set_custom_headers("X-RosetteAPI-App", "python-app") + try: + return api.language(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/language_multilingual.py b/examples/language_multilingual.py new file mode 100644 index 0000000..36bd8e6 --- /dev/null +++ b/examples/language_multilingual.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to determine the language of a piece of text. 
+""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + language_multilingual_data = "On Thursday, as protesters gathered in Washington D.C., the United States Federal Communications Commission under Chairman Ajit Pai voted 3-2 to overturn a 2015 decision, commonly called Net Neutrality, that forbade Internet service providers (ISPs) such as Verizon, Comcast, and AT&T from blocking individual websites or charging websites or customers more for faster load times. Quatre femmes ont été nommées au Conseil de rédaction de la loi du Qatar. Jeudi, le décret royal du Qatar a annoncé que 28 nouveaux membres ont été nommés pour le Conseil de la Choura du pays. ذكرت مصادر أمنية يونانية، أن 9 موقوفين من منظمة \"د هـ ك ب ج\" الذين كانت قد أوقفتهم الشرطة اليونانية في وقت سابق كانوا يخططون لاغتيال الرئيس التركي رجب طيب أردوغان." 
+ params = DocumentParameters() + params["content"] = language_multilingual_data + api.set_custom_headers("X-RosetteAPI-App", "python-app") + api.set_option('multilingual', True) + + try: + return api.language(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_complete.py b/examples/morphology_complete.py index 6e3dc40..10b1004 100644 --- a/examples/morphology_complete.py +++ b/examples/morphology_complete.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get the complete morphological analysis of a piece of text. +Example code to call Analytics API to get the complete morphological analysis of a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. 
+ # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only - morphology_complete_data = "The quick brown fox jumped over the lazy dog. Yes he did." + morphology_complete_data = "The quick brown fox jumped over the lazy dog. 👍🏾 Yes he did. B)" params = DocumentParameters() params["content"] = morphology_complete_data - return api.morphology(params) + try: + return api.morphology(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_compound-components.py b/examples/morphology_compound-components.py index 596f39d..5bacddb 100644 --- a/examples/morphology_compound-components.py +++ b/examples/morphology_compound-components.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call 
Rosette API to get de-compounded words from a piece of text. +Example code to call Analytics API to get de-compounded words from a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only morphology_compound_components_data = "Rechtsschutzversicherungsgesellschaften" params = DocumentParameters() params["content"] = morphology_compound_components_data - return api.morphology(params, MorphologyOutput.COMPOUND_COMPONENTS) + try: + return api.morphology(params, api.morphology_output['COMPOUND_COMPONENTS']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + 
default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_han-readings.py b/examples/morphology_han-readings.py index 535b314..f5c12f6 100644 --- a/examples/morphology_han-readings.py +++ b/examples/morphology_han-readings.py @@ -1,31 +1,43 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get Chinese readings of words in a piece of text. +Example code to call Analytics API to get Chinese readings of words in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + # Set selected API options. 
+ # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only morphology_han_readings_data = "北京大学生物系主任办公室内部会议" params = DocumentParameters() params["content"] = morphology_han_readings_data - return api.morphology(params, MorphologyOutput.HAN_READINGS) + try: + return api.morphology(params, api.morphology_output['HAN_READINGS']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_lemmas.py b/examples/morphology_lemmas.py index 9617712..dc7bb8d 100644 --- a/examples/morphology_lemmas.py +++ b/examples/morphology_lemmas.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get lemmas for words in a piece of text. 
+Example code to call Analytics API to get lemmas for words in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only morphology_lemmas_data = "The fact is that the geese just went back to get a rest and I'm not banking on their return soon" params = DocumentParameters() params["content"] = morphology_lemmas_data - return api.morphology(params, MorphologyOutput.LEMMAS) + try: + return api.morphology(params, api.morphology_output['LEMMAS']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if 
__name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_parts-of-speech.py b/examples/morphology_parts-of-speech.py index 69dbcdb..f020ca2 100644 --- a/examples/morphology_parts-of-speech.py +++ b/examples/morphology_parts-of-speech.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get part-of-speech tags for words in a piece of text. +Example code to call Analytics API to get part-of-speech tags for words in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. 
+ # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') # Valid for Chinese and Japanese only morphology_parts_of_speech_data = "The fact is that the geese just went back to get a rest and I'm not banking on their return soon" params = DocumentParameters() params["content"] = morphology_parts_of_speech_data - return api.morphology(params, MorphologyOutput.PARTS_OF_SPEECH) + try: + return api.morphology(params, api.morphology_output['PARTS_OF_SPEECH']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/name_deduplication.py b/examples/name_deduplication.py new file mode 100644 index 0000000..7c69e20 --- /dev/null +++ b/examples/name_deduplication.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to deduplicate 
a list of names. +""" + +import argparse +import json +import os + +from rosette.api import API, NameDeduplicationParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + name_dedupe_data = "Alice Terry,Alice Thierry,Betty Grable,Betty Gable,Norma Shearer,Norm Shearer,Brigitte Helm,Bridget Helem,Judy Holliday,Julie Halliday" + threshold = 0.75 + params = NameDeduplicationParameters() + params["names"] = name_dedupe_data.split(',') + params["threshold"] = threshold + try: + return api.name_deduplication(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/name_similarity.py b/examples/name_similarity.py index b4ed053..b8a51ec 100644 --- a/examples/name_similarity.py +++ b/examples/name_similarity.py @@ -1,33 +1,41 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get match score (similarity) of two names. +Example code to call Analytics API to get match score (similarity) of two names. 
""" import argparse import json import os -from rosette.api import API, NameSimilarityParameters +from rosette.api import API, NameSimilarityParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) matched_name_data1 = "Michael Jackson" matched_name_data2 = "迈克尔·杰克逊" params = NameSimilarityParameters() params["name1"] = {"text": matched_name_data1, "language": "eng", "entityType": "PERSON"} params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} - return api.name_similarity(params) + #params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.2"} + + try: + return api.name_similarity(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/name_translation.py 
b/examples/name_translation.py index c6704e4..455fc50 100644 --- a/examples/name_translation.py +++ b/examples/name_translation.py @@ -1,19 +1,19 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to translate a name from one language to another. +Example code to call Analytics API to translate a name from one language to another. """ import argparse import json import os -from rosette.api import API, NameTranslationParameters +from rosette.api import API, NameTranslationParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) translated_name_data = "معمر محمد أبو منيار القذاف" params = NameTranslationParameters() @@ -21,14 +21,20 @@ def run(key, altUrl='https://api.rosette.com/rest/v1/'): params["entityType"] = "PERSON" params["targetLanguage"] = "eng" params["targetScript"] = "Latn" - return api.name_translation(params) + try: + return api.name_translation(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - 
result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/ping.py b/examples/ping.py index 98506f3..f908367 100644 --- a/examples/ping.py +++ b/examples/ping.py @@ -1,28 +1,34 @@ # -*- coding: utf-8 -*- - """ -Example code to send Rosette API a ping to check its reachability. +Example code to send Analytics API a ping to check its reachability. """ import argparse import json import os -from rosette.api import API +from rosette.api import API, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) - return api.ping() + try: + return api.ping() + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + 
print(RESULT) diff --git a/examples/record_similarity.py b/examples/record_similarity.py new file mode 100644 index 0000000..a2c1fe2 --- /dev/null +++ b/examples/record_similarity.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get similarity score between a list of records +""" + +import argparse +import json +import os + +from rosette.api import API, RecordSimilarityParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + fields = { + "primaryName": { + "type": "rni_name", + "weight": 0.5 + }, + "dob": { + "type": "rni_date", + "weight": 0.2 + }, + "addr": { + "type": "rni_address", + "weight": 0.5 + }, + "dob2": { + "type": "rni_date", + "weight": 0.1 + }, + "jobTitle": { + "type": "rni_string", + "weight": 0.2 + }, + "age": { + "type": "rni_number", + "weight": 0.4 + }, + "isRetired": { + "type": "rni_boolean", + "weight": 0.05 + } + } + properties = { + "threshold": 0.7, + "includeExplainInfo": True + } + records = { + "left": [ + { + "primaryName": { + "text": "Ethan R", + "entityType": "PERSON", + "language": "eng", + "languageOfOrigin": "eng", + "script": "Latn" + }, + "dob": "1993-04-16", + "addr": "123 Roadlane Ave", + "dob2": { + "date": "04161993", + "format": "MMddyyyy" + }, + "jobTitle": "software engineer" + }, + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Evan R" + }, + "age": 47, + "isRetired": False + } + ], + "right": [ + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Seth R", + "language": "eng" + }, + "jobTitle": "manager", + "isRetired": True + }, + { + "primaryName": "Ivan R", + "dob": { + "date": "1993-04-16" + }, + "addr": { + "houseNumber": "123", + "road": "Roadlane Ave" + }, + "dob2": { + "date": "1993/04/16" + }, + "age": 72, + "isRetired": True + } + ] + } + params = RecordSimilarityParameters() + 
params["fields"] = fields + params["properties"] = properties + params["records"] = records + + try: + return api.record_similarity(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/relationships.py b/examples/relationships.py index de366a8..490a527 100644 --- a/examples/relationships.py +++ b/examples/relationships.py @@ -1,31 +1,36 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get entities's relationships from a piece of text. +Example code to call Analytics API to get entities's relationships from a piece of text. """ import argparse import json import os -from rosette.api import API, RelationshipsParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) - relationships_text_data = "The Ghostbusters movie was filmed in Boston." - params = RelationshipsParameters() + api = API(user_key=key, service_url=alt_url) + relationships_text_data = "FLIR Systems is headquartered in Oregon and produces thermal imaging, night vision, and infrared cameras and sensor systems. According to the SEC’s order instituting a settled administrative proceeding, FLIR entered into a multi-million dollar contract to provide thermal binoculars to the Saudi government in November 2008. 
Timms and Ramahi were the primary sales employees responsible for the contract, and also were involved in negotiations to sell FLIR’s security cameras to the same government officials. At the time, Timms was the head of FLIR’s Middle East office in Dubai." + params = DocumentParameters() params["content"] = relationships_text_data - params["options"] = {"accuracyMode": "PRECISION"} - return api.relationships(params) + try: + return api.relationships(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/run_all.sh b/examples/run_all.sh index 3f18bd7..ab797c9 100644 --- a/examples/run_all.sh +++ b/examples/run_all.sh @@ -1,5 +1,15 @@ #!/bin/bash + +if [ $# -eq 0 ]; then + echo "Usage: $0 API_KEY [ALT_URL]" 1>&2 + exit 1 +fi + for f in *.py do + if [ -n "$2" ]; then + python $f --key $1 --url $2 + else python $f --key $1 + fi done diff --git a/examples/semantic_vectors.py b/examples/semantic_vectors.py new file mode 100644 index 0000000..ef99e5b --- 
/dev/null +++ b/examples/semantic_vectors.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get text vectors from a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#semantic-vectors + + # api.set_option('perToken', 'true') + + semantic_vectors_data = "Cambridge, Massachusetts" + params = DocumentParameters() + params["content"] = semantic_vectors_data + try: + return api.semantic_vectors(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/sentences.py b/examples/sentences.py index b1b682d..f0c3e12 100644 --- a/examples/sentences.py +++ b/examples/sentences.py @@ -1,32 +1,38 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get sentences in a piece of text. +Example code to call Analytics API to get sentences in a piece of text. 
""" import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) - sentences_data = "This land is your land. This land is my land\nFrom California to the New York island;\nFrom the red wood forest to the Gulf Stream waters\n\nThis land was made for you and Me.\n\nAs I was walking that ribbon of highway,\nI saw above me that endless skyway:\nI saw below me that golden valley:\nThis land was made for you and me." + sentences_data = "This land is your land. This land is my land, from California to the New York island; from the red wood forest to the Gulf Stream waters. This land was made for you and Me. As I was walking that ribbon of highway, I saw above me that endless skyway: I saw below me that golden valley: This land was made for you and me." 
params = DocumentParameters() params["content"] = sentences_data - return api.sentences(params) + try: + return api.sentences(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/sentiment.py b/examples/sentiment.py index c43074c..1a292d8 100644 --- a/examples/sentiment.py +++ b/examples/sentiment.py @@ -1,7 +1,7 @@ +#!/usr/bin/env python # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get the sentiment of a local file. +Example code to call Analytics API to get the sentiment of a local file. 
""" import argparse @@ -9,38 +9,51 @@ import os import tempfile -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create default file to read from - f = tempfile.NamedTemporaryFile(suffix=".html") + temp_file = tempfile.NamedTemporaryFile(suffix=".html") sentiment_file_data = "New Ghostbusters Film

Original Ghostbuster Dan Aykroyd, who also co-wrote the 1984 Ghostbusters film, couldn’t be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent women in comedy.”

" message = sentiment_file_data - f.write(message) - f.seek(0) + temp_file.write(message if isinstance(message, bytes) else message.encode()) + temp_file.seek(0) # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#sentiment-analysis + + # api.set_option('modelType','dnn') #Valid for English only params = DocumentParameters() params["language"] = "eng" # Use an HTML file to load data instead of a string - params.load_document_file(f.name) - result = api.sentiment(params) - - # Clean up the file - f.close() + params.load_document_file(temp_file.name) + try: + result = api.sentiment(params) + except RosetteException as exception: + print(exception) + finally: + # Clean up the file + temp_file.close() return result -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + 
print(RESULT) diff --git a/examples/similar_terms.py b/examples/similar_terms.py new file mode 100644 index 0000000..753e397 --- /dev/null +++ b/examples/similar_terms.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get similar terms for an input. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#similar-terms + + api.set_option("resultLanguages", ['spa', 'deu', 'jpn']) + + similar_terms_data = "spy" + params = DocumentParameters() + params["content"] = similar_terms_data + try: + return api.similar_terms(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/syntax_dependencies.py b/examples/syntax_dependencies.py new file mode 100644 index 0000000..e5e99a0 --- /dev/null +++ b/examples/syntax_dependencies.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get the syntactic dependencies of a document (at a given URL). 
+""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + syntax_dependencies_data = "Yoshinori Ohsumi, a Japanese cell biologist, was awarded the Nobel Prize in Physiology or Medicine on Monday." + params = DocumentParameters() + params["content"] = syntax_dependencies_data + # Create an API instance + api = API(user_key=key, service_url=alt_url) + try: + return api.syntax_dependencies(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/tokens.py b/examples/tokens.py index 128a23c..e98601c 100644 --- a/examples/tokens.py +++ b/examples/tokens.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get the tokens (words) in a piece of text. +Example code to call Analytics API to get the tokens (words) in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. 
+ # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#tokenization + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only tokens_data = "北京大学生物系主任办公室内部会议" params = DocumentParameters() params["content"] = tokens_data - return api.tokens(params) + try: + return api.tokens(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/topics.py b/examples/topics.py new file mode 100644 index 0000000..e33a745 --- /dev/null +++ b/examples/topics.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get the topics (key phrases and concepts) in a piece of text. 
+""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#topic-extraction + + # api.set_option('keyphraseSalienceThreshold','.5') + # api.set_option('conceptSalienceThreshold','.1') + + topics_data = "Lily Collins is in talks to join Nicholas Hoult in Chernin Entertainment and Fox Searchlight's J.R.R. Tolkien biopic Tolkien. Anthony Boyle, known for playing Scorpius Malfoy in the British play Harry Potter and the Cursed Child, also has signed on for the film centered on the famed author. In Tolkien, Hoult will play the author of the Hobbit and Lord of the Rings book series that were later adapted into two Hollywood trilogies from Peter Jackson. Dome Karukoski is directing the project." 
+ params = DocumentParameters() + params["content"] = topics_data + try: + return api.topics(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/transliteration.py b/examples/transliteration.py new file mode 100644 index 0000000..bc7c5da --- /dev/null +++ b/examples/transliteration.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to transliterate a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. 
+ # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#transliteration + + # To transliterate from native Arabic script to Arabizi add: + # api.set_option('reversed','True') + + transliteration_data = "ana r2ye7 el gam3a el sa3a 3 el 3asr" + params = DocumentParameters() + params["content"] = transliteration_data + + try: + return api.transliteration(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/pytest.ini b/pytest.ini index fc6bcc6..b37e476 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] -pep8ignore = E501 norecursedirs = .tox - target \ No newline at end of file + target diff --git a/rosette/__init__.py b/rosette/__init__.py index 98b864e..5c89253 100644 --- a/rosette/__init__.py +++ b/rosette/__init__.py @@ -1,14 +1,10 @@ """ -Python client for the Rosette API. - -Copyright (c) 2014-2015 Basis Technology Corporation. - +Python client for the Babel Street Analytics API. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,4 +12,4 @@ limitations under the License. """ -__version__ = '1.1.0' +__version__ = '1.31.0' diff --git a/rosette/api.py b/rosette/api.py index 318affa..9dadf15 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -1,9 +1,9 @@ #!/usr/bin/env python """ -Python client for the Rosette API. +Python client for the Babel Street Analytics API. -Copyright (c) 2014-2015 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,33 +22,31 @@ import json import logging import sys -import time import os -from socket import gaierror +import re import requests - -_BINDING_VERSION = "1.1" +import platform + +_APPLICATION_JSON = 'application/json' +_BINDING_LANGUAGE = 'python' +_BINDING_VERSION = '1.31.0' +# TODO Remove legacies in future release +_LEGACY_CONCURRENCY_HEADER = 'x-rosetteapi-concurrency' +_CONCURRENCY_HEADER = 'x-babelstreetapi-concurrency' +_LEGACY_CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-' +_CUSTOM_HEADER_PREFIX = "X-BabelStreetAPI-" +_CUSTOM_HEADER_PATTERN = re.compile('^(:?' 
+ _CUSTOM_HEADER_PREFIX + '|' + _LEGACY_CUSTOM_HEADER_PREFIX + ')') _GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08]) -_IsPy3 = sys.version_info[0] == 3 - - -try: - import urlparse -except ImportError: - import urllib.parse as urlparse -try: - import httplib -except ImportError: - import http.client as httplib +_ISPY3 = sys.version_info[0] == 3 -if _IsPy3: +if _ISPY3: _GZIP_SIGNATURE = _GZIP_BYTEARRAY else: _GZIP_SIGNATURE = str(_GZIP_BYTEARRAY) -class _ReturnObject: +class _ReturnObject(object): def __init__(self, js, code): self._json = js @@ -59,69 +57,36 @@ def json(self): def _my_loads(obj, response_headers): - if _IsPy3: - d1 = json.loads(obj.decode("utf-8")).copy() - d1.update(response_headers) - return d1 # if py3, need chars. + if _ISPY3: + temp = json.loads(obj.decode("utf-8")).copy() + temp.update(response_headers) + return temp # if py3, need chars. else: - d2 = json.loads(obj).copy() - d2.update(response_headers) - return d2 + temp = json.loads(obj).copy() + temp.update(response_headers) + return temp class RosetteException(Exception): - """Exception thrown by all Rosette API operations for errors local and remote. + """Exception thrown by all Analytics API operations for errors local and remote. TBD. Right now, the only valid operation is conversion to __str__. """ def __init__(self, status, message, response_message): + super(RosetteException, self).__init__(message) self.status = status self.message = message self.response_message = response_message def __str__(self): sst = self.status - if not (isinstance(sst, str)): + if not isinstance(sst, str): sst = repr(sst) return sst + ": " + self.message + ":\n " + self.response_message -class _PseudoEnum: - - def __init__(self): - pass - - @classmethod - def validate(cls, value, name): - values = [] - for (k, v) in vars(cls).items(): - if not k.startswith("__"): - values += [v] - - # this is still needed to make sure that the parameter NAMES are known. 
- # If python didn't allow setting unknown values, this would be a - # language error. - if value not in values: - raise RosetteException( - "unknownVariable", - "The value supplied for " + - name + - " is not one of " + - ", ".join(values) + - ".", - repr(value)) - - -class MorphologyOutput(_PseudoEnum): - LEMMAS = "lemmas" - PARTS_OF_SPEECH = "parts-of-speech" - COMPOUND_COMPONENTS = "compound-components" - HAN_READINGS = "han-readings" - COMPLETE = "complete" - - -class _DocumentParamSetBase(object): +class _RequestParametersBase(object): def __init__(self, repertoire): self.__params = {} @@ -131,40 +96,46 @@ def __init__(self, repertoire): def __setitem__(self, key, val): if key not in self.__params: raise RosetteException( - "badKey", "Unknown Rosette parameter key", repr(key)) + "badKey", "Unknown Analytics parameter key", repr(key)) self.__params[key] = val def __getitem__(self, key): if key not in self.__params: raise RosetteException( - "badKey", "Unknown Rosette parameter key", repr(key)) + "badKey", "Unknown Analytics parameter key", repr(key)) return self.__params[key] def validate(self): + """validation""" pass - def serialize(self): + def serialize(self, options): + """serialize keys with values""" self.validate() - v = {} - for (key, val) in self.__params.items(): + values = {} + for key, val in self.__params.items(): if val is None: - pass + continue else: - v[key] = val - return v + values[key] = val + + if options is not None and len(options) > 0: + values['options'] = options + return values -def _byteify(s): # py 3 only - l = len(s) - b = bytearray(l) - for ix in range(l): - oc = ord(s[ix]) - assert (oc < 256) - b[ix] = oc - return b +def _byteify(value): # py 3 only + length = len(value) + byte_array = bytearray(length) + for index in range(length): + ordinal = ord(value[index]) + assert ordinal < 256 + byte_array[index] = ordinal + return byte_array -class DocumentParameters(_DocumentParamSetBase): + +class 
DocumentParameters(_RequestParametersBase): """Parameter object for all operations requiring input other than translated_name. Two fields, C{content} and C{inputUri}, are set via @@ -181,10 +152,10 @@ class DocumentParameters(_DocumentParamSetBase): def __init__(self): """Create a L{DocumentParameters} object.""" - _DocumentParamSetBase.__init__( - self, ("content", "contentUri", "language", "genre")) + _RequestParametersBase.__init__( + self, ("content", "contentUri", "language", "profileId")) self.file_name = "" - self.useMultipart = False + self.use_multipart = False def validate(self): """Internal. Do not use.""" @@ -201,10 +172,10 @@ def validate(self): "Cannot supply both Content and ContentUri", "bad arguments") - def serialize(self): + def serialize(self, options): """Internal. Do not use.""" self.validate() - slz = super(DocumentParameters, self).serialize() + slz = super(DocumentParameters, self).serialize(options) return slz def load_document_file(self, path): @@ -213,45 +184,35 @@ def load_document_file(self, path): be determined by the server. @parameter path: Pathname of a file acceptable to the C{open} function. """ - self.useMultipart = True + self.use_multipart = True self.file_name = path - self.load_document_string(open(path, "rb").read()) + with open(path, "rb") as f: + self.load_document_string(f.read()) - def load_document_string(self, s): + def load_document_string(self, content_as_string): """Loads a string into the object. The string will be taken as bytes or as Unicode dependent upon its native python type. @parameter s: A string, possibly a unicode-string, to be loaded for subsequent analysis. """ - self["content"] = s - + self["content"] = content_as_string -class RelationshipsParameters(DocumentParameters): - """Parameter object for relationships endpoint. 
Inherits from L(DocumentParameters), but allows the user - to specify the relationships-unique options parameter.""" - - def __init__(self): - """Create a L{RelationshipsParameters} object.""" - self.useMultipart = False - _DocumentParamSetBase.__init__( - self, ("content", "contentUri", "language", "options", "genre")) - - -class NameTranslationParameters(_DocumentParamSetBase): +class NameTranslationParameters(_RequestParametersBase): """Parameter object for C{name-translation} endpoint. - The following values may be set by the indexing (i.e.,C{ parms["name"]}) operator. The values are all - strings (when not C{None}). + The following values may be set by the indexing (i.e.,C{ parms["name"]}) operator. + The values are all strings (when not C{None}). All are optional except C{name} and C{targetLanguage}. Scripts are in - ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for - more description of these terms, as well as the content of the return result. + ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name + Translation documentation for more description of these terms, as well as the + content of the return result. C{name} The name to be translated. C{targetLangauge} The language into which the name is to be translated. - C{entityType} The entity type (TBD) of the name. + C{entityType} The entity type of the name. PERSON (default), LOCATION, or ORGANIZATION C{sourceLanguageOfOrigin} The language of origin of the name. @@ -265,8 +226,8 @@ class NameTranslationParameters(_DocumentParamSetBase): """ def __init__(self): - self.useMultipart = False - _DocumentParamSetBase.__init__( + self.use_multipart = False + _RequestParametersBase.__init__( self, ("name", "targetLanguage", @@ -275,22 +236,59 @@ def __init__(self): "sourceLanguageOfUse", "sourceScript", "targetScript", - "targetScheme", - "genre")) + "targetScheme")) + + def validate(self): + """Internal. 
Do not use.""" + for option in "name", "targetLanguage": # required + if self[option] is None: + raise RosetteException( + "missingParameter", + "Required Name Translation parameter is missing: " + option, + repr(option)) + + +class AddressSimilarityParameters(_RequestParametersBase): + """Parameter object for C{address-similarity} endpoint. + + C{address1} and C{address2} are required. + + `parameters` is optional. + + C{address1} The address to be matched, a C{address} object or address string. + + C{address2} The address to be matched, a C{address} object or address string. + + The C{address} object contains these optional fields: + city, island, district, stateDistrict, state, countryRegion, country, worldRegion, postCode, poBox + + `parameters` is a dictionary listing any parameter overrides to include. For example, `postCodeAddressFieldWeight`. + Setting `parameters` is not cumulative. Define all overrides at once. If defined multiple times, only the + final declaration is used. + + See `examples/address_similarity.py` + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("address1", "address2", "parameters")) def validate(self): """Internal. Do not use.""" - for n in ("name", "targetLanguage"): # required - if self[n] is None: + for option in "address1", "address2": # required + if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Translation parameter not supplied", - repr(n)) + "Required Address Similarity parameter is missing: " + option, + repr(option)) -class NameSimilarityParameters(_DocumentParamSetBase): +class NameSimilarityParameters(_RequestParametersBase): """Parameter object for C{name-similarity} endpoint. - All are required. + + C{name1} and C{name2} are required. + + `parameters` is optional. C{name1} The name to be matched, a C{name} object. 
@@ -298,34 +296,83 @@ class NameSimilarityParameters(_DocumentParamSetBase): The C{name} object contains these fields: - C{text} Text of the name, required. + C{text} Text of the name, required. + + C{language} Language of the name in ISO639 three-letter code, optional. + + C{script} The ISO15924 code of the name, optional. - C{language} Language of the name in ISO639 three-letter code, optional. + C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional. - C{script} The ISO15924 code of the name, optional. + `parameters` is a dictionary listing any parameter overrides to include. For example, `deletionScore`. + Setting `parameters` is not cumulative. Define all overrides at once. If defined multiple times, only the + final declaration is used. - C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional. + See `examples/name_similarity.py` """ def __init__(self): - self.useMultipart = False - _DocumentParamSetBase.__init__(self, ("name1", "name2")) + self.use_multipart = False + _RequestParametersBase.__init__(self, ("name1", "name2", "parameters")) def validate(self): """Internal. Do not use.""" - for n in ("name1", "name2"): # required - if self[n] is None: + for option in "name1", "name2": # required + if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Similarity parameter not supplied", - repr(n)) + "Required Name Similarity parameter is missing: " + option, + repr(option)) -class EndpointCaller: +class NameDeduplicationParameters(_RequestParametersBase): + """Parameter object for C{name-deduplication} endpoint. + Required: + C{names} A list of C{name} objects + C{threshold} Threshold to use to restrict cluster size. Can be null to use default value. + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("names", "threshold")) + + def validate(self): + """Internal. 
Do not use.""" + if self["names"] is None: # required + raise RosetteException( + "missingParameter", + "Required Name De-Duplication parameter is missing: names", + repr("names")) + + +class RecordSimilarityParameters(_RequestParametersBase): + """Parameter object for C{record-similarity} endpoint. + Required: + C{records} The records to be compared; where each left record is compared to the associated right record. + C{properties} Parameters used in the call + C{fields} The definition of the fields used in the comparison. There must be a minimum of 1 field and + can have a maximum of 5 fields. + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("fields", "properties", "records")) + + def validate(self): + """Internal. Do not use.""" + for option in ["records","fields"]: # required + if self[option] is None: + raise RosetteException( + "missingParameter", + "Required Record Similarity parameter is missing: " + option, + repr(option)) + + +class EndpointCaller(object): """L{EndpointCaller} objects are invoked via their instance methods to obtain results - from the Rosette server described by the L{API} object from which they + from the Analytics server described by the L{API} object from which they are created. Each L{EndpointCaller} object communicates with a specific endpoint - of the Rosette server, specified at its creation. Use the specific + of the Analytics server, specified at its creation. Use the specific instance methods of the L{API} object to create L{EndpointCaller} objects bound to corresponding endpoints. @@ -335,7 +382,7 @@ class EndpointCaller: The results of all operations are returned as python dictionaries, whose keys and values correspond exactly to those of the corresponding - JSON return value described in the Rosette web service documentation. + JSON return value described in the Analytics web service documentation. 
""" def __init__(self, api, suburl): @@ -345,14 +392,14 @@ def __init__(self, api, suburl): self.service_url = api.service_url self.user_key = api.user_key self.logger = api.logger - self.useMultipart = False + self.use_multipart = False self.suburl = suburl self.debug = api.debug self.api = api - def __finish_result(self, r, ename): - code = r.status_code - the_json = r.json() + def __finish_result(self, response, ename): + code = response.status_code + the_json = response.json() if code == 200: return the_json else: @@ -366,21 +413,45 @@ def __finish_result(self, r, ename): complaint_url = ename + " " + self.suburl raise RosetteException(code, complaint_url + - " : failed to communicate with Rosette", msg) + " : failed to communicate with Babel Street Analytics API", msg) + + def __set_headers(self): + headers = {'Accept': _APPLICATION_JSON, + _CUSTOM_HEADER_PREFIX + 'Binding': _BINDING_LANGUAGE, + _CUSTOM_HEADER_PREFIX + 'Binding-Version': _BINDING_VERSION, + #TODO Remove in future release + _LEGACY_CUSTOM_HEADER_PREFIX + 'Binding': _BINDING_LANGUAGE, + _LEGACY_CUSTOM_HEADER_PREFIX + 'Binding-Version': _BINDING_VERSION} + + custom_headers = self.api.get_custom_headers() + if custom_headers is not None: + for key in custom_headers.keys(): + if _CUSTOM_HEADER_PATTERN.match(key) is not None: + headers[key] = custom_headers[key] + else: + raise RosetteException("badHeader", + "Custom header name must begin with \"" + _CUSTOM_HEADER_PREFIX + "\" or \"" + + _LEGACY_CUSTOM_HEADER_PREFIX + "\"", + key) + self.api.clear_custom_headers() + + if self.debug: + headers[_LEGACY_CUSTOM_HEADER_PREFIX + 'Devel'] = 'true' + + if self.user_key is not None: + headers["X-BabelStreetAPI-Key"] = self.user_key + + return headers def info(self): """Issues an "info" request to the L{EndpointCaller}'s specific endpoint. 
@return: A dictionary telling server version and other identifying data.""" - url = self.service_url + "info" - headers = {'Accept': 'application/json', 'X-RosetteAPI-Binding': 'python', 'X-RosetteAPI-Binding-Version': _BINDING_VERSION} - if self.debug: - headers['X-RosetteAPI-Devel'] = 'true' + url = self.service_url + self.api.endpoints["INFO"] + headers = self.__set_headers() self.logger.info('info: ' + url) - if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key - r = self.api._get_http(url, headers=headers) - return self.__finish_result(r, "info") + response = self.api.get_http(url, headers=headers) + return self.__finish_result(response, "info") def ping(self): """Issues a "ping" request to the L{EndpointCaller}'s (server-wide) endpoint. @@ -388,60 +459,66 @@ def ping(self): or is not the right server or some other error occurs, it will be signalled.""" - url = self.service_url + 'ping' - headers = {'Accept': 'application/json', 'X-RosetteAPI-Binding': 'python', 'X-RosetteAPI-Binding-Version': _BINDING_VERSION} - if self.debug: - headers['X-RosetteAPI-Devel'] = 'true' + url = self.service_url + self.api.endpoints['PING'] + headers = self.__set_headers() self.logger.info('Ping: ' + url) - if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key - r = self.api._get_http(url, headers=headers) - return self.__finish_result(r, "ping") + response = self.api.get_http(url, headers=headers) + return self.__finish_result(response, "ping") - def call(self, parameters): + def call(self, parameters, paramtype=None): """Invokes the endpoint to which this L{EndpointCaller} is bound. Passes data and metadata specified by C{parameters} to the server endpoint to which this L{EndpointCaller} object is bound. 
For all - endpoints except C{name-translation} and C{name-similarity}, it must be a L{DocumentParameters} - object or a string; for C{name-translation}, it must be an L{NameTranslationParameters} object; - for C{name-similarity}, it must be an L{NameSimilarityParameters} object. For relationships, - it may be an L(DocumentParameters) or an L(RelationshipsParameters). + endpoints except C{name-translation} and C{name-similarity}, it must be + a L{DocumentParameters} object or a string; for C{name-translation}, it + must be an L{NameTranslationParameters} object; for C{name-similarity}, + it must be an L{NameSimilarityParameters} object. For relationships, + it may be an L(DocumentParameters). In all cases, the result is returned as a python dictionary conforming to the JSON object described in the endpoint's entry - in the Rosette web service documentation. + in the Analytics web service documentation. @param parameters: An object specifying the data, and possible metadata, to be processed by the endpoint. See the details for those object types. - @type parameters: For C{name-translation}, L{NameTranslationParameters}, otherwise L{DocumentParameters} or L{str} + @type parameters: Parameters types or L{str} for document request. + @param paramtype: Required parameters type. @return: A python dictionary expressing the result of the invocation. 
""" + if paramtype and not isinstance(parameters, paramtype): + raise RosetteException( + "incompatible", + "The parameters must be " + str(paramtype), + self.suburl) - if not isinstance(parameters, _DocumentParamSetBase): - if self.suburl != "name-similarity" and self.suburl != "name-translation": - text = parameters - parameters = DocumentParameters() - parameters['content'] = text - else: - raise RosetteException( - "incompatible", - "Text-only input only works for DocumentParameter endpoints", - self.suburl) + if type(parameters) == str: + text = parameters + parameters = DocumentParameters() + parameters['content'] = text - self.useMultipart = parameters.useMultipart + if not paramtype and not isinstance(parameters, DocumentParameters): + raise RosetteException( + "incompatible", + "The parameters must be string or DocumentParameters", + self.suburl) + + self.use_multipart = parameters.use_multipart url = self.service_url + self.suburl - params_to_serialize = parameters.serialize() + params_to_serialize = parameters.serialize(self.api.options) headers = {} if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key - headers["X-RosetteAPI-Binding"] = "python" - headers["X-RosetteAPI-Binding-Version"] = _BINDING_VERSION - if self.useMultipart: + headers = self.__set_headers() + + if self.use_multipart: + payload = None + if self.api.url_parameters: + payload = self.api.url_parameters + params = dict( (key, value) for key, - value in params_to_serialize.iteritems() if key == 'language') + value in params_to_serialize.items() if key == 'language') files = { 'content': ( os.path.basename( @@ -451,46 +528,46 @@ def call(self, parameters): 'request': ( 'request_options', json.dumps(params), - 'application/json')} + _APPLICATION_JSON)} request = requests.Request( - 'POST', url, files=files, headers=headers) - prepared_request = request.prepare() - session = requests.Session() - resp = session.send(prepared_request) - rdata = resp.content - 
response_headers = {"responseHeaders": dict(resp.headers)} - status = resp.status_code - r = _ReturnObject(_my_loads(rdata, response_headers), status) + 'POST', url, files=files, headers=headers, params=payload) + prepared_request = self.api.session.prepare_request(request) + settings = self.api.session.merge_environment_settings(prepared_request.url, {}, {}, None, None) + response = self.api.session.send(prepared_request, **settings) + rdata = response.content + response_headers = {"responseHeaders": dict(response.headers)} + status = response.status_code + response = _ReturnObject( + _my_loads(rdata, response_headers), status) else: if self.debug: - headers['X-RosetteAPI-Devel'] = True + headers[_LEGACY_CUSTOM_HEADER_PREFIX + 'Devel'] = 'true' self.logger.info('operate: ' + url) - headers['Accept'] = "application/json" + headers['Accept'] = _APPLICATION_JSON headers['Accept-Encoding'] = "gzip" - headers['Content-Type'] = "application/json" - r = self.api._post_http(url, params_to_serialize, headers) - return self.__finish_result(r, "operate") + headers['Content-Type'] = _APPLICATION_JSON + response = self.api.post_http(url, params_to_serialize, headers) + return self.__finish_result(response, "operate") -class API: +class API(object): """ - Rosette Python Client Binding API; representation of a Rosette server. + Analytics Python Client Binding API; representation of an Analytics server. Call instance methods upon this object to obtain L{EndpointCaller} objects - which can communicate with particular Rosette server endpoints. + which can communicate with particular Analytics server endpoints. """ def __init__( self, user_key=None, - service_url='https://api.rosette.com/rest/v1/', + service_url='https://analytics.babelstreet.com/rest/v1/', retries=5, - reuse_connection=True, refresh_duration=0.5, debug=False): """ Create an L{API} object. - @param user_key: (Optional; required for servers requiring authentication.) 
An authentication string to be sent - as user_key with all requests. The default Rosette server requires authentication. - to the server. + @param user_key: (Optional; required for servers requiring authentication.) + An authentication string to be sent as user_key with all requests. The + default Analytics server requires authentication to the server. """ # logging.basicConfig(filename="binding.log", filemode="w", level=logging.DEBUG) self.user_key = user_key @@ -500,88 +577,146 @@ def __init__( self.logger.info('Initialized on ' + self.service_url) self.debug = debug - if (retries < 1): + if retries < 1: retries = 1 - if (refresh_duration < 0): + if refresh_duration < 0: refresh_duration = 0 - self.num_retries = retries - self.reuse_connection = reuse_connection self.connection_refresh_duration = refresh_duration - self.http_connection = None - - def _connect(self, parsedUrl): - """ Simple connection method - @param parsedUrl: The URL on which to process + self.options = {} + self.custom_headers = {} + self.url_parameters = {} + self.max_pool_size = 1 + self.session = requests.Session() + self.user_agent_string = 'Babel-Street-Analytics-API-Python/' + _BINDING_VERSION + '/' + platform.python_version() + + self.morphology_output = { + 'LEMMAS': 'lemmas', + 'PARTS_OF_SPEECH': 'parts-of-speech', + 'COMPOUND_COMPONENTS': 'compound-components', + 'HAN_READINGS': 'han-readings', + 'COMPLETE': 'complete' + } + + self.endpoints = { + 'ADDRESS_SIMILARITY': 'address-similarity', + 'CATEGORIES': 'categories', + 'ENTITIES': 'entities', + 'INFO': 'info', + 'LANGUAGE': 'language', + 'MORPHOLOGY': 'morphology', + 'NAME_TRANSLATION': 'name-translation', + 'NAME_SIMILARITY': 'name-similarity', + 'NAME_DEDUPLICATION': 'name-deduplication', + 'PING': 'ping', + 'RELATIONSHIPS': 'relationships', + 'SEMANTIC_VECTORS': 'semantics/vector', + 'SENTENCES': 'sentences', + 'SENTIMENT': 'sentiment', + 'SIMILAR_TERMS': 'semantics/similar', + 'SYNTAX_DEPENDENCIES': 'syntax/dependencies', + 
'TEXT_EMBEDDING': 'semantics/vector', + 'TOKENS': 'tokens', + 'TOPICS': 'topics', + 'TRANSLITERATION': 'transliteration', + 'EVENTS': 'events', + 'RECORD_SIMILARITY': 'record-similarity' + } + + def __del__(self): + try: + self.session.close() + except ReferenceError: + pass + + def get_binding_version(self): + """ Return the current binding version """ + return _BINDING_VERSION + + def get_user_agent_string(self): + """ Return the User-Agent string """ + return self.user_agent_string + + def set_pool_size(self, new_pool_size): + """Sets the connection pool size. + @parameter new_pool_size: pool size to set """ - if not self.reuse_connection or self.http_connection is None: - loc = parsedUrl.netloc - if parsedUrl.scheme == "https": - self.http_connection = httplib.HTTPSConnection(loc) - else: - self.http_connection = httplib.HTTPConnection(loc) + self.max_pool_size = new_pool_size + adapter = requests.adapters.HTTPAdapter( + pool_maxsize=new_pool_size) + if 'https:' in self.service_url: + self.session.mount('https://', adapter) + else: + self.session.mount('http://', adapter) # NOSONAR - def _make_request(self, op, url, data, headers): - """ - Handles the actual request, retrying if a 429 is encountered + def __adjust_concurrency(self, dict_headers): + if _CONCURRENCY_HEADER in dict_headers: + if dict_headers[_CONCURRENCY_HEADER] != self.max_pool_size: + self.set_pool_size(dict_headers[_CONCURRENCY_HEADER]) + elif _LEGACY_CONCURRENCY_HEADER in dict_headers: + if dict_headers[_LEGACY_CONCURRENCY_HEADER] != self.max_pool_size: + self.set_pool_size(dict_headers[_LEGACY_CONCURRENCY_HEADER]) - @param op: POST or GET + def _make_request(self, operation, url, data, headers): + """ + @param operation: POST or GET @param url: endpoing URL @param data: request data @param headers: request headers """ - headers['User-Agent'] = "RosetteAPIPython/" + _BINDING_VERSION - parsedUrl = urlparse.urlparse(url) - - self._connect(parsedUrl) + headers['User-Agent'] = 
self.get_user_agent_string() message = None code = "unknownError" rdata = None response_headers = {} - for i in range(self.num_retries + 1): - try: - self.http_connection.request(op, url, data, headers) - response = self.http_connection.getresponse() - status = response.status - rdata = response.read() - response_headers["responseHeaders"] = ( - dict(response.getheaders())) - if status == 200: - if not self.reuse_connection: - self.http_connection.close() - return rdata, status, response_headers - if status == 429: - code = status - message = "{0} ({1})".format(rdata, i) - time.sleep(self.connection_refresh_duration) - self.http_connection.close() - self._connect(parsedUrl) - continue - if rdata is not None: - try: - the_json = _my_loads(rdata, response_headers) - if 'message' in the_json: - message = the_json['message'] - if "code" in the_json: - code = the_json['code'] - else: - code = status - raise RosetteException(code, message, url) - except: - raise - except (httplib.BadStatusLine, gaierror): - raise RosetteException( - "ConnectionError", - "Unable to establish connection to the Rosette API server", - url) - if not self.reuse_connection: - self.http_connection.close() + payload = None + if self.url_parameters: + payload = self.url_parameters + + request = requests.Request( + operation, url, data=data, headers=headers, params=payload) + prepared_request = self.session.prepare_request(request) + # Take into account environment settings, e.g. 
HTTP_PROXY and HTTPS_PROXY + settings = self.session.merge_environment_settings(prepared_request.url, {}, {}, None, None) + + try: + response = self.session.send(prepared_request, **settings) + status = response.status_code + rdata = response.content + dict_headers = dict(response.headers) + self.__adjust_concurrency(dict_headers) + response_headers = {"responseHeaders": dict_headers} + + if status == 200: + return rdata, status, response_headers + if rdata is not None: + try: + the_json = _my_loads(rdata, response_headers) + if 'message' in the_json: + message = the_json['message'] + if "code" in the_json: + code = the_json['code'] + else: + code = status + if not message: + message = rdata + raise RosetteException(code, message, url) + except json.JSONDecodeError as exception: + raise RosetteException( + exception, + "Problem decoding JSON", + rdata) + except requests.exceptions.RequestException as exception: + raise RosetteException( + exception, + "Unable to establish connection to the Analytics API server", + url) raise RosetteException(code, message, url) - def _get_http(self, url, headers): + def get_http(self, url, headers): """ Simple wrapper for the GET request @@ -592,7 +727,7 @@ def _get_http(self, url, headers): "GET", url, None, headers) return _ReturnObject(_my_loads(rdata, response_headers), status) - def _post_http(self, url, data, headers): + def post_http(self, url, data, headers): """ Simple wrapper for the POST request @@ -614,6 +749,98 @@ def _post_http(self, url, data, headers): return _ReturnObject(_my_loads(rdata, response_headers), status) + def get_pool_size(self): + """ + Returns the maximum pool size, which is the returned x-rosetteapi-concurrency value + """ + return int(self.max_pool_size) + + def set_option(self, name, value): + """ + Sets an option + + @param name: name of option + @param value: value of option + """ + if value is None: + self.options.pop(name, None) + else: + self.options[name] = value + + def get_option(self, 
name): + """ + Gets an option + + @param name: name of option + + @return: value of option + """ + if name in self.options.keys(): + return self.options[name] + else: + return None + + def clear_options(self): + """ + Clears all options + """ + self.options.clear() + + def set_url_parameter(self, name, value): + """ + Sets a URL parameter + + @param name: name of parameter + @param value: value of parameter + """ + if value is None: + self.url_parameters.pop(name, None) + else: + self.url_parameters[name] = value + + def get_url_parameter(self, name): + """ + Gets a URL parameter + + @param name: name of parameter + + @return: value of parameter + """ + if name in self.url_parameters.keys(): + return self.url_parameters[name] + else: + return None + + def clear_url_parameters(self): + """ + Clears all options + """ + self.url_parameters.clear() + + def set_custom_headers(self, name, value): + """ + Sets custom headers + + @param headers: array of custom headers to be set + """ + if value is None: + self.custom_headers.pop(name, None) + else: + self.custom_headers[name] = value + + def get_custom_headers(self): + """ + Get custom headers + """ + return self.custom_headers + + def clear_custom_headers(self): + """ + Clears custom headers + """ + + self.custom_headers.clear() + def ping(self): """ Create a ping L{EndpointCaller} for the server and ping it. @@ -636,7 +863,7 @@ def language(self, parameters): @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of language identification.""" - return EndpointCaller(self, "language").call(parameters) + return EndpointCaller(self, self.endpoints['LANGUAGE']).call(parameters) def sentences(self, parameters): """ @@ -645,7 +872,7 @@ def sentences(self, parameters): and possible metadata, to be processed by the sentence identifier. 
@type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of sentence identification.""" - return EndpointCaller(self, "sentences").call(parameters) + return EndpointCaller(self, self.endpoints['SENTENCES']).call(parameters) def tokens(self, parameters): """ @@ -654,9 +881,9 @@ def tokens(self, parameters): and possible metadata, to be processed by the tokens identifier. @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of tokenization.""" - return EndpointCaller(self, "tokens").call(parameters) + return EndpointCaller(self, self.endpoints['TOKENS']).call(parameters) - def morphology(self, parameters, facet=MorphologyOutput.COMPLETE): + def morphology(self, parameters, facet=""): """ Create an L{EndpointCaller} to returns a specific facet of the morphological analyses of texts to which it is applied and call it. @@ -666,24 +893,20 @@ def morphology(self, parameters, facet=MorphologyOutput.COMPLETE): @param facet: The facet desired, to be returned by the created L{EndpointCaller}. @type facet: An element of L{MorphologyOutput}. @return: A python dictionary containing the results of morphological analysis.""" - return EndpointCaller(self, "morphology/" + facet).call(parameters) + if facet == "": + facet = self.morphology_output['COMPLETE'] + return EndpointCaller(self, self.endpoints['MORPHOLOGY'] + "/" + facet).call(parameters) - def entities(self, parameters, resolve_entities=False): + def entities(self, parameters): """ Create an L{EndpointCaller} to identify named entities found in the texts - to which it is applied and call it. Linked entity information is optional, and - its need must be specified at the time the operator is created. + to which it is applied and call it. @param parameters: An object specifying the data, and possible metadata, to be processed by the entity identifier. 
@type parameters: L{DocumentParameters} or L{str} - @param resolve_entities: Specifies whether or not linked entity information will - be wanted. - @type resolve_entities: Boolean @return: A python dictionary containing the results of entity extraction.""" - if resolve_entities: - return EndpointCaller(self, "entities/linked").call(parameters) - else: - return EndpointCaller(self, "entities").call(parameters) + + return EndpointCaller(self, self.endpoints['ENTITIES']).call(parameters) def categories(self, parameters): """ @@ -693,7 +916,7 @@ def categories(self, parameters): and possible metadata, to be processed by the category identifier. @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of categorization.""" - return EndpointCaller(self, "categories").call(parameters) + return EndpointCaller(self, self.endpoints['CATEGORIES']).call(parameters) def sentiment(self, parameters): """ @@ -707,7 +930,7 @@ def sentiment(self, parameters): to which is applied. @return: An L{EndpointCaller} object which can return sentiments of texts to which it is applied.""" - return EndpointCaller(self, "sentiment").call(parameters) + return EndpointCaller(self, self.endpoints['SENTIMENT']).call(parameters) def relationships(self, parameters): """ @@ -715,9 +938,18 @@ def relationships(self, parameters): which it is applied and call it. @param parameters: An object specifying the data, and possible metadata, to be processed by the relationships identifier. 
- @type parameters: L{DocumentParameters}, L(RelationshipsParameters), or L{str} + @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of relationship extraction.""" - return EndpointCaller(self, "relationships").call(parameters) + return EndpointCaller(self, self.endpoints['RELATIONSHIPS']).call(parameters) + + def address_similarity(self, parameters): + """ + Create an L{EndpointCaller} to perform address similarity scoring and call it. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the name matcher. + @type parameters: L{AddressSimilarityParameters} + @return: A python dictionary containing the results of name matching.""" + return EndpointCaller(self, self.endpoints['ADDRESS_SIMILARITY']).call(parameters, AddressSimilarityParameters) def name_translation(self, parameters): """ @@ -727,7 +959,7 @@ def name_translation(self, parameters): and possible metadata, to be processed by the name translator. @type parameters: L{NameTranslationParameters} @return: A python dictionary containing the results of name translation.""" - return EndpointCaller(self, "name-translation").call(parameters) + return EndpointCaller(self, self.endpoints['NAME_TRANSLATION']).call(parameters, NameTranslationParameters) def translated_name(self, parameters): """ deprecated @@ -746,7 +978,7 @@ def name_similarity(self, parameters): and possible metadata, to be processed by the name matcher. 
@type parameters: L{NameSimilarityParameters} @return: A python dictionary containing the results of name matching.""" - return EndpointCaller(self, "name-similarity").call(parameters) + return EndpointCaller(self, self.endpoints['NAME_SIMILARITY']).call(parameters, NameSimilarityParameters) def matched_name(self, parameters): """ deprecated @@ -756,3 +988,79 @@ def matched_name(self, parameters): @type parameters: L{NameSimilarityParameters} @return: A python dictionary containing the results of name matching.""" return self.name_similarity(parameters) + + def name_deduplication(self, parameters): + """ + Fuzzy de-duplication of a list of names + @param parameters: An object specifying a list of names as well + as a threshold + @type parameters: L{NameDeduplicationParameters} + @return: A python dictionary containing the results of de-duplication""" + return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters, NameDeduplicationParameters) + + def record_similarity(self, parameters): + """ + Create an L{EndpointCaller} to get similarity core between a list of records and call it. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the record matcher. + @type parameters: L{RecordSimilarityParameters} + @return: A python dictionary containing the results of record matching.""" + return EndpointCaller(self, self.endpoints['RECORD_SIMILARITY']).call(parameters, RecordSimilarityParameters) + + def text_embedding(self, parameters): + """ deprecated + Create an L{EndpointCaller} to identify text vectors found in the texts + to which it is applied and call it. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of text embedding.""" + return self.semantic_vectors(parameters) + + def semantic_vectors(self, parameters): + """ + Create an L{EndpointCaller} to identify text vectors found in the texts + to which it is applied and call it. 
+ @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of semantic vectors.""" + return EndpointCaller(self, self.endpoints['SEMANTIC_VECTORS']).call(parameters) + + def syntax_dependencies(self, parameters): + """ + Create an L{EndpointCaller} to identify the syntactic dependencies in the texts + to which it is applied and call it. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of syntactic dependencies + identification""" + return EndpointCaller(self, self.endpoints['SYNTAX_DEPENDENCIES']).call(parameters) + + def transliteration(self, parameters): + """ + Transliterate given context + @type parameters: L{DocumentParameters} + @return: A python dictionary containing the results of the transliteration""" + return EndpointCaller(self, self.endpoints['TRANSLITERATION']).call(parameters) + + def topics(self, parameters): + """ + Topics returns keyphrases and concepts related to the provided content + @type parameters: DocumentParameters + @return; A python dictionary containing the results""" + return EndpointCaller(self, self.endpoints['TOPICS']).call(parameters) + + def similar_terms(self, parameters): + """ + Create an L{EndpointCaller} to identify terms most similar to the input in + the requested languages + :param parameters: DocumentParameters + :return: A python dictionary containing the similar terms and their similarity + """ + return EndpointCaller(self, self.endpoints['SIMILAR_TERMS']).call(parameters) + + def events(self, parameters): + """ + Create an L{EndpointCaller} to identify events found in the texts. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the 'events' identifier. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of event extraction. 
+ """ + return EndpointCaller(self, self.endpoints['EVENTS']).call(parameters) diff --git a/setup.py b/setup.py index 25f8d39..1bcd653 100755 --- a/setup.py +++ b/setup.py @@ -1,48 +1,59 @@ #!/usr/bin/env python -from setuptools import setup -import rosette +"""setup.py""" import os import io +from setuptools import setup +import rosette NAME = "rosette_api" -DESCRIPTION = "Rosette API Python client SDK" -AUTHOR = "Basis Technology Corp." -AUTHOR_EMAIL = "rosette_api@basistech.com" -HOMEPAGE = "https://developer.rosette.com" +DESCRIPTION = "Babel Street Analytics API Python client SDK" +AUTHOR = "Analytics by Babel Street" +AUTHOR_EMAIL = "analyticssupport@babelstreet.com" +HOMEPAGE = "https://github.com/rosette-api/python" VERSION = rosette.__version__ -here = os.path.abspath(os.path.dirname(__file__)) +HERE = os.path.abspath(os.path.dirname(__file__)) def read(*filenames, **kwargs): + """read function""" encoding = kwargs.get('encoding', 'utf-8') sep = kwargs.get('sep', '\n') buf = [] for filename in filenames: - with io.open(filename, encoding=encoding) as f: - buf.append(f.read()) + with io.open(filename, encoding=encoding) as the_file: + buf.append(the_file.read()) return sep.join(buf) -long_description = read('README.md') -setup(name=NAME, - author=AUTHOR, - author_email=AUTHOR_EMAIL, - description=DESCRIPTION, - license='Apache License', - long_description=long_description, - packages=['rosette'], - install_requires=['requests'], - platforms='any', - url=HOMEPAGE, - version=VERSION, - classifiers=[ - 'Programming Language :: Python', - 'Development Status :: 4 - Beta', - 'Natural Language :: English', - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Topic :: Software Development :: Libraries :: Python Modules'] - ) +LONG_DESCRIPTION = read('README.md') + +setup( + name=NAME, + author=AUTHOR, + author_email=AUTHOR_EMAIL, + 
description=DESCRIPTION, + license='Apache License', + long_description=LONG_DESCRIPTION, + long_description_content_type='text/markdown', + packages=['rosette'], + install_requires=['requests'], + platforms='any', + url=HOMEPAGE, + version=VERSION, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Topic :: Software Development :: Libraries :: Python Modules' + ] +) diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 0000000..2bdb883 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,5 @@ +sonar.projectKey=rosette-api-python-binding +sonar.sources=rosette +sonar.exclusions=**/tests/**,**/docs/**,**/examples/** +sonar.python.coverage.reportPaths=coverage.xml +#sonar.branch.name=RCB-596-pool-size diff --git a/tests/__init__.py b/tests/__init__.py index 35f570e..4256e37 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2015 Basis Technology Corporation. +Copyright (c) 2014-2022 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/mock-data/README.md b/tests/mock-data/README.md deleted file mode 100644 index 4572be8..0000000 --- a/tests/mock-data/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# mock-data - -This is the mock data that is used for unit testing of the Rosette API bindings. 
It contains a variety of request, response and status files that are used by each binding's unit testing framework to mock the server interactions. diff --git a/tests/mock-data/request/ara-doc-entities.json b/tests/mock-data/request/ara-doc-entities.json deleted file mode 100644 index c8ec3dc..0000000 --- a/tests/mock-data/request/ara-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "الخميس 5/2/1431 هـ - الموافق 21/1/2010 م (آخر تحديث) الساعة 10:01 (مكة المكرمة)، 7:01 (غرينتش)\n\nناتو يفكر بمسؤول مدني لأفغانستان\n\nيخطط حلف شمال الأطلسي (ناتو) لتعيين مسؤول مدني كبير في أفغانستان، وسط دعوات لتحسين التنسيق السياسي والتنموي في البلاد وفق ما نقلته صحيفة وول ستريت. وقالت الصحيفة إن السفير البريطاني في كابل في مقدمة المرشحين لهذا المنصب والذي من المحتمل أن يعلن بالتزامن مع مؤتمر دولي عن مستقبل أفغانستان المقرر عقده في لندن في 28 يناير/كانون الثاني المقبل.\n وأضافت الصحيفة -في تقرير لها من كابل- أن المسؤول الجديد سيترأس دعامة مدنية للتحالف الذي تقوده الولايات المتحدة لإدارة التمويل والمساعدات للولايات الأفغانية لتحاشي المؤسسات الأفغانية الفاسدة\". وكان الأمين العام للأمم المتحدة بان كي مون دعا هذا الشهر لتعيين مسؤول مدني رفيع ضمن قوة المساعدة الدولية لإرساء الأمن في أفغانستان (إيساف) التي يقودها حلف الأطلسي للمساعدة في تنسيق الجهود السياسية والتنموية في الحرب التي دخلت عامها التاسع. وأضاف أن تعيين هذا المسؤول سيتيح تحسين التنسيق بين العمل السياسي والتنموي وخصوصا عبر فرق إعادة البناء في الولايات الأفغانية. وقالت جورنال ستريت إن المنصب الجديد سيكون نظيرا للأميركي ستانلي ماكريستال قائد القوات الأميركية وقوات حلف الأطلسي في أفغانستان. ويتوقع وصول أربعين ألف جندي آخرين لأفغانستان في الأشهر القليلة المقبلة في إطار إستراتيجية لمجابهة العمليات المسلحة لحركة طالبان. 
وأشارت الصحيفة إلى أن خطة تعيين السفير البريطاني مارك سيدويل وجدت تأييد الولايات المتحدة ومن المرجح أن يصادق عليها باقي الحلفاء.", - "language": "ara" -} \ No newline at end of file diff --git a/tests/mock-data/request/eng-doc-entities.json b/tests/mock-data/request/eng-doc-entities.json deleted file mode 100644 index a10d166..0000000 --- a/tests/mock-data/request/eng-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "Samsung Can Sell Its Tablet In The U.S. Market\n\n\nThe war between Samsung Electronics and Apple seems to be never ending. The companies have engaged in an international warfare, across continents, with more than 20 cases in 10 countries. The latest news in the Samsung vs. Apple conflict comes from U.S. where a judge decided that Samsung can sell its tablet in the U.S. market.\n\nIn United States, Apple initiated a legal action against Samsung in April, claiming that the South Korean smartphones and tablets “slavishly” copy the iPhone and the iPad. As a result, Apple requested that Samsung will be prohibited from selling the gadgets in the U.S. market. The much expected ruling came late on Friday, as U.S. District Judge Lucy Koh denied Apple’s request for a preliminary injuction against Samsung.\n\nU.S. District Judge Lucy Koh said that “It is not clear than an injuction on Samsung’s accused devices would prevent Apple from being irreparably harmed”. As a result, Koh rejected Apple’s request to bid sales of three Samsung smartphones models and the Samsung Tab 10.1. In the third quarter, Samsung had 23.8 percent of the global smartphone market, nine points higher than Apple.\n\nThe U.S. judge wrote that “Apple has established a likelihood of success on the merits at trial” regarding some of Samsung’s smartphones. 
Apple would likely prove Samsung infringed one of its tablet patents, but it did not show that it was likely to overcome Samsung’s challenges to the patent’s validity.\n\nAlthough this might be a minor victory for Samsung, as Apple still has high changes of winning the overall lawsuit, Koh’s decision makes it possible for the South Korean company to start Christmas sales.\n\nAnalysts say that global tablet sales are expected to boost to more than 50 million this year, with Apple still the leader of the market. The Silicon Valley based company sold 11.12 million units during the September quarter. So far, Apple sold more than 30 million iPads worldwide.\n\nLast week, a judge in Australia ruled in favor of Apple by extending a ban on Samsung’s iPad sales within the country. Australia is a key market for Samsung Galaxy Tablet and two weeks of banned sales isn’t much, but it can get urgent if Samsung won’t start selling the gadget before Christmas.\n\n\n\nhttp://newsinabox.net/2202/samsung-can-sell-its-tablet-in-the-u-s-market.html\n2011.12.05", - "language": "eng" -} \ No newline at end of file diff --git a/tests/mock-data/request/fra-doc-entities.json b/tests/mock-data/request/fra-doc-entities.json deleted file mode 100644 index 2317b00..0000000 --- a/tests/mock-data/request/fra-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "Les Etats-Unis face au défi persistant du chômage\n\nUn chômeur américain sur deux dit souffrir d'anxiété ou de dépression et a dû emprunter de l'argent à des amis. Selon une enquête publiée, lundi 18 janvier, par le Census Bureau (équivalent de l'Insee), en deux ans, le nombre de couples au chômage avec enfants mineurs a doublé. Près de 40 % des ces parents notent des \"modifications de comportement\" chez leurs enfants qu'ils attribuent à leur perte du travail. 
La moitié évoquent une \"transformation fondamentale\" de leur existence - le premier des changements consistant en la perte presque instantanée de toute couverture sociale.\nDurant tout le mois de décembre, à l'approche de l'anniversaire de la prise de fonctions du président Barack Obama, le 20 janvier 2009, les reportages et les enquêtes sur les conséquences de ces destructions d'emploi sont devenus une préoccupation obsédante dans les médias. \"Je me sens comme la lie de la terre\", disait récemment un chômeur à une heure de grande écoute. Alors, l'Amérique découvre sur les écrans tous ces visages ravagés qui évoquent leur \"honte\" et s'interroge : 10 % de chômage, comment en est-on arrivé là ?\n\nPour beaucoup d'analystes, c'est pour avoir négligé l'impact du chômage alors que se profilaient des signes de reprise économique (et surtout financière) que la Maison Blanche et le Parti démocrate se trouvent confrontés à une forte désaffection de l'opinion à dix mois des élections à mi-mandat de novembre.\n\nAlors que le thème de l'emploi envahissait le champ des préoccupations, M. Obama a continué de donner la priorité à la réforme de la santé, à la lutte contre le réchauffement climatique ou la régulation financière. Tous sujets essentiels mais \"bien moins palpables, dans une période de crise aiguë, que le chômage, qui arase tout\", note Dean Baker, codirecteur du Centre de recherches sur les politiques économiques, un groupe de réflexion démocrate à Washington.\n\nUne étude du département du travail, publiée le 8 janvier, montre une différence essentielle entre l'impact de la crise actuelle sur l'emploi et les précédentes récessions. Lors de celles de 1974-1976 et de 1990-1993, le chômage était moins important, tant en chiffres absolus qu'en durée. 
Cette comparaison reste valide pour la récession de 1981-1983, qui vit le président républicain Ronald Reagan, un an après son élection, enregistrer son plus bas soutien dans l'opinion.\n\nPour résumer le désastre actuel : les Etats-Unis ont perdu depuis deux ans 5,24 % de leur emploi, alors que la chute n'avait été que de 1,4 % à 3 %, au pire, lors des récessions précédentes. Entre chômage total et partiel, la crise touche le travail de près d' un Américain sur cinq. N'ayant cessé de répéter que son pays affrontait \"la pire crise depuis la Grande Dépression\" des années 1930, Barack Obama avait donné le sentiment d'être très conscient du problème. Or là est le paradoxe : une fois élu, le président est apparu comme un dirigeant qui a fait un diagnostic correct mais n'en a tiré aucune conclusion.\n\nL'Amérique avait perdu 3 078 000 chômeurs lors de la dernière année du mandat de George Bush ; elle en a perdu 4 228 000 en un an de présidence Obama ! Le 10 janvier, invoquant \"l'urgence\", Christina Romer, présidente du conseil économique de M. Obama, a appelé à vite injecter 75 milliards de dollars supplémentaires pour régénérer l'emploi. Nul ne doute que M. Obama privilégiera cette nécessité en 2010.\n\nComment remédier au problème ? La plupart des analystes, constatant le \"découplage\" entre la reprise économique et celle de l'emploi, sont circonspects. Chef économiste de Goldman Sachs, Jan Hatzius, dans un texte intitulé \"10 questions pour 2010\", s'attend \"à un solde positif de 100 000 emplois par mois dès le second trimestre, insuffisant pour modifier significativement le taux de chômage\". Compte tenu de leur évolution démographique, les Etats-Unis doivent en effet créer 100 000 emplois mensuels pour juste stabiliser l'emploi.\n\nDean Baker explique encore que la marge de manoeuvre présidentielle est restreinte : \"Obama a raté le coche en limitant son plan de relance. 
Maintenant, il lui est politiquement impossible de retourner devant le Congrès pour l'augmenter. Les conséquences sont désastreuses. Même s'il lui reste 200 milliards de dollars à dépenser (sur 787 milliards), l'essentiel de l'impact du plan est passé.\"\n\nLe pire, selon lui, est que le président a fait son choix en toute connaissance de cause, pour \"privilégier la politique\". C'est-à-dire un compromis avec certains républicains qu'il n'a jamais obtenu. Pour M. Baker, cette quête du \"consensus\" a entraîné son parti dans l'état où il est. Et de conclure : \"Les républicains ont une stratégie efficace : ils bloquent tout changement pour dénoncer ensuite un président qui n'agit pas. Il est temps que celui-ci dénonce ce comportement de sabotage de l'économie nationale.\"\n\nMercredi, M. Obama a dit \"regretter d'avoir perdu le sens du contact direct avec les Américains sur leurs valeurs essentielles\". Son principal conseiller, David Axelrod, a évoqué \"les salaires bloqués, les emplois perdus\". Pour autant, changera-t-il d'attitude vis-à-vis du Congrès ? Peu y croient.\n\nLa Chambre a voté en décembre 2009 une loi sur la création d'emplois de 174 milliards de dollars non encore adoptée au Sénat. Les républicains assimilent désormais toute dépense publique à une gabegie. Harry Reid, le leader des démocrates au Sénat, négocierait déjà avec eux. Certains imaginent un abandon, pourtant peu probable, du plan d'assurance santé afin de parvenir à un soutien républicain aux embauches dans les PME. D'autres envisagent d'affecter à l'emploi 75 milliards de dollars pris sur le plan de sauvetage de la finance américaine (dit TARP). Plus encore que M. 
Obama, c'est son parti, tétanisé par la perspective d'une défaite électorale dans dix mois, qui a besoin de mesures rapides.\n\n\n\nhttp://www.lemonde.fr/ameriques/article/2010/01/21/les-etats-unis-face-au-defi-persistant-du-chomage_1294839_3222.html#xtor=RSS-3210\n2010.01.21", - "language": "fra" -} \ No newline at end of file diff --git a/tests/mock-data/request/jpn-doc-entities.json b/tests/mock-data/request/jpn-doc-entities.json deleted file mode 100644 index 8d0328a..0000000 --- a/tests/mock-data/request/jpn-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "ケネディはマサチューセッツ州ブルックラインで、アイルランド系移民の子孫で投資家のジョセフ・P・ケネディ・シニアの次男として生まれた。名前は母方の祖父でボストン市長も勤めたジョン・F・フィッツジェラルドにちなむ。13歳のときにチョート・スクール(コネチカット州ウォリングフォードの寄宿学校)に入学し、その後1935年にイギリスのロンドン・スクール・オブ・エコノミクスに1年間留学した。帰国後ハーバード大学に入学を認められていたものの、親しい友人が進学を決めたプリンストン大学に入学することにした。しかし、クリスマス休暇中にかかった黄疸のため退学している。\n\n1936年の秋にはハーバード大学に転校したが、在学中にフットボールの試合で背中をひどく痛めた。在学中ヨーロッパへ2度旅行しており、2度目の旅行では父親が大使を務めていたイギリスを訪れている。1940年6月、1938年のミュンヘン協定におけるイギリス外交政策の分析についての卒論『イギリスはなぜ眠ったか』を書き、ハーバードを優等で卒業した。\n\n第二次世界大戦後、彼は戦死した兄ジョセフ・P・ケネディ・ジュニアに代わり政界に入った。1946年にジェームズ・M・カーレイがボストン市長になるために民主党下院議員を辞職した時、ケネディはその議席をかけた補欠選挙に立候補した。父のジョセフが実業家であったこともあり、政治資金には困らなかったので、実現不可能な公約をする必要はなく理想主義を語ることができた。長く精力的なキャンペーンの末に、大差で共和党候補に勝ち、29歳で下院議員となった。当初は父のジョセフとコネがあった同じアイルランド系のジョセフ・マッカーシー上院議員の赤狩りに協力していた。リベラル派のエレノア・ルーズヴェルトはそのことを忘れず、後々までケネディを嫌っていた。下院3期目の1952年には上院議員選挙に出馬し、約70,000票の大差で共和党候補ヘンリー・カボット・ロッジ・ジュニアを破った。以後の彼の支持基盤は北部都市圏のリベラル派インテリ層となる。\n\nケネディは1953年9月12日にフランス系移民の名門の娘であるジャクリーン・リー・ブーヴィエと結婚した。彼はその後2年間に多数回の脊柱の手術を受け上院本会議を長期にわたって欠席したが、手術から回復するまでの間、8名の上院議員の政治的に勇敢であった行為についての本『勇気ある人々』を出版した。この本はその後ピューリツァー賞を受賞した。賞金は黒人の通う学校へ寄付したと言われている。\n\n\n\nhttp://ja.wikipedia.org/wiki/%E3%82%B8%E3%83%A7%E3%83%B3%E3%83%BBF%E3%83%BB%E3%82%B1%E3%83%8D%E3%83%87%E3%82%A3\n2010.01.26", - "language": "jpn" -} \ No newline at end of file diff --git a/tests/mock-data/request/pus-doc-entities.json b/tests/mock-data/request/pus-doc-entities.json deleted file mode 100644 index fd10025..0000000 --- 
a/tests/mock-data/request/pus-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "د افغانستان د كابيني لوي لست\nد افغانستان ولسمشر حامد كرزي په خپله نوي كابينه كي د بهرنيو چارو د وزير په توګه د امنيت په برخه كي خپل سلاكار زلمي رسول نوماند كړي دي .\n\n\nد افغانستان ولسمشر حامد كرزي په خپله نوي كابينه كي د بهرنيو چارو د وزير په توګه د امنيت په برخه كي خپل سلاكار زلمي رسول نوماند كړي دي .\nنن د نوموړي هيواد د ولسمشر مرستيال كريم خليلي په پارلمان كي د نوي كابيني د ۱۶ تنو وزيرانو نومونه ولوستل . همداشان كريم خليلي د پاتو دوه تنو وزيرانو د نومونو په اړه څه ونه ويل .\nد ويلو ده چي په نوموړي نوي لست كي د هغه وزيرانو نومونه هم شامل دي چي تيره اووني يي د پارلمان نه د اعتماد رايي نوي ترلاسه كړي .\nد يادولو وړ ده چي د افغانستان پارلمان تيره اووني په خپله عمومي ناسته كي د ۲۴ تنو وزيرانو د جمع نه ۷ تنو ته د اعتماد راي وركړه . ۱- د بهرنيو چارو نوماند وزير: زلمی رسول نوموړی د ولسمشر امنيتي سلاکار دی او په طب کې دوکتورا لري . ٢- د عدليې نوماند وزير : حبيب الله غالب غالب په شرعياتو کې دوکتورا لري چې ويل کېږي چې د عبدرب رسول سياف لخوا معرفي شوی دی . ٣- د لوړو زده کړو نوماند وزير : محمد هاشم عصمت الهي په اړيکو او ورځپاڼه ليکلوو ليکلو کې دوکتورا لري او د اصف محسني کس دی . ٤- د حج او ارشاد نوماند وزير : ډاکټر محمد يوسف نيازی ښاغلی نيازی په اسلامي زده کړو کې دوکتورا لري . ٥- د فوايد عامې نوماند وزير : محمد بشير لعلي ٦- د عامې روغتيا نوماند وزير: ډاکټر ثريا دليل د طب په مديريتي برخه کې دوکتورا لري، په قام ازبکه ده او په کينيا او افغانستان کې يې يونيسف کې کارکړی دی . ٧- د اقتصاد نوماند وزير : عبدالهادي ارغنديوال ښاغلی ارغنديوال په اقتصاد کې ليسانس لري . ٨- د سوداګرۍ او صنايعو نوماند وزير : محمد هادي حکيمي ښاغلی حکيمي په حقوقو او نړيواله سوداګرۍ کې يې زده کړې کړې دي . ٩- د کليو د پراختيا او بيا رغاونې نوماند وزير: جارالله منصوري منصوري په سياسي علومو کې يې لوړې زده کړې کړې دي . ١٠ - د ټولنيزو چارو، شهيدانو او معلولينو نوماند وزير: امنه افضلي اغلې افضلي په چاپېريال ساتنه کې لوړې زده کړې لري . 
١١- د ترانسپورت او هوايي چلند وزير : عبدالرحيم اوراز ښاغلی اوراز ساختماني انجنير او په بهرنيو چارو وزارت کې يې کار کړی، ويل کېږي چې د ملی جنبش لخوا معرفي شوی . ١٢- د ښځو چارو نومانده وزيره : پلوشه حسن اغلې حسن د بيارغاونې په چارو کې ليسانس لري . ١٣- د کډوالو او راستنېدونکو چارو نوماند وزير : انجنير عبدالرحيم نوموړی انجنير دی . ١٤- د سرحدونو چارو، قامونو او قبايلو نوماند وزير : ارسلا جمال ښاغلی جمال په اقتصاد کې ليسانس لري او مخکې د خوست والي و . ١٥- له نشه يي توکيو سره د مبارزې نوماند وزير: ضرار احمد مقبل ښاغلی مقبل د کرزي په تېره دوره کې د څه مودې لپاره د کورنيو چار وزير و . ١٦- د ښاري پراختيا نوماند وزير : انجنير سلطان حسين حصاري ښاغلی حصاري د ښار جوړونې په برخه کې دوکتورا لري\n\n\n\nhttp://www.trtpashto.com/trtinternational/pa/newsDetail.aspx?HaberKodu=efb3ec08-e5d5-4007-addc-0cc81652bc62\n2010.01.24", - "language": "pus" -} \ No newline at end of file diff --git a/tests/mock-data/request/spa-doc-entities.json b/tests/mock-data/request/spa-doc-entities.json deleted file mode 100644 index df35cd2..0000000 --- a/tests/mock-data/request/spa-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "\"Entrega el mar, devuelve el mar...\", gritaron algunos de los ciudadanos paceños apostados en la Plaza Murillo, a las afueras del palacio de gobierno y la sede del Congreso de Bolivia, mientras la Presidenta Michelle Bachelet salía de la extensa ceremonia en que Evo Morales renovó por cinco años más como Mandatario de ese país.\nLa Presidenta ya había abordado el tema marítimo a primera hora de ayer, mientras visitaba un hospital para niños. Ahí, Bachelet destacó que en 2006 había acordado con Morales una agenda de 13 puntos, en la que \"hemos hecho progresos importantes\". 
\"Y todos esperamos que en los años venideros nuestros Estados continúen profundizándola\", agregó.\nAludía así a la agenda que, en su punto sexto, contempla el \"tema marítimo\", pero cuyos detalles son poco conocidos.\nPor lo mismo, cuando fue consultada sobre qué esperaría que hiciera sobre este punto específico su sucesor, Sebastián Piñera, ella reforzó su mensaje. \"Lo que tiene que hacer cualquier gobierno en Chile es seguir profundizando todos los puntos de trabajo en una agenda que ha sido concordada en común\", dijo la Mandataria.\nLa postura asumida ayer por Bachelet se produjo sólo días después de que Evo Morales dejara entrever su inquietud por la actitud que tendrá Piñera ante la aspiración boliviana de una salida al mar. En La Paz creen que el nuevo gobernante chileno restringirá al máximo el diálogo sobre este punto.\nDicha inquietud tuvo su origen en el debate presidencial de hace dos semanas entre Piñera y Eduardo Frei, cuando el actual Presidente electo dijo que no conversaría sobre cesión de soberanía y que sólo tenía en carpeta mejorar el acceso boliviano al Pacífico, frase que es leída en la diplomacia boliviana como \"simples facilidades portuarias\". Morales respondió el martes que \"cualquier compromiso es de Estado a Estado\", y su canciller, David Choquehuanca, pidió mantener la agenda bilateral.\n\n\n\nhttp://diario.elmercurio.com/2010/01/23/nacional/politica/noticias/47D6CA26-B011-40B5-AEAB-D0E6A5F52E59.htm?id={47D6CA26-B011-40B5-AEAB-D0E6A5F52E59}\n2010.01.23", - "language": "spa" -} \ No newline at end of file diff --git a/tests/mock-data/request/xxx-doc-entities.json b/tests/mock-data/request/xxx-doc-entities.json deleted file mode 100644 index d6c8bcc..0000000 --- a/tests/mock-data/request/xxx-doc-entities.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "content": "3.11.06 - Not und Elend in ihren Heimatländern lassen immer mehr Afrikaner die Reise nach Europa antreten. 
Insbesondere Spanien ist betroffen - allein 24.000 Flüchtlinge sind in diesem Jahr in Teneriffa angekommen. Nun berät die Europäische Union über Maßnahmen gegen den Flüchtlingsstrom. Nicht alle Länder sehen darin ein Gemeinschaftsproblem - der deutsche Innenminister Schäuble appelliert an die Eigenverantwortung der Länder.\n\nDie Flüchtlinge, die auf Teneriffa im Lager ausharren, kommen aus Ländern, in denen sie kaum existieren können - auch wegen der Konkurrenz aus Europa, wie Professor Klaus J. Bade, Migrationsforscher an der Universität Osnabrück weiß: \"Unsere Textilsammlungen landen auf kommerziellen Märkten und ruinieren die Textilindustrie. Und die schwimmenden Fischfabriken - Fisch unten rein, Dose oben raus - vor den afrikanischen Küsten ruinieren die Küstenfischerei. Ergebnis: In Somalia transportieren inzwischen ruinierte Fischer mit ihren Booten die Illegalen in Richtung Europa.\"\n\nDoch sich vor Illegalen zu schützen, damit müsse jedes Land selbst fertig werden, wie Wolfgang Schäuble (CDU) betont: \"In Brüssel ist der Ruf immer wohlfeil: Es muss alles europäisch gemacht werden. Jedes Land mit Außengrenzen muss seine Außengrenzen schon selber kontrollieren. 
Wenn wir die Verantwortung nach Europa schieben, wird es weder bürgernäher noch effizienter, sondern ganz im Gegenteil.\"" -} \ No newline at end of file diff --git a/tests/mock-data/request/zho-doc-entities.json b/tests/mock-data/request/zho-doc-entities.json deleted file mode 100644 index 569eead..0000000 --- a/tests/mock-data/request/zho-doc-entities.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "content": "新华网联合国1月22日电(记者 白洁 王湘江)第64届联合国大会22日一致通过决议,呼吁192个成员国尽快响应联合国发起的海地救援紧急募捐呼吁,强调各国应对联合国主导的救灾工作予以支持。\n\n联大当天在纽约联合国总部就海地地震举行全体会议。第64届联大代理主席、哈萨克斯坦常驻联合国代表艾季莫娃在致辞中说,海地灾后的长期重建和发展工作需要国际社会在未来几个月甚至几年内长期关注。\n\n她说,海底地震后,国际社会立即做出反应,对海地人民和政府予以声援和支持。已交付的人道主义援助物资满足了海地人民的一些迫切需求,但还有许多工作要做。\n\n\n\nhttp://news.xinhuanet.com/world/2010-01/23/content_12860329.htm\n2010.01.24", - "language": "zho" -} \ No newline at end of file diff --git a/tests/mock-data/response/ara-doc-entities.json b/tests/mock-data/response/ara-doc-entities.json deleted file mode 100644 index bfac52a..0000000 --- a/tests/mock-data/response/ara-doc-entities.json +++ /dev/null @@ -1,210 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.020943284034729004, - "count": 4, - "indocChainId": 6, - "mention": "أفغانستان", - "normalized": "أفغانستان", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 3, - "indocChainId": 15, - "mention": "الأفغانية", - "normalized": "الأفغانية", - "type": "NATIONALITY" - }, - { - "confidence": 0.013337045907974243, - "count": 2, - "indocChainId": 2, - "mention": "ناتو", - "normalized": "ناتو", - "type": "ORGANIZATION" - }, - { - "confidence": 0.021363019943237305, - "count": 2, - "indocChainId": 3, - "mention": "لأفغانستان", - "normalized": "لأفغانستان", - "type": "LOCATION" - }, - { - "confidence": 0.03828367590904236, - "count": 2, - "indocChainId": 8, - "mention": "السفير", - "normalized": "السفير", - "type": "TITLE" - }, - { - "confidence": 1.0, - "count": 2, - "indocChainId": 9, - "mention": "البريطاني", - "normalized": "البريطاني", - "type": "NATIONALITY" - }, - { - "confidence": 
0.03427225351333618, - "count": 2, - "indocChainId": 10, - "mention": "كابل", - "normalized": "كابل", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 2, - "indocChainId": 14, - "mention": "الولايات المتحدة", - "normalized": "الولايات المتحدة", - "type": "LOCATION" - }, - { - "confidence": 0.009779423475265503, - "count": 2, - "indocChainId": 22, - "mention": "حلف الأطلسي", - "normalized": "حلف الأطلسي", - "type": "ORGANIZATION" - }, - { - "confidence": 0.01446753740310669, - "count": 1, - "indocChainId": 0, - "mention": "مكة المكرمة", - "normalized": "مكة المكرمة", - "type": "LOCATION" - }, - { - "confidence": 0.01943296194076538, - "count": 1, - "indocChainId": 1, - "mention": "غرينتش", - "normalized": "غرينتش", - "type": "LOCATION" - }, - { - "confidence": 0.012137770652770996, - "count": 1, - "indocChainId": 4, - "mention": "حلف شمال الأطلسي", - "normalized": "حلف شمال الأطلسي", - "type": "ORGANIZATION" - }, - { - "confidence": 0.0020059943199157715, - "count": 1, - "indocChainId": 7, - "mention": "وول ستريت", - "normalized": "وول ستريت", - "type": "ORGANIZATION" - }, - { - "confidence": 0.03418374061584473, - "count": 1, - "indocChainId": 12, - "mention": "لندن", - "normalized": "لندن", - "type": "LOCATION" - }, - { - "confidence": 0.00932997465133667, - "count": 1, - "indocChainId": 17, - "mention": "الأمين العام", - "normalized": "الأمين العام", - "type": "TITLE" - }, - { - "confidence": 0.01955312490463257, - "count": 1, - "indocChainId": 18, - "mention": "للأمم المتحدة", - "normalized": "لأمم المتحدة", - "type": "ORGANIZATION" - }, - { - "confidence": 0.008319079875946045, - "count": 1, - "indocChainId": 19, - "mention": "بان كي مون", - "normalized": "بان كي مون", - "type": "PERSON" - }, - { - "confidence": 0.02144944667816162, - "count": 1, - "indocChainId": 21, - "mention": "إيساف", - "normalized": "إيساف", - "type": "ORGANIZATION" - }, - { - "confidence": 0.006335079669952393, - "count": 1, - "indocChainId": 23, - "mention": "الولايات", - 
"normalized": "الولايات", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 25, - "mention": "للأميركي", - "normalized": "لأميركي", - "type": "NATIONALITY" - }, - { - "confidence": 0.0270041823387146, - "count": 1, - "indocChainId": 26, - "mention": "ستانلي ماكريستال", - "normalized": "ستانلي ماكريستال", - "type": "PERSON" - }, - { - "confidence": 0.03057950735092163, - "count": 1, - "indocChainId": 27, - "mention": "قائد", - "normalized": "قائد", - "type": "TITLE" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 28, - "mention": "الأميركية", - "normalized": "الأميركية", - "type": "NATIONALITY" - }, - { - "confidence": 0.015597224235534668, - "count": 1, - "indocChainId": 32, - "mention": "لحركة طالبان", - "normalized": "حركة طالبان", - "type": "ORGANIZATION" - }, - { - "confidence": 0.05023258924484253, - "count": 1, - "indocChainId": 35, - "mention": "مارك سيدويل", - "normalized": "مارك سيدويل", - "type": "PERSON" - } - ], - "requestId": "f1c7d4d6-a219-42b2-9b30-b636a80d3746", - "timers": { - "rblJe": 84, - "rexJe": 59, - "rliJe": 3 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/ara-doc-entities.status b/tests/mock-data/response/ara-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/ara-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/eng-doc-entities.json b/tests/mock-data/response/eng-doc-entities.json deleted file mode 100644 index 7352644..0000000 --- a/tests/mock-data/response/eng-doc-entities.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "entities": [ - { - "confidence": 1.0, - "count": 17, - "indocChainId": 0, - "mention": "Samsung", - "normalized": "Samsung", - "type": "ORGANIZATION" - }, - { - "confidence": 0.04526931473187038, - "count": 14, - "indocChainId": 3, - "mention": "Apple", - "normalized": "Apple", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - 
"count": 7, - "indocChainId": 10, - "mention": "U.S.", - "normalized": "U.S.", - "type": "LOCATION" - }, - { - "confidence": 0.026389598846435547, - "count": 3, - "indocChainId": 7, - "mention": "judge", - "normalized": "judge", - "type": "TITLE" - }, - { - "confidence": 1.0, - "count": 2, - "indocChainId": 13, - "mention": "South Korean", - "normalized": "South Korean", - "type": "NATIONALITY" - }, - { - "confidence": 0.03619256615638733, - "count": 2, - "indocChainId": 15, - "mention": "iPad", - "normalized": "iPad", - "type": "PRODUCT" - }, - { - "confidence": 0.026187777519226074, - "count": 2, - "indocChainId": 20, - "mention": "District Judge", - "normalized": "District Judge", - "type": "TITLE" - }, - { - "confidence": 0.028430074453353882, - "count": 2, - "indocChainId": 21, - "mention": "Lucy Koh", - "normalized": "Lucy Koh", - "type": "PERSON" - }, - { - "confidence": 0.011759281158447266, - "count": 2, - "indocChainId": 21, - "mention": "Koh", - "normalized": "Koh", - "type": "PERSON" - }, - { - "confidence": 1.0, - "count": 2, - "indocChainId": 52, - "mention": "Australia", - "normalized": "Australia", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 2, - "mention": "Samsung Electronics", - "normalized": "Samsung Electronics", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 10, - "mention": "United States", - "normalized": "United States", - "type": "LOCATION" - }, - { - "confidence": 0.02483654022216797, - "count": 1, - "indocChainId": 14, - "mention": "iPhone", - "normalized": "iPhone", - "type": "PRODUCT" - }, - { - "confidence": 0.018296480178833008, - "count": 1, - "indocChainId": 33, - "mention": "Tab 10.1", - "normalized": "Tab 10.1", - "type": "PRODUCT" - }, - { - "confidence": 0.04169809818267822, - "count": 1, - "indocChainId": 48, - "mention": "Silicon Valley", - "normalized": "Silicon Valley", - "type": "LOCATION" - }, - { - "confidence": 0.026794254779815674, - 
"count": 1, - "indocChainId": 50, - "mention": "iPads", - "normalized": "iPads", - "type": "PRODUCT" - }, - { - "confidence": 0.009662508964538574, - "count": 1, - "indocChainId": 58, - "mention": "Galaxy Tablet", - "normalized": "Galaxy Tablet", - "type": "PRODUCT" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 60, - "mention": "http://newsinabox.net/2202/samsung-can-sell-its-tablet-in-the-u-s-market.html", - "normalized": "http://newsinabox.net/2202/samsung-can-sell-its-tablet-in-the-u-s-market.html", - "type": "IDENTIFIER:URL" - } - ], - "requestId": "2be64f9f-6246-4366-aab3-16f635ed87a5", - "timers": { - "rblJe": 29, - "rexJe": 656, - "rliJe": 35 - }, - "responseHeaders": { - "connection": "keep-alive", - "content-length": "637", - "content-type": "application/json", - "date": "Wed, 02 Mar 2016 23:09:51 GMT", - "server": "openresty/1.7.4.1", - "x-rosetteapi-request-id": "2be64f9f-6246-4366-aab3-16f635ed87a5" - } -} \ No newline at end of file diff --git a/tests/mock-data/response/eng-doc-entities.status b/tests/mock-data/response/eng-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/eng-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/eng-sentence-entities.json b/tests/mock-data/response/eng-sentence-entities.json deleted file mode 100644 index 703bc91..0000000 --- a/tests/mock-data/response/eng-sentence-entities.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "entities": [ - { - "confidence": 1.0, - "count": 2, - "indocChainId": 0, - "mention": "U.S.", - "normalized": "U.S.", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 1, - "mention": "Iraq", - "normalized": "Iraq", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 2, - "mention": "Afghanistan", - "normalized": "Afghanistan", - "type": "LOCATION" - }, - { - "confidence": 0.009895622730255127, - "count": 1, - 
"indocChainId": 3, - "mention": "commander in chief", - "normalized": "commander in chief", - "type": "TITLE" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 4, - "mention": "American", - "normalized": "American", - "type": "NATIONALITY" - } - ], - "requestId": "75686168-76e1-490c-b960-a4d3a4b0cf5d", - "timers": { - "rblJe": 3, - "rexJe": 5, - "rliJe": 14 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/eng-sentence-entities.status b/tests/mock-data/response/eng-sentence-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/eng-sentence-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/fra-doc-entities.json b/tests/mock-data/response/fra-doc-entities.json deleted file mode 100644 index e4d6aba..0000000 --- a/tests/mock-data/response/fra-doc-entities.json +++ /dev/null @@ -1,242 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.05105915239879063, - "count": 7, - "indocChainId": 4, - "mention": "Obama", - "normalized": "Obama", - "type": "PERSON" - }, - { - "confidence": 0.03971593578656515, - "count": 6, - "indocChainId": 8, - "mention": "M.", - "normalized": "M.", - "type": "TITLE" - }, - { - "confidence": 0.028711426258087158, - "count": 5, - "indocChainId": 3, - "mention": "président", - "normalized": "président", - "type": "TITLE" - }, - { - "confidence": 0.031732420126597084, - "count": 3, - "indocChainId": 0, - "mention": "Etats-Unis", - "normalized": "Etats-Unis", - "type": "LOCATION" - }, - { - "confidence": 0.03095032771428426, - "count": 3, - "indocChainId": 35, - "mention": "républicains", - "normalized": "républicains", - "type": "ORGANIZATION" - }, - { - "confidence": 0.01543426513671875, - "count": 2, - "indocChainId": 4, - "mention": "Barack Obama", - "normalized": "Barack Obama", - "type": "PERSON" - }, - { - "confidence": 0.019789844751358032, - "count": 2, - "indocChainId": 5, - "mention": "Amérique", - 
"normalized": "Amérique", - "type": "LOCATION" - }, - { - "confidence": 0.018558651208877563, - "count": 2, - "indocChainId": 10, - "mention": "Dean Baker", - "normalized": "Dean Baker", - "type": "PERSON" - }, - { - "confidence": 0.00832781195640564, - "count": 2, - "indocChainId": 14, - "mention": "républicain", - "normalized": "républicain", - "type": "ORGANIZATION" - }, - { - "confidence": 0.011866271495819092, - "count": 2, - "indocChainId": 33, - "mention": "Congrès", - "normalized": "Congrès", - "type": "ORGANIZATION" - }, - { - "confidence": 0.03181558847427368, - "count": 2, - "indocChainId": 44, - "mention": "Sénat", - "normalized": "Sénat", - "type": "ORGANIZATION" - }, - { - "confidence": 0.016073524951934814, - "count": 1, - "indocChainId": 1, - "mention": "Census Bureau", - "normalized": "Census Bureau", - "type": "ORGANIZATION" - }, - { - "confidence": 0.013657450675964355, - "count": 1, - "indocChainId": 2, - "mention": "Insee", - "normalized": "Insee", - "type": "ORGANIZATION" - }, - { - "confidence": 0.009501934051513672, - "count": 1, - "indocChainId": 6, - "mention": "Maison Blanche", - "normalized": "Maison Blanche", - "type": "ORGANIZATION" - }, - { - "confidence": 0.01004105806350708, - "count": 1, - "indocChainId": 7, - "mention": "Parti démocrate", - "normalized": "Parti démocrate", - "type": "ORGANIZATION" - }, - { - "confidence": 0.03106057643890381, - "count": 1, - "indocChainId": 10, - "mention": "Baker", - "normalized": "Baker", - "type": "PERSON" - }, - { - "confidence": 0.006049156188964844, - "count": 1, - "indocChainId": 11, - "mention": "démocrate", - "normalized": "démocrate", - "type": "ORGANIZATION" - }, - { - "confidence": 0.011113584041595459, - "count": 1, - "indocChainId": 12, - "mention": "Washington", - "normalized": "Washington", - "type": "LOCATION" - }, - { - "confidence": 0.010755836963653564, - "count": 1, - "indocChainId": 15, - "mention": "Ronald Reagan", - "normalized": "Ronald Reagan", - "type": "PERSON" - }, - { 
- "confidence": 0.020975351333618164, - "count": 1, - "indocChainId": 20, - "mention": "George Bush", - "normalized": "George Bush", - "type": "PERSON" - }, - { - "confidence": 0.01764160394668579, - "count": 1, - "indocChainId": 22, - "mention": "Christina Romer", - "normalized": "Christina Romer", - "type": "PERSON" - }, - { - "confidence": 0.003275454044342041, - "count": 1, - "indocChainId": 23, - "mention": "présidente du conseil économique", - "normalized": "présidente du conseil économique", - "type": "TITLE" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 28, - "mention": "Goldman Sachs", - "normalized": "Goldman Sachs", - "type": "ORGANIZATION" - }, - { - "confidence": 0.014990746974945068, - "count": 1, - "indocChainId": 29, - "mention": "Jan Hatzius", - "normalized": "Jan Hatzius", - "type": "PERSON" - }, - { - "confidence": 0.03833216428756714, - "count": 1, - "indocChainId": 42, - "mention": "David Axelrod", - "normalized": "David Axelrod", - "type": "PERSON" - }, - { - "confidence": 0.05773395299911499, - "count": 1, - "indocChainId": 46, - "mention": "Harry Reid", - "normalized": "Harry Reid", - "type": "PERSON" - }, - { - "confidence": 0.022663354873657227, - "count": 1, - "indocChainId": 47, - "mention": "démocrates", - "normalized": "démocrates", - "type": "ORGANIZATION" - }, - { - "confidence": 0.004793286323547363, - "count": 1, - "indocChainId": 50, - "mention": "TARP", - "normalized": "TARP", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 53, - "mention": "http://www.lemonde.fr/ameriques/article/2010/01/21/les-etats-unis-face-au-defi-persistant-du-chomage_1294839_3222.html#xtor=RSS-3210", - "normalized": "http://www.lemonde.fr/ameriques/article/2010/01/21/les-etats-unis-face-au-defi-persistant-du-chomage_1294839_3222.html#xtor=RSS-3210", - "type": "IDENTIFIER:URL" - } - ], - "requestId": "fe809e0f-8caf-4180-8f1a-cbd27258df14", - "timers": { - "rblJe": 13, - "rexJe": 54, - "rliJe": 6 - } -} 
\ No newline at end of file diff --git a/tests/mock-data/response/fra-doc-entities.status b/tests/mock-data/response/fra-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/fra-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/info.json b/tests/mock-data/response/info.json deleted file mode 100644 index a6f2acc..0000000 --- a/tests/mock-data/response/info.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "buildNumber": "6bafb29d", - "buildTime": "2015.05.08_12:31:26", - "name": "Rosette API", - "version": "0.5.0" -} diff --git a/tests/mock-data/response/jpn-doc-entities.json b/tests/mock-data/response/jpn-doc-entities.json deleted file mode 100644 index 68cdab5..0000000 --- a/tests/mock-data/response/jpn-doc-entities.json +++ /dev/null @@ -1,298 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.005091860890388489, - "count": 4, - "indocChainId": 11, - "mention": "イギリス", - "normalized": "イギリス", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 3, - "indocChainId": 29, - "mention": "下院", - "normalized": "下院", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 3, - "indocChainId": 37, - "mention": "上院", - "normalized": "上院", - "type": "ORGANIZATION" - }, - { - "confidence": 0.007921993732452393, - "count": 2, - "indocChainId": 2, - "mention": "アイルランド", - "normalized": "アイルランド", - "type": "LOCATION" - }, - { - "confidence": 0.029875636100769043, - "count": 2, - "indocChainId": 3, - "mention": "ジョセフ・P・ケネディ", - "normalized": "ジョセフ・P・ケネディ", - "type": "PERSON" - }, - { - "confidence": 0.009616047143936157, - "count": 2, - "indocChainId": 3, - "mention": "ジョセフ", - "normalized": "ジョセフ", - "type": "PERSON" - }, - { - "confidence": 0.005369991064071655, - "count": 2, - "indocChainId": 3, - "mention": "ケネディ", - "normalized": "ケネディ", - "type": "PERSON" - }, - { - "confidence": 0.007853776216506958, - "count": 2, - "indocChainId": 5, - "mention": 
"ボストン", - "normalized": "ボストン", - "type": "LOCATION" - }, - { - "confidence": 0.007853776216506958, - "count": 2, - "indocChainId": 6, - "mention": "市長", - "normalized": "市長", - "type": "TITLE" - }, - { - "confidence": 1.0, - "count": 2, - "indocChainId": 14, - "mention": "ハーバード大学", - "normalized": "ハーバード大学", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 2, - "indocChainId": 32, - "mention": "共和党", - "normalized": "共和党", - "type": "ORGANIZATION" - }, - { - "confidence": 0.015272259712219238, - "count": 1, - "indocChainId": 0, - "mention": "マサチューセッツ州", - "normalized": "マサチューセッツ州", - "type": "LOCATION" - }, - { - "confidence": 0.00015401840209960938, - "count": 1, - "indocChainId": 1, - "mention": "ブルックライン", - "normalized": "ブルックライン", - "type": "LOCATION" - }, - { - "confidence": 0.0026397705078125, - "count": 1, - "indocChainId": 4, - "mention": "シニア", - "normalized": "シニア", - "type": "ORGANIZATION" - }, - { - "confidence": 0.019479751586914062, - "count": 1, - "indocChainId": 7, - "mention": "ジョン・F・フィッツジェラルド", - "normalized": "ジョン・F・フィッツジェラルド", - "type": "PERSON" - }, - { - "confidence": 0.005173742771148682, - "count": 1, - "indocChainId": 8, - "mention": "チョート・スクール", - "normalized": "チョート・スクール", - "type": "PERSON" - }, - { - "confidence": 0.01989346742630005, - "count": 1, - "indocChainId": 9, - "mention": "コネチカット州", - "normalized": "コネチカット州", - "type": "LOCATION" - }, - { - "confidence": 0.027547240257263184, - "count": 1, - "indocChainId": 10, - "mention": "ウォリングフォード", - "normalized": "ウォリングフォード", - "type": "LOCATION" - }, - { - "confidence": 0.0014181733131408691, - "count": 1, - "indocChainId": 12, - "mention": "ロンドン", - "normalized": "ロンドン", - "type": "LOCATION" - }, - { - "confidence": 0.0014181733131408691, - "count": 1, - "indocChainId": 13, - "mention": "オブ・エコノミクス", - "normalized": "オブ・エコノミクス", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 15, - "mention": "プリンストン大学", - "normalized": 
"プリンストン大学", - "type": "ORGANIZATION" - }, - { - "confidence": 0.012808084487915039, - "count": 1, - "indocChainId": 17, - "mention": "ヨーロッパ", - "normalized": "ヨーロッパ", - "type": "LOCATION" - }, - { - "confidence": 0.012429237365722656, - "count": 1, - "indocChainId": 18, - "mention": "大使", - "normalized": "大使", - "type": "TITLE" - }, - { - "confidence": 0.010786056518554688, - "count": 1, - "indocChainId": 20, - "mention": "ミュンヘン", - "normalized": "ミュンヘン", - "type": "LOCATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 23, - "mention": "ハーバード", - "normalized": "ハーバード", - "type": "ORGANIZATION" - }, - { - "confidence": 0.0019164681434631348, - "count": 1, - "indocChainId": 25, - "mention": "ジェームズ・M・カーレイ", - "normalized": "ジェームズ・M・カーレイ", - "type": "ORGANIZATION" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 28, - "mention": "民主党", - "normalized": "民主党", - "type": "ORGANIZATION" - }, - { - "confidence": 0.04107320308685303, - "count": 1, - "indocChainId": 36, - "mention": "ジョセフ・マッカーシー", - "normalized": "ジョセフ・マッカーシー", - "type": "PERSON" - }, - { - "confidence": 0.0024489164352416992, - "count": 1, - "indocChainId": 38, - "mention": "赤狩り", - "normalized": "赤狩り", - "type": "ORGANIZATION" - }, - { - "confidence": 0.01878488063812256, - "count": 1, - "indocChainId": 39, - "mention": "エレノア・ルーズヴェルト", - "normalized": "エレノア・ルーズヴェルト", - "type": "PERSON" - }, - { - "confidence": 0.010483860969543457, - "count": 1, - "indocChainId": 44, - "mention": "ヘンリー・カボット・ロッジ", - "normalized": "ヘンリー・カボット・ロッジ", - "type": "PERSON" - }, - { - "confidence": 0.03342700004577637, - "count": 1, - "indocChainId": 45, - "mention": "都市", - "normalized": "都市", - "type": "LOCATION" - }, - { - "confidence": 0.021946430206298828, - "count": 1, - "indocChainId": 46, - "mention": "フランス", - "normalized": "フランス", - "type": "LOCATION" - }, - { - "confidence": 0.009429752826690674, - "count": 1, - "indocChainId": 47, - "mention": "ジャクリーン・リー・ブーヴィエ", - "normalized": 
"ジャクリーン・リー・ブーヴィエ", - "type": "PERSON" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 49, - "mention": "黒人", - "normalized": "黒人", - "type": "NATIONALITY" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 50, - "mention": "http://ja.wikipedia.org/wiki/%E3%82%B8%E3%83%A7%E3%83%B3%E3%83%BBF%E3%83%BB%E3%82%B1%E3%83%8D%E3%83%87%E3%82%A3", - "normalized": "http://ja.wikipedia.org/wiki/%E3%82%B8%E3%83%A7%E3%83%B3%E3%83%BBF%E3%83%BB%E3%82%B1%E3%83%8D%E3%83%87%E3%82%A3", - "type": "IDENTIFIER:URL" - } - ], - "requestId": "d32a9669-1a57-40ee-8f8d-893d2c9ac20f", - "timers": { - "rblJe": 27, - "rexJe": 67, - "rliJe": 3 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/jpn-doc-entities.status b/tests/mock-data/response/jpn-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/jpn-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/ping.json b/tests/mock-data/response/ping.json deleted file mode 100644 index e114494..0000000 --- a/tests/mock-data/response/ping.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "message":"Rosette API at your service", - "time":1433962008758 -} diff --git a/tests/mock-data/response/pus-doc-entities.json b/tests/mock-data/response/pus-doc-entities.json deleted file mode 100644 index 4b0054e..0000000 --- a/tests/mock-data/response/pus-doc-entities.json +++ /dev/null @@ -1,506 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.03916314670017788, - "count": 7, - "indocChainId": 35, - "mention": "ښاغلی", - "normalized": "ښاغلی", - "type": "TITLE" - }, - { - "confidence": 0.007801820834477742, - "count": 6, - "indocChainId": 29, - "mention": "نوماند وزير", - "normalized": "نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 0.03787636756896973, - "count": 5, - "indocChainId": 0, - "mention": "افغانستان", - "normalized": "افغانستان", - "type": "LOCATION" - }, - { - "confidence": 0.029533326625823975, - 
"count": 3, - "indocChainId": 15, - "mention": "پارلمان", - "normalized": "پارلمان", - "type": "ORGANIZATION" - }, - { - "confidence": 0.03755847613016764, - "count": 3, - "indocChainId": 63, - "mention": "انجنير", - "normalized": "انجنير", - "type": "TITLE" - }, - { - "confidence": 0.05754426121711731, - "count": 2, - "indocChainId": 2, - "mention": "ولسمشر", - "normalized": "ولسمشر", - "type": "TITLE" - }, - { - "confidence": 0.033583104610443115, - "count": 2, - "indocChainId": 3, - "mention": "حامد كرزي", - "normalized": "حامد كرزي", - "type": "PERSON" - }, - { - "confidence": 0.031616926193237305, - "count": 2, - "indocChainId": 4, - "mention": "بهرنيو چارو د وزير", - "normalized": "بهرنيو چارو د وزير", - "type": "TITLE" - }, - { - "confidence": 0.00478130578994751, - "count": 2, - "indocChainId": 5, - "mention": "سلاكار", - "normalized": "سلاكار", - "type": "TITLE" - }, - { - "confidence": 0.022341817617416382, - "count": 2, - "indocChainId": 6, - "mention": "زلمي رسول", - "normalized": "زلمي رسول", - "type": "PERSON" - }, - { - "confidence": 0.04105132818222046, - "count": 2, - "indocChainId": 14, - "mention": "كريم خليلي", - "normalized": "كريم خليلي", - "type": "PERSON" - }, - { - "confidence": 0.00576329231262207, - "count": 2, - "indocChainId": 17, - "mention": "وزيرانو", - "normalized": "وزيرانو", - "type": "TITLE" - }, - { - "confidence": 0.030164211988449097, - "count": 2, - "indocChainId": 33, - "mention": "ډاکټر", - "normalized": "ډاکټر", - "type": "TITLE" - }, - { - "confidence": 0.02519279718399048, - "count": 2, - "indocChainId": 57, - "mention": "اغلې", - "normalized": "اغلې", - "type": "TITLE" - }, - { - "confidence": 0.019838571548461914, - "count": 1, - "indocChainId": 13, - "mention": "ولسمشر مرستيال", - "normalized": "ولسمشر مرستيال", - "type": "TITLE" - }, - { - "confidence": 0.030272245407104492, - "count": 1, - "indocChainId": 22, - "mention": "بهرنيو چارو نوماند وزير", - "normalized": "بهرنيو چارو نوماند وزير", - "type": "TITLE" - }, - 
{ - "confidence": 0.04382455348968506, - "count": 1, - "indocChainId": 23, - "mention": "زلمی رسول", - "normalized": "زلمی رسول", - "type": "PERSON" - }, - { - "confidence": 0.0041877031326293945, - "count": 1, - "indocChainId": 24, - "mention": "ولسمشر امنيتي سلاکار", - "normalized": "ولسمشر امنيتي سلاکار", - "type": "TITLE" - }, - { - "confidence": 0.028324902057647705, - "count": 1, - "indocChainId": 25, - "mention": "عدليې نوماند وزير", - "normalized": "عدليې نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 0.004933297634124756, - "count": 1, - "indocChainId": 26, - "mention": "حبيب الله غالب غالب", - "normalized": "حبيب الله غالب غالب", - "type": "PERSON" - }, - { - "confidence": 0.005223214626312256, - "count": 1, - "indocChainId": 27, - "mention": "شرعياتو", - "normalized": "شرعياتو", - "type": "LOCATION" - }, - { - "confidence": 0.02999025583267212, - "count": 1, - "indocChainId": 28, - "mention": "عبدرب رسول سياف", - "normalized": "عبدرب رسول سياف", - "type": "PERSON" - }, - { - "confidence": 0.012441575527191162, - "count": 1, - "indocChainId": 30, - "mention": "محمد هاشم عصمت الهي", - "normalized": "محمد هاشم عصمت الهي", - "type": "PERSON" - }, - { - "confidence": 0.02618861198425293, - "count": 1, - "indocChainId": 31, - "mention": "اصف محسني", - "normalized": "اصف محسني", - "type": "PERSON" - }, - { - "confidence": 0.009061336517333984, - "count": 1, - "indocChainId": 34, - "mention": "نيازی", - "normalized": "نيازی", - "type": "PERSON" - }, - { - "confidence": 0.00313723087310791, - "count": 1, - "indocChainId": 34, - "mention": "محمد يوسف نيازی", - "normalized": "محمد يوسف نيازی", - "type": "PERSON" - }, - { - "confidence": 0.0046912431716918945, - "count": 1, - "indocChainId": 37, - "mention": "عامې نوماند وزير", - "normalized": "عامې نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 2.962350845336914e-05, - "count": 1, - "indocChainId": 38, - "mention": "محمد بشير لعلي", - "normalized": "محمد بشير لعلي", - "type": "PERSON" - }, - { - 
"confidence": 0.0180090069770813, - "count": 1, - "indocChainId": 39, - "mention": "عامې روغتيا نوماند وزير", - "normalized": "عامې روغتيا نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 0.01904827356338501, - "count": 1, - "indocChainId": 41, - "mention": "ثريا", - "normalized": "ثريا", - "type": "PERSON" - }, - { - "confidence": 0.002719104290008545, - "count": 1, - "indocChainId": 42, - "mention": "کينيا", - "normalized": "کينيا", - "type": "LOCATION" - }, - { - "confidence": 0.004633128643035889, - "count": 1, - "indocChainId": 44, - "mention": "يونيسف", - "normalized": "يونيسف", - "type": "ORGANIZATION" - }, - { - "confidence": 0.018250882625579834, - "count": 1, - "indocChainId": 45, - "mention": "اقتصاد نوماند وزير", - "normalized": "اقتصاد نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 0.03505206108093262, - "count": 1, - "indocChainId": 46, - "mention": "عبدالهادي ارغنديوال", - "normalized": "عبدالهادي ارغنديوال", - "type": "PERSON" - }, - { - "confidence": 0.05095309019088745, - "count": 1, - "indocChainId": 46, - "mention": "ارغنديوال", - "normalized": "ارغنديوال", - "type": "PERSON" - }, - { - "confidence": 0.00369340181350708, - "count": 1, - "indocChainId": 49, - "mention": "سوداګرۍ او صنايعو نوماند وزير", - "normalized": "سوداګرۍ او صنايعو نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 0.013631701469421387, - "count": 1, - "indocChainId": 50, - "mention": "محمد هادي حکيمي", - "normalized": "محمد هادي حکيمي", - "type": "PERSON" - }, - { - "confidence": 0.029611587524414062, - "count": 1, - "indocChainId": 50, - "mention": "حکيمي", - "normalized": "حکيمي", - "type": "PERSON" - }, - { - "confidence": 0.0037049055099487305, - "count": 1, - "indocChainId": 54, - "mention": "جارالله", - "normalized": "جارالله", - "type": "PERSON" - }, - { - "confidence": 0.004224538803100586, - "count": 1, - "indocChainId": 55, - "mention": "معلولينو نوماند وزير", - "normalized": "معلولينو نوماند وزير", - "type": "TITLE" - }, - { - 
"confidence": 0.01109844446182251, - "count": 1, - "indocChainId": 56, - "mention": "امنه افضلي", - "normalized": "امنه افضلي", - "type": "PERSON" - }, - { - "confidence": 0.034655988216400146, - "count": 1, - "indocChainId": 56, - "mention": "افضلي", - "normalized": "افضلي", - "type": "PERSON" - }, - { - "confidence": 0.010983049869537354, - "count": 1, - "indocChainId": 59, - "mention": "چلند وزير", - "normalized": "چلند وزير", - "type": "TITLE" - }, - { - "confidence": 0.004098773002624512, - "count": 1, - "indocChainId": 60, - "mention": "عبدالرحيم اوراز", - "normalized": "عبدالرحيم اوراز", - "type": "PERSON" - }, - { - "confidence": 0.08212143182754517, - "count": 1, - "indocChainId": 60, - "mention": "عبدالرحيم", - "normalized": "عبدالرحيم", - "type": "PERSON" - }, - { - "confidence": 0.004256248474121094, - "count": 1, - "indocChainId": 60, - "mention": "اوراز", - "normalized": "اوراز", - "type": "PERSON" - }, - { - "confidence": 0.03391331434249878, - "count": 1, - "indocChainId": 64, - "mention": "بهرنيو چارو وزارت", - "normalized": "بهرنيو چارو وزارت", - "type": "ORGANIZATION" - }, - { - "confidence": 0.021835267543792725, - "count": 1, - "indocChainId": 65, - "mention": "ښځو چارو نومانده وزيره", - "normalized": "ښځو چارو نومانده وزيره", - "type": "TITLE" - }, - { - "confidence": 0.004233062267303467, - "count": 1, - "indocChainId": 66, - "mention": "پلوشه حسن", - "normalized": "پلوشه حسن", - "type": "PERSON" - }, - { - "confidence": 0.03508961200714111, - "count": 1, - "indocChainId": 66, - "mention": "حسن", - "normalized": "حسن", - "type": "PERSON" - }, - { - "confidence": 0.027159810066223145, - "count": 1, - "indocChainId": 69, - "mention": "کډوالو او راستنېدونکو چارو نوماند وزير", - "normalized": "کډوالو او راستنېدونکو چارو نوماند وزير", - "type": "TITLE" - }, - { - "confidence": 0.0005469918251037598, - "count": 1, - "indocChainId": 72, - "mention": "سرحدونو", - "normalized": "سرحدونو", - "type": "ORGANIZATION" - }, - { - "confidence": 
0.029864192008972168, - "count": 1, - "indocChainId": 74, - "mention": "جمال", - "normalized": "جمال", - "type": "PERSON" - }, - { - "confidence": 0.029639482498168945, - "count": 1, - "indocChainId": 74, - "mention": "ارسلا جمال", - "normalized": "ارسلا جمال", - "type": "PERSON" - }, - { - "confidence": 0.04481011629104614, - "count": 1, - "indocChainId": 77, - "mention": "خوست", - "normalized": "خوست", - "type": "LOCATION" - }, - { - "confidence": 0.022002756595611572, - "count": 1, - "indocChainId": 79, - "mention": "مقبل", - "normalized": "مقبل", - "type": "PERSON" - }, - { - "confidence": 0.019781112670898438, - "count": 1, - "indocChainId": 79, - "mention": "ضرار احمد مقبل", - "normalized": "ضرار احمد مقبل", - "type": "PERSON" - }, - { - "confidence": 0.0150221586227417, - "count": 1, - "indocChainId": 82, - "mention": "کرزي", - "normalized": "کرزي", - "type": "PERSON" - }, - { - "confidence": 0.036535441875457764, - "count": 1, - "indocChainId": 83, - "mention": "کورنيو چار وزير", - "normalized": "کورنيو چار وزير", - "type": "TITLE" - }, - { - "confidence": 0.0030456185340881348, - "count": 1, - "indocChainId": 86, - "mention": "سلطان حسين حصاري", - "normalized": "سلطان حسين حصاري", - "type": "PERSON" - }, - { - "confidence": 0.009828627109527588, - "count": 1, - "indocChainId": 86, - "mention": "حصاري", - "normalized": "حصاري", - "type": "PERSON" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 89, - "mention": "http://www.trtpashto.com/trtinternational/pa/newsDetail.aspx?HaberKodu=efb3ec08-e5d5-4007-addc-0cc81652bc62", - "normalized": "http://www.trtpashto.com/trtinternational/pa/newsDetail.aspx?HaberKodu=efb3ec08-e5d5-4007-addc-0cc81652bc62", - "type": "IDENTIFIER:URL" - } - ], - "requestId": "517da744-d53c-46a0-bf0a-c0f179966dbe", - "timers": { - "rblJe": 2, - "rexJe": 42, - "rliJe": 5 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/pus-doc-entities.status b/tests/mock-data/response/pus-doc-entities.status deleted 
file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/pus-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/spa-doc-entities.json b/tests/mock-data/response/spa-doc-entities.json deleted file mode 100644 index 40669e3..0000000 --- a/tests/mock-data/response/spa-doc-entities.json +++ /dev/null @@ -1,154 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.022009849548339844, - "count": 2, - "indocChainId": 3, - "mention": "Presidenta", - "normalized": "Presidenta", - "type": "TITLE" - }, - { - "confidence": 0.027858316898345947, - "count": 2, - "indocChainId": 4, - "mention": "Bachelet", - "normalized": "Bachelet", - "type": "PERSON" - }, - { - "confidence": 0.022280961275100708, - "count": 2, - "indocChainId": 5, - "mention": "Morales", - "normalized": "Morales", - "type": "PERSON" - }, - { - "confidence": 0.014749795198440552, - "count": 2, - "indocChainId": 5, - "mention": "Evo Morales", - "normalized": "Evo Morales", - "type": "PERSON" - }, - { - "confidence": 0.015497177839279175, - "count": 2, - "indocChainId": 9, - "mention": "Piñera", - "normalized": "Piñera", - "type": "PERSON" - }, - { - "confidence": 0.0006507039070129395, - "count": 1, - "indocChainId": 0, - "mention": "Plaza Murillo", - "normalized": "Plaza Murillo", - "type": "PERSON" - }, - { - "confidence": 0.014606237411499023, - "count": 1, - "indocChainId": 1, - "mention": "Congreso", - "normalized": "Congreso", - "type": "ORGANIZATION" - }, - { - "confidence": 0.0029370784759521484, - "count": 1, - "indocChainId": 2, - "mention": "Bolivia", - "normalized": "Bolivia", - "type": "LOCATION" - }, - { - "confidence": 0.00901263952255249, - "count": 1, - "indocChainId": 4, - "mention": "Michelle Bachelet", - "normalized": "Michelle Bachelet", - "type": "PERSON" - }, - { - "confidence": 0.028369784355163574, - "count": 1, - "indocChainId": 9, - "mention": "Sebastián Piñera", - "normalized": "Sebastián Piñera", - "type": 
"PERSON" - }, - { - "confidence": 0.03386145830154419, - "count": 1, - "indocChainId": 10, - "mention": "Chile", - "normalized": "Chile", - "type": "LOCATION" - }, - { - "confidence": 0.002088606357574463, - "count": 1, - "indocChainId": 11, - "mention": "Mandataria", - "normalized": "Mandataria", - "type": "ORGANIZATION" - }, - { - "confidence": 0.03489595651626587, - "count": 1, - "indocChainId": 15, - "mention": "La Paz", - "normalized": "La Paz", - "type": "LOCATION" - }, - { - "confidence": 0.013820111751556396, - "count": 1, - "indocChainId": 17, - "mention": "Eduardo Frei", - "normalized": "Eduardo Frei", - "type": "PERSON" - }, - { - "confidence": 0.015299737453460693, - "count": 1, - "indocChainId": 18, - "mention": "Presidente", - "normalized": "Presidente", - "type": "TITLE" - }, - { - "confidence": 0.0008376836776733398, - "count": 1, - "indocChainId": 19, - "mention": "Pacífico", - "normalized": "Pacífico", - "type": "ORGANIZATION" - }, - { - "confidence": 0.02315753698348999, - "count": 1, - "indocChainId": 21, - "mention": "David Choquehuanca", - "normalized": "David Choquehuanca", - "type": "PERSON" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 22, - "mention": "http://diario.elmercurio.com/2010/01/23/nacional/politica/noticias/47D6CA26-B011-40B5-AEAB-D0E6A5F52E59.htm?id={47D6CA26-B011-40B5-AEAB-D0E6A5F52E59}", - "normalized": "http://diario.elmercurio.com/2010/01/23/nacional/politica/noticias/47D6CA26-B011-40B5-AEAB-D0E6A5F52E59.htm?id={47D6CA26-B011-40B5-AEAB-D0E6A5F52E59}", - "type": "IDENTIFIER:URL" - } - ], - "requestId": "c3de7a33-2be8-41ea-9b4b-bd683be93442", - "timers": { - "rblJe": 8, - "rexJe": 31, - "rliJe": 4 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/spa-doc-entities.status b/tests/mock-data/response/spa-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/spa-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff 
--git a/tests/mock-data/response/xxx-doc-entities.json b/tests/mock-data/response/xxx-doc-entities.json deleted file mode 100644 index 8a4e747..0000000 --- a/tests/mock-data/response/xxx-doc-entities.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.05986414849758148, - "count": 4, - "indocChainId": 0, - "mention": "Europa", - "normalized": "Europa", - "type": "LOCATION" - }, - { - "confidence": 0.008987635374069214, - "count": 2, - "indocChainId": 2, - "mention": "Teneriffa", - "normalized": "Teneriffa", - "type": "LOCATION" - }, - { - "confidence": 0.035944998264312744, - "count": 1, - "indocChainId": 1, - "mention": "Spanien", - "normalized": "Spanien", - "type": "LOCATION" - }, - { - "confidence": 0.03711158037185669, - "count": 1, - "indocChainId": 3, - "mention": "Europäische Union", - "normalized": "Europäische Union", - "type": "LOCATION" - }, - { - "confidence": 0.03636401891708374, - "count": 1, - "indocChainId": 4, - "mention": "Innenminister", - "normalized": "Innenminister", - "type": "TITLE" - }, - { - "confidence": 0.01726752519607544, - "count": 1, - "indocChainId": 8, - "mention": "Professor", - "normalized": "Professor", - "type": "TITLE" - }, - { - "confidence": 0.00736004114151001, - "count": 1, - "indocChainId": 9, - "mention": "Klaus J. Bade", - "normalized": "Klaus J. 
Bade", - "type": "PERSON" - }, - { - "confidence": 0.013958752155303955, - "count": 1, - "indocChainId": 10, - "mention": "Universität Osnabrück", - "normalized": "Universität Osnabrück", - "type": "ORGANIZATION" - }, - { - "confidence": 0.03818148374557495, - "count": 1, - "indocChainId": 11, - "mention": "Somalia", - "normalized": "Somalia", - "type": "LOCATION" - }, - { - "confidence": 0.03774428367614746, - "count": 1, - "indocChainId": 12, - "mention": "Fischer", - "normalized": "Fischer", - "type": "PERSON" - }, - { - "confidence": 0.019688069820404053, - "count": 1, - "indocChainId": 14, - "mention": "Wolfgang Schäuble", - "normalized": "Wolfgang Schäuble", - "type": "PERSON" - }, - { - "confidence": 0.06329077482223511, - "count": 1, - "indocChainId": 14, - "mention": "Schäuble", - "normalized": "Schäuble", - "type": "PERSON" - }, - { - "confidence": 0.024634122848510742, - "count": 1, - "indocChainId": 15, - "mention": "CDU", - "normalized": "CDU", - "type": "ORGANIZATION" - }, - { - "confidence": 0.023647606372833252, - "count": 1, - "indocChainId": 16, - "mention": "Brüssel", - "normalized": "Brüssel", - "type": "LOCATION" - } - ], - "requestId": "31401656-5766-4965-a472-51e3744b87bb", - "timers": { - "rblJe": 4, - "rexJe": 23, - "rliJe": 3 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/xxx-doc-entities.status b/tests/mock-data/response/xxx-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/xxx-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file diff --git a/tests/mock-data/response/zho-doc-entities.json b/tests/mock-data/response/zho-doc-entities.json deleted file mode 100644 index d66a989..0000000 --- a/tests/mock-data/response/zho-doc-entities.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "entities": [ - { - "confidence": 0.029630446434020997, - "count": 5, - "indocChainId": 1, - "mention": "联合国", - "normalized": "联合国", - "type": "ORGANIZATION" - }, - { - 
"confidence": 0.016980469226837158, - "count": 5, - "indocChainId": 6, - "mention": "海地", - "normalized": "海地", - "type": "LOCATION" - }, - { - "confidence": 0.032270610332489014, - "count": 2, - "indocChainId": 7, - "mention": "联大", - "normalized": "联大", - "type": "ORGANIZATION" - }, - { - "confidence": 0.011038780212402344, - "count": 1, - "indocChainId": 0, - "mention": "新华网", - "normalized": "新华网", - "type": "ORGANIZATION" - }, - { - "confidence": 0.022758543491363525, - "count": 1, - "indocChainId": 2, - "mention": "记者", - "normalized": "记者", - "type": "TITLE" - }, - { - "confidence": 0.00045305490493774414, - "count": 1, - "indocChainId": 3, - "mention": "白洁 王湘江", - "normalized": "白洁 王湘江", - "type": "PERSON" - }, - { - "confidence": 0.08852261304855347, - "count": 1, - "indocChainId": 8, - "mention": "纽约", - "normalized": "纽约", - "type": "LOCATION" - }, - { - "confidence": 0.005361199378967285, - "count": 1, - "indocChainId": 12, - "mention": "主席", - "normalized": "主席", - "type": "TITLE" - }, - { - "confidence": 0.015633702278137207, - "count": 1, - "indocChainId": 13, - "mention": "哈萨克斯坦", - "normalized": "哈萨克斯坦", - "type": "LOCATION" - }, - { - "confidence": 0.010837018489837646, - "count": 1, - "indocChainId": 15, - "mention": "艾季莫娃", - "normalized": "艾季莫娃", - "type": "PERSON" - }, - { - "confidence": 1.0, - "count": 1, - "indocChainId": 19, - "mention": "http://news.xinhuanet.com/world/2010-01/23/content_12860329.htm", - "normalized": "http://news.xinhuanet.com/world/2010-01/23/content_12860329.htm", - "type": "IDENTIFIER:URL" - } - ], - "requestId": "96870360-8add-4767-a808-aaccea4fb39a", - "timers": { - "rblJe": 40, - "rexJe": 80, - "rliJe": 10 - } -} \ No newline at end of file diff --git a/tests/mock-data/response/zho-doc-entities.status b/tests/mock-data/response/zho-doc-entities.status deleted file mode 100644 index ae4ee13..0000000 --- a/tests/mock-data/response/zho-doc-entities.status +++ /dev/null @@ -1 +0,0 @@ -200 \ No newline at end of file 
diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index 9ec5073..ca8085c 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2015 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,323 +18,391 @@ # To run tests, run `py.test test_rosette_api.py` -import glob -import httpretty import json -import os -import pytest -import re import sys -try: - from StringIO import StringIO as streamIO -except ImportError: - from io import BytesIO as streamIO -import gzip -from rosette.api import API, DocumentParameters, NameTranslationParameters, NameSimilarityParameters, RelationshipsParameters, RosetteException +import platform +import pook +import pytest +from rosette.api import (AddressSimilarityParameters, + API, + DocumentParameters, + NameTranslationParameters, + NameSimilarityParameters, + NameDeduplicationParameters, + RecordSimilarityParameters, + RosetteException) + +_ISPY3 = sys.version_info[0] == 3 + -_IsPy3 = sys.version_info[0] == 3 +def get_base_url(): + return "https://analytics.babelstreet.com/rest/" @pytest.fixture -def json_response(scope="module"): - body = json.dumps({'name': 'Rosette API', 'versionChecked': True}) +def json_response(): + """ fixture to return info body""" + body = json.dumps({'name': 'Babel Street Analytics', + 'versionChecked': True}) return body @pytest.fixture def api(): - api = API('bogus_key') - return api + """ fixture to return api key""" + tmp_api = API('bogus_key') + return tmp_api @pytest.fixture -def json_429(scope="module"): - body = json.dumps({'message': 'too many requests', 'versionChecked': True}) +def json_409(): + """ fixture to return 409 body""" + body = json.dumps({'code': 'incompatibleClientVersion', + 'message': 'the version of client library used' + ' is 
not compatible with this server', + 'versionChecked': True}) return body @pytest.fixture -def doc_params(scope="module"): +def doc_params(): + """ fixture to return basic DocumentParameters""" params = DocumentParameters() params['content'] = 'Sample test string' return params -# Of Note: httpretty provides a short hand decorator, @httpretty.activate, that wraps the decorated -# function with httpretty.enable() and ends it with httpretty.disable(). However, when combined with -# pytest fixtures, the passed in fixture arguments are ignored, resulting in a TypeError. Use the old -# enable/disable to avoid this. - -# Test that pinging the API is working properly -# @httpretty.activate - - -def test_ping(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/ping", - body=json_response, status=200, content_type="application/json") - - result = api.ping() - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() - -# Test that getting the info about the API is being called correctly +@pytest.fixture +def doc_map(): + """ fixture for a simple map of doc request """ + return {'content': 'Simple test string'} -def test_info(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - result = api.info() - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +def test_option_get_set_clear(api): + """Tests the get/set/clear methods""" + api.set_option('test', 'foo') + assert api.get_option('test') == 'foo' -# Test for 429 + api.clear_options() + assert api.get_option('test') is None -def test_for_429(api, json_429): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=json_429, status=429, content_type="application/json") +def test_option_clear_single_option(api): + """Test the 
clear single option""" + api.set_option('test', 'foo') + assert api.get_option('test') == 'foo' - with pytest.raises(RosetteException) as e_rosette: - result = api.info() + api.set_option('test', None) + assert api.get_option('test') is None - assert e_rosette.value.status == 429 - httpretty.disable() - httpretty.reset() -# Test the language endpoint +def test_url_parameter_getsetclear(api): + """Tests get/set/clear url parameter""" + api.set_url_parameter('test', 'foo') + assert api.get_url_parameter('test') == 'foo' + api.clear_url_parameters() + assert api.get_url_parameter('test') is None -def test_the_language_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/language", - body=json_response, status=200, content_type="application/json") - result = api.language(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +def test_url_parameter_clear_single(api): + """Test the clearing of a single url parameter""" + api.set_url_parameter('test', 'foo') + assert api.get_url_parameter('test') == 'foo' -# Test the sentences endpoint + api.set_url_parameter('test', None) + assert api.get_url_parameter('test') is None -def test_the_sentences_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/sentences", - body=json_response, status=200, content_type="application/json") +def test_custom_header_props(api): + """Test custom header get/set/clear""" + key = 'X-BabelStreetAPI-Test' + value = 'foo' + api.set_custom_headers(key, value) + assert value == 
api.get_custom_headers()[key] - result = api.sentences(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + api.clear_custom_headers() + assert len(api.get_custom_headers()) == 0 -# Test the tokens endpoint +def test_invalid_header(api): + """Test for invalid header""" + key = 'test' + value = 'foo' + api.set_custom_headers(key, value) -def test_the_tokens_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/tokens", - body=json_response, status=200, content_type="application/json") + with pytest.raises(RosetteException) as e_rosette: + api.info() - result = api.tokens(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + assert e_rosette.value.status == 'badHeader' -# Test the morphology complete endpoint +def test_user_agent(api): + """ Test user agent """ + value = ("Babel-Street-Analytics-API-Python/" + + api.get_binding_version() + "/" + platform.python_version()) + assert value == api.get_user_agent_string() -def test_the_morphology_complete_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/complete", - body=json_response, status=200, content_type="application/json") - result = api.morphology(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_ping_pook(api, json_response): + pook.get(url=get_base_url() + "v1/ping", + response_json=json_response, + reply=200) -# Test the morphology lemmas endpoint + result = api.ping() + assert result["name"] 
== "Babel Street Analytics" -def test_the_morphology_lemmas_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/lemmas", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_info(api, json_response): + pook.get(url=get_base_url() + "v1/info", + response_json=json_response, + reply=200) - result = api.morphology(doc_params, 'lemmas') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + result = api.info() + assert result["name"] == "Babel Street Analytics" -# Test the morphology parts-of-speech endpoint +@pook.on +def test_for_409(api, json_409): + pook.get(url=get_base_url() + "v1/info", + response_json=json_409, + reply=409) -def test_the_morphology_parts_of_speech_endpoint( - api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/parts-of-speech", - body=json_response, status=200, content_type="application/json") + with pytest.raises(RosetteException) as e_rosette: + result = api.info() - result = api.morphology(doc_params, 'parts-of-speech') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + assert e_rosette.value.status == 'incompatibleClientVersion' -# Test the morphology compound-components endpoint +@pook.on +@pytest.mark.parametrize("header_key", + ['x-rosetteapi-concurrency', + 'x-babelstreetapi-concurrency']) +def test_the_max_pool_size_header(json_response, doc_params, header_key): + pook.post(url=get_base_url() + "v1/language", + response_json=json_response, + reply=200, + 
response_headers={header_key: 5}) -def test_the_morphology_compound_components_endpoint( - api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/compound-components", - body=json_response, status=200, content_type="application/json") + api = API('bogus_key') + assert api.get_pool_size() == 1 + result = api.language(doc_params) + assert result["name"] == "Babel Street Analytics" + assert api.get_pool_size() == 5 + api.set_pool_size(11) + assert api.get_pool_size() == 11 - result = api.morphology(doc_params, 'compound-components') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() -# Test the morphology han-readings endpoint +@pook.on +def test_the_max_pool_size_both(json_response, doc_params): + pook.post(url=get_base_url() + "v1/language", + response_json=json_response, + reply=200, + response_headers={'x-rosetteapi-concurrency': 5, + 'x-babelstreetapi-concurrency': 8}) + api = API('bogus_key') + assert api.get_pool_size() == 1 + result = api.language(doc_params) + assert result["name"] == "Babel Street Analytics" + assert api.get_pool_size() == 8 + api.set_pool_size(11) + assert api.get_pool_size() == 11 -def test_the_morphology_han_readings_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/han-readings", - body=json_response, status=200, content_type="application/json") - result = api.morphology(doc_params, 'han-readings') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_a_doc_endpoint_fails_on_map(api, 
json_response, doc_map): + pook.post(url=get_base_url() + "v1/language", + response_json=json_response, + reply=200) -# Test the entities endpoint + with pytest.raises(RosetteException) as e_rosette: + result = api.language(doc_map) + assert e_rosette.value.status == 'incompatible' + + +@pook.on +@pytest.mark.parametrize("endpoint", + ['categories', + 'entities', + 'events', + 'language', + 'morphology/complete', + 'morphology/compound-components', + 'morphology/han-readings', + 'morphology/lemmas', + 'morphology/parts-of-speech', + 'relationships', + 'semantics/similar', + 'semantics/vector', + 'sentences', + 'sentiment', + 'syntax/dependencies', + 'tokens', + 'topics', + 'transliteration']) +def test_document_endpoints(api, json_response, doc_params, endpoint): + pook.post(url=get_base_url() + "v1/" + endpoint, + response_json=json_response, + reply=200) + + # TODO: Convert to match-case when minimum supported version is 3.10 + if endpoint == "categories": + result = api.categories(doc_params) + elif endpoint == "entities": + result = api.entities(doc_params) + elif endpoint == "events": + result = api.events(doc_params) + elif endpoint == "language": + result = api.language(doc_params) + elif endpoint == "morphology/complete": + result = api.morphology(doc_params) + elif endpoint == "morphology/compound-components": + result = api.morphology(doc_params, "compound-components") + elif endpoint == "morphology/han-readings": + result = api.morphology(doc_params, "han-readings") + elif endpoint == "morphology/lemmas": + result = api.morphology(doc_params, "lemmas") + elif endpoint == "morphology/parts-of-speech": + result = api.morphology(doc_params, "parts-of-speech") + elif endpoint == "relationships": + api.set_option('accuracyMode', 'PRECISION') + result = api.relationships(doc_params) + elif endpoint == "semantics/similar": + result = api.similar_terms(doc_params) + elif endpoint == "semantics/vector": + result = api.semantic_vectors(doc_params) + elif endpoint 
== "sentences": + result = api.sentences(doc_params) + elif endpoint == "sentiment": + result = api.sentiment(doc_params) + elif endpoint == "syntax/dependencies": + result = api.syntax_dependencies(doc_params) + elif endpoint == "tokens": + result = api.tokens(doc_params) + elif endpoint == "topics": + result = api.topics(doc_params) + elif endpoint == "transliteration": + result = api.transliteration(doc_params) + else: + raise Exception("Unknown endpoint.") + + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_multipart_operation(api, json_response, doc_params, tmpdir): + pook.post(url=get_base_url() + "v1/sentiment", + response_json=json_response, + reply=200) + tmp_file = tmpdir.mkdir("sub").join("testfile.txt") + tmp_file.write(json_response) + doc_params.load_document_file = tmp_file + result = api.sentiment(doc_params) + assert result["name"] == "Babel Street Analytics" -def test_the_entities_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities", - body=json_response, status=200, content_type="application/json") - result = api.entities(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_incompatible_type(api, json_response): + pook.post(url=get_base_url() + "v1/sentences", + response_json=json_response, + reply=200) -# Test the entities/linked endpoint + params = NameTranslationParameters() + params["name"] = "some data to translate" + params["entityType"] = "PERSON" + params["targetLanguage"] = "eng" + params["targetScript"] = "Latn" + # oops, called sentences + with pytest.raises(RosetteException) as e_rosette: + api.sentences(params) -def test_the_entities_linked_endpoint(api, json_response, doc_params): - httpretty.enable() - 
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities/linked", - body=json_response, status=200, content_type="application/json") - result = api.entities(doc_params, True) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_the_name_translation_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/name-translation", + response_json=json_response, + reply=200) -# Test the categories endpoint + params = NameTranslationParameters() + params["name"] = "some data to translate" + params["entityType"] = "PERSON" + params["targetLanguage"] = "eng" + params["targetScript"] = "Latn" + result = api.name_translation(params) + assert result["name"] == "Babel Street Analytics" -def test_the_categories_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/categories", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_the_name_requests_with_text(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) - result = api.categories(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + with pytest.raises(RosetteException) as e_rosette: + result = api.name_similarity("should fail") + assert e_rosette.value.status == 'incompatible' -# Test the sentiment endpoint + with pytest.raises(RosetteException) as e_rosette: + result = api.name_translation("should fail") + assert e_rosette.value.status == 'incompatible' + with pytest.raises(RosetteException) as e_rosette: + result = 
api.name_deduplication("should fail") + assert e_rosette.value.status == 'incompatible' -def test_the_sentiment_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/sentiment", - body=json_response, status=200, content_type="application/json") + with pytest.raises(RosetteException) as e_rosette: + result = api.address_similarity("should fail") + assert e_rosette.value.status == 'incompatible' - result = api.sentiment(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + with pytest.raises(RosetteException) as e_rosette: + result = api.record_similarity("should fail") + assert e_rosette.value.status == 'incompatible' -# Test the multipart operation +@pook.on +def test_the_name_similarity_single_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) -def test_the_multipart_operation(api, json_response, doc_params, tmpdir): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/sentiment", - body=json_response, status=200, content_type="application/json") - - p = tmpdir.mkdir("sub").join("testfile.txt") - p.write(json_response) - doc_params.load_document_file = p - result = api.sentiment(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + matched_name_data1 = "John Mike Smith" + matched_name_data2 = "John Joe Smith" + params = NameSimilarityParameters() + params["name1"] = {"text": matched_name_data1} + params["name2"] = {"text": matched_name_data2} + params["parameters"] = {"conflictScore": 
"0.9"} -# Test the name translation endpoint + result = api.name_similarity(params) + assert result["name"] == "Babel Street Analytics" -def test_the_name_translation_endpoint(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-translation", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_the_name_similarity_multiple_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) - params = NameTranslationParameters() - params["name"] = "some data to translate" - params["entityType"] = "PERSON" - params["targetLanguage"] = "eng" - params["targetScript"] = "Latn" - result = api.name_translation(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + matched_name_data1 = "John Mike Smith" + matched_name_data2 = "John Joe Smith" + params = NameSimilarityParameters() + params["name1"] = {"text": matched_name_data1} + params["name2"] = {"text": matched_name_data2} + params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.5"} -# Test the name similarity endpoint + result = api.name_similarity(params) + assert result["name"] == "Babel Street Analytics" +@pook.on def test_the_name_similarity_endpoint(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity", - body=json_response, status=200, content_type="application/json") + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) matched_name_data1 = "Michael Jackson" matched_name_data2 = "迈克尔·杰克逊" 
@@ -346,154 +414,295 @@ def test_the_name_similarity_endpoint(api, json_response): params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} result = api.name_similarity(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + assert result["name"] == "Babel Street Analytics" -# Test the relationships endpoint +@pook.on +def test_name_deduplication_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/name-deduplication", + response_json=json_response, + reply=200) -def test_the_relationships_endpoint(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/relationships", - body=json_response, status=200, content_type="application/json") + params = NameDeduplicationParameters() - params = RelationshipsParameters() - params["content"] = "some text data" - params["options"] = {"accuracyMode": "PRECISION"} - result = api.relationships(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + with pytest.raises(RosetteException) as e_rosette: + api.name_deduplication(params) + + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Name De-Duplication parameter is missing: names') + + params["names"] = ["John Smith", "Johnathon Smith", "Fred Jones"] + + result = api.name_deduplication(params) + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_name_deduplication_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/name-deduplication", + response_json=json_response, + reply=200) + + dedup_list = ["John Smith", "Johnathon Smith", "Fred Jones"] + threshold = 0.75 + params = NameDeduplicationParameters() + params["names"] = dedup_list + params["threshold"] = threshold -# Test for non 
200 + result = api.name_deduplication(params) + assert result["name"] == "Babel Street Analytics" -def test_for_404(api, json_response): - httpretty.enable() - body = json.dumps({'message': 'not found'}) - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=body, status=404, content_type="application/json") +@pook.on +def test_for_404(api): + pook.get(url=get_base_url() + "v1/info", + response_json={'message': 'not found'}, + reply=404) with pytest.raises(RosetteException) as e_rosette: - result = api.info() + api.info() assert e_rosette.value.status == 404 assert e_rosette.value.message == 'not found' - httpretty.disable() - httpretty.reset() - -# Test for content and contentUri -def test_for_content_and_contentUri(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_both_content_and_content_uri(api, json_response, doc_params): + pook.post(url=get_base_url() + "v1/entities", + response_json=json_response, + reply=200) - doc_params['contentUri'] = 'http://google.com' + doc_params['contentUri'] = 'https://example.com' with pytest.raises(RosetteException) as e_rosette: - result = api.entities(doc_params) + api.entities(doc_params) assert e_rosette.value.status == 'badArgument' - assert e_rosette.value.message == 'Cannot supply both Content and ContentUri' - httpretty.disable() - httpretty.reset() - -# Test for content and contentUri + assert (e_rosette.value.message == + 'Cannot supply both Content and ContentUri') -def test_for_no_content_or_contentUri(api, json_response, 
doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_for_no_content_or_content_uri(api, json_response, doc_params): + pook.post(url=get_base_url() + "v1/entities", + response_json=json_response, + reply=200) doc_params['content'] = None with pytest.raises(RosetteException) as e_rosette: - result = api.entities(doc_params) + api.entities(doc_params) assert e_rosette.value.status == 'badArgument' - assert e_rosette.value.message == 'Must supply one of Content or ContentUri' - httpretty.disable() - httpretty.reset() + assert (e_rosette.value.message == + 'Must supply one of Content or ContentUri') + + +@pook.on +def test_for_address_similarity_required_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/address-similarity", + response_json=json_response, + reply=200) + + params = AddressSimilarityParameters() + + with pytest.raises(RosetteException) as e_rosette: + api.address_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Address Similarity parameter is missing: address1') + + params["address1"] = {"houseNumber": "1600", + "road": "Pennsylvania Ave NW", + "city": "Washington", + "state": "DC", + "postCode": "20500"} + + with pytest.raises(RosetteException) as e_rosette: + api.address_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Address Similarity parameter is missing: address2') + + params["address2"] =\ + {"text": "160 Pennsilvana Avenue, Washington, D.C., 20500"} + + result = api.address_similarity(params) + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def 
test_for_address_similarity_optional_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/address-similarity", + response_json=json_response, + reply=200) + + params = AddressSimilarityParameters() + + params["address1"] = {"houseNumber": "1600", + "road": "Pennsylvania Ave NW", + "city": "Washington", + "state": "DC", + "postCode": "20500"} + + params["address2"] =\ + {"text": "160 Pennsilvana Avenue, Washington, D.C., 20500"} -# Test for required Name Similarity parameters + params["parameters"] = {"houseNumberAddressFieldWeight": "0.9"} + result = api.address_similarity(params) + assert result["name"] == "Babel Street Analytics" + +@pook.on def test_for_name_similarity_required_parameters(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity", - body=json_response, status=200, content_type="application/json") + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) matched_name_data1 = "Michael Jackson" matched_name_data2 = "迈克尔·杰克逊" params = NameSimilarityParameters() with pytest.raises(RosetteException) as e_rosette: - result = api.name_similarity(params) + api.name_similarity(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Similarity parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Similarity parameter is missing: name1') params["name1"] = { "text": matched_name_data1, "language": "eng", "entityType": "PERSON"} with pytest.raises(RosetteException) as e_rosette: - result = api.name_similarity(params) + api.name_similarity(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Similarity parameter not supplied' + assert 
(e_rosette.value.message == + 'Required Name Similarity parameter is missing: name2') params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} result = api.name_similarity(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() - -# Test for required Name Translation parameters + assert result["name"] == "Babel Street Analytics" +@pook.on def test_for_name_translation_required_parameters(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-translation", - body=json_response, status=200, content_type="application/json") + pook.post(url=get_base_url() + "v1/name-translation", + response_json=json_response, + reply=200) params = NameTranslationParameters() params["entityType"] = "PERSON" params["targetScript"] = "Latn" with pytest.raises(RosetteException) as e_rosette: - result = api.name_translation(params) + api.name_translation(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Translation parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Translation parameter is missing: name') params["name"] = "some data to translate" with pytest.raises(RosetteException) as e_rosette: - result = api.name_translation(params) + api.name_translation(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Translation parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Translation parameter is missing: targetLanguage') params["targetLanguage"] = "eng" result = api.name_translation(params) - assert result["name"] == "Rosette API" + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_deprecated_endpoints(api, json_response, 
doc_params): + # TEXT_EMBEDDING calls SEMANTIC_VECTORS + pook.post(url=get_base_url() + "v1/semantics/vector", + response_json=json_response, + reply=200) + + result = api.text_embedding(doc_params) + assert result["name"] == "Babel Street Analytics" + + # MATCHED_NAME calls NAME_SIMILARITY + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) + + name_similarity_params = NameSimilarityParameters() + + name_similarity_params["name1"] = { + "text": "Michael Jackson", + "language": "eng", + "entityType": "PERSON"} + + name_similarity_params["name2"] =\ + {"text": "迈克尔·杰克逊", "entityType": "PERSON"} + + result = api.matched_name(name_similarity_params) + assert result["name"] == "Babel Street Analytics" + + # TRANSLATED_NAME calls NAME_TRANSLATION + pook.post(url=get_base_url() + "v1/name-translation", + response_json=json_response, + reply=200) + + name_translation_params = NameTranslationParameters() + name_translation_params["entityType"] = "PERSON" + name_translation_params["targetScript"] = "Latn" + name_translation_params["name"] = "some data to translate" + name_translation_params["targetLanguage"] = "eng" + + result = api.translated_name(name_translation_params) + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_record_similarity_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/record-similarity", + response_json=json_response, + reply=200) + + params = RecordSimilarityParameters() + params["fields"] = {} + params["properties"] = {} + params["records"] = {} + result = api.record_similarity(params) + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_for_record_similarity_required_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/record-similarity", + response_json=json_response, + reply=200) + + params = RecordSimilarityParameters() + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert 
e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Record Similarity parameter is missing: records') + + params["records"] = {} + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Record Similarity parameter is missing: fields') + + params["fields"] = {} - httpretty.disable() - httpretty.reset() + result = api.record_similarity(params) + assert result["name"] == "Babel Street Analytics" diff --git a/tests/tox.ini b/tests/tox.ini new file mode 100644 index 0000000..ed9b966 --- /dev/null +++ b/tests/tox.ini @@ -0,0 +1,12 @@ +[tox] +skipsdist = True +envlist = py2, py3 + +[testenv] +commands = + pytest -s +deps = + pytest + pook + epydoc + requests diff --git a/tox.ini b/tox.ini index bc1dadf..7f53adf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,20 +1,21 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - [tox] +envlist = py3 skipsdist = True -envlist = py26, py27, py33, py34 [testenv] -commands = - {envpython} setup.py install - {envbindir}/py.test --pep8 deps = pytest pep8 - pytest-pep8 - httpretty==0.8.14 + pook epydoc requests + coverage + build + +commands = + python -m build + coverage run -m pytest + coverage xml + +[coverage:run] +relative_files = True