diff --git a/.gitignore b/.gitignore index ea16fe4..be7f8d9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ ehthumbs.db Thumbs.db +# VS Code +.vscode + # Jetbrains **/.idea/* !**/.idea/runConfigurations/ @@ -23,6 +26,7 @@ __pycache__/ # Distribution / packaging .Python +doc/ env/ build/ develop-eggs/ @@ -58,6 +62,7 @@ htmlcov/ nosetests.xml coverage.xml *,cover +.scannerwork/ # Translations *.mo @@ -71,3 +76,9 @@ docs/_build/ # PyBuilder target/ + +settings.json +*.orig + +# Sonar +.scannerwork/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index dd2b691..0000000 --- a/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -language: python -install: - - pip install tox -script: - - tox - -notifications: - slack: - rooms: - - secure: 4FRaTAAiYyeUvgw2RhmblgbNiJO4wmd34OBgWcwURjP9oVmFfSwR9r1LNCdUGxrPOghexSY2DjXIuvIrfTfi/xYbhHb3Kw7PEAyB8IuBMlKtY4NSFou62S2VhYpxyg58T+C7P2zi0eDnDE06pwTCoGPaimxMZQY91yQ0yPYDPVXbwe5SjEgamzlwGBxlS/0A6w1iCPHg27/iO2hXtdW3oLS2I0F/Q8Q95RBkX9hpg6yqHlTV7jRbSqvQ9OFBqk/tXMHQvhoPDGgCgQDuykJuaAYx7g9d0YL0eEYYOh9B/TJ/kNOwdRFBu5kuQ2/nFS5Z0S3Y3UIhdYjUmm9gSMnwIbYnrW22EqDJLoT9Zi3Gv7Prg/8/fSkWsof7BJTMSuXUqO1AxDGKIxFv9uSF1daZoY+AC1ooU1xDu1nNvWVYPlkwEdDxxmHpFkGT3ESTZYccPovQl8Z5K0I1BBAVdJKDzm07lE6VHbxkKcvK6gG0TN3uLxnSlQtjkfJ+aVMq1kxeVsB9lEsKs9oezsKzzbftMm525aXPg+OAv+31CUFWxvT/p4ps8Q+AV6aZpoPHkpK8VryyNirUeZ/m4m4ebDHhD9vcN+JqE9gzshT+0U3g19SvLiUMQtbuZ2BUvrq2hh2LEGs03AFZaNg9AEUVA1PQRhV5NILyoS/lbiBYJPT39Sg= -# - secure: D4VxkkZlj7uaaFbqEBITkJCusVeii436N8X6GijuosUSaee9lqGYUF5ZS9lV6VGMKs719IfSJsCc2v/N4nc9Y/8AKgXd7AWHUwaRR+MC6rLwv9xqH8ZlkTPUKHUXkUQe1f9042PcMxzYa9r0+uKniM9l915Yx0PLaawJrWe3ZVig/uBhn5FueLhtUACzLJcjNcri/BKEgmP2+EgUsupUhujd0MsQd8xN5YIIv6VM5oD0XWXZCnBqHoZK+Qq7LfwDnTmUk7juqTOAjefF6v8IJTDELvSdK9QW7f9x7h0ICabIrI+Gl4IbNLJYEnELBi+X7da2YJUTgUoQrlBB4z+T8KybD8myqV/pJc1n0xrk84g1MtYQ3ahZn7eD3DTjIvK+ML8qAAGwxibF8VsV+GSoo5FcB6e3vV+glHODlwszstVs11PpL1grurVdoa5HPhUbm1jtQixOwO7h8GgGpwO20PuMLfyOfTjfoMcJ3ISjGNHRmUwcCH9avcaHqMLPLHht0z+8inVlDZeTcN+Wi8tk1YXwuQYUkTW46iFmoZNGdygRjTu43h9hCqdG7vcoKrR8oDbMT8X/sfs+z/UaBbuYBr9dT8rrdKJoa9115PaYpCs529DLrw1iGmzr5CkAXDElc1y2F352nwVS+dX7/OIQz1/dbT1ozWQ9Blx6ez5XN84= -# - secure: g/eOC2QCn7oUW234u/P1kmh9OASO9t5g6iC+DflNdnedHnpw8IEG9CsLAnNynOY0KvC/2/Q6BAIFpT/qO8BPdaC2eQnd9LDurOXExxvjKZgTujhqgzijJGzVPSL8//hafj0Ec2+iJZFo/DYMPqUpYDsiKxX53+SMz9/GN/PKVKsI7bzuaOm04xXktU0A76pVfaG0N/qFSVHI64SShACoUXRpTzyUbmksRgzk7FB162R+TH1soPsR3vPh8c2SSWC9msrDc4iljcBhSFTdjL68z+srjDPYMSoOcFXXFNP+dmL+Q0veL/E4e40e7CWIU1O3grOcEcCkaSoZVSpGduNnCst8h6MpgauPtrgwHk4zGMoSl+L6al+nFo/3h2dXeebrQ0tY/hRfZi4Q8xwqG9083TBqi71fTpoFZ7sNtrY4Kdtl5Oa2CFUo7lVn1JB9qQwSa0eai3Whv0RyRqqQe77aDUj0dfD2R3Q61rX5OF/f++W0XtNwHQubzmj8HD/cFneShIQsbl9KgYXoR6HiXcbBiNdmmZjSrzkPYh7vlsujYrz6Cg2msKybWJ/FfOz0tS1cjlCtiCMOTExN6tEF8YCp9l+s+5RLe61pgFPy4Snr9pEjuTS5DYliTyZMY2ZZC1clBQtgE8E2qCG1QSzqnqqiGxj2K4zmLPpB4y1XpW8e3yk= - diff --git a/CI.Jenkinsfile b/CI.Jenkinsfile new file mode 100644 index 0000000..8704189 --- /dev/null +++ b/CI.Jenkinsfile @@ -0,0 +1,65 @@ + + +def versions = [3.9, 3.10, 3.11, 3.12, 3.13] + +def runSonnarForPythonVersion(sourceDir, ver){ + mySonarOpts="-Dsonar.sources=/source -Dsonar.host.url=${env.SONAR_HOST_URL} -Dsonar.login=${env.SONAR_AUTH_TOKEN}" + if("${env.CHANGE_ID}" != "null"){ + mySonarOpts = "$mySonarOpts -Dsonar.pullrequest.key=${env.CHANGE_ID} -Dsonar.pullrequest.branch=${env.BRANCH_NAME}" + } else { + mySonarOpts = "$mySonarOpts -Dsonar.branch.name=${env.BRANCH_NAME}" + } + if ("${env.CHANGE_BRANCH}" != "null") { + mySonarOpts="$mySonarOpts -Dsonar.pullrequest.base=${env.CHANGE_TARGET} -Dsonar.pullrequest.branch=${env.CHANGE_BRANCH}" + } + + // Only run Sonar once. + // Check for new versions at https://binaries.sonarsource.com/?prefix=Distribution/sonar-scanner-cli/ + sonarScannerVersion="6.2.1.4610-linux-x64" + if(ver == 3.13) { + sonarExec="cd /root/ && \ + wget -q https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${sonarScannerVersion}.zip && \ + unzip -q sonar-scanner-cli-${sonarScannerVersion}.zip && \ + cd /source && \ + /root/sonar-scanner-${sonarScannerVersion}/bin/sonar-scanner ${mySonarOpts}" + } else { + sonarExec="echo Skipping Sonar for this version." + } + + sh "docker run \ + --pull always \ + --rm --volume ${sourceDir}:/source \ + python:${ver}-slim \ + bash -c \"apt-get update && \ + apt-get install -y wget unzip && \ + pip3 install tox && \ + cd /source && \ + tox && \ + ${sonarExec} && \ + echo && \ + echo [INFO] Re-permission files for cleanup. && \ + chown -R 9960:9960 /source\"" +} + +node ("docker-light") { + def sourceDir = pwd() + try { + stage("Clean up") { + step([$class: 'WsCleanup']) + } + stage("Checkout Code") { + checkout scm + } + stage("Build & Test") { + withSonarQubeEnv { + + versions.each { ver -> + runSonnarForPythonVersion(sourceDir, ver) + } + } + } + } catch (e) { + currentBuild.result = "FAILED" + throw e + } +} diff --git a/DEVELOPER.md b/DEVELOPER.md new file mode 100644 index 0000000..dfa4aa0 --- /dev/null +++ b/DEVELOPER.md @@ -0,0 +1,77 @@ +## Developer Information + +#### Sonar Scanning +* Uncomment the `sonar.branch.name` line in `sonar-project.properties` and adjust the value to match your branch name. +* Install the `coverage` module in to your virtual environment. + ``` + virtualenv -p python3 ~/venvs/python-binding-development + source ~/venvs/python-binding-development/bin/activate + pip install --upgrade pip + pip install coverage + ``` +* Generate the coverage data. + ``` + coverage run --source=rosette -m pytest + ``` +* Check the results locally + ``` + coverage report + ``` +* Generate the XML coverage report + ``` + coverage xml + ``` +* Push the results to Sonar + ``` + sonar_host=https://sonar.basistech.net + sonar_token= # Generate a token at https://sonar.basistech.net/account/security/ + + docker run \ + --rm \ + -e SONAR_HOST_URL="${sonar_host}" \ + -e SONAR_LOGIN="${sonar_token}" \ + -v "$(pwd):/usr/src" \ + sonarsource/sonar-scanner-cli + + ``` + +### Testing +To test changes you have made to the binding, you can use a pre-configured Docker environment. This environment will: +- Compile the binding within the container. +- Install the binding within the container. +- Execute one or more example files using the installed binding. +- The example files can be executed against a Cloud release or an Enterprise release. +- If a test suite exists, it will also be executed. + +``` +git clone git@github.com:rosette-api/python.git +cd python +# Modify the binding... +docker run -e API_KEY=$API_KEY -v $(pwd):/source rosette/docker-python +``` + +Optional parameters for the `docker run` execution are: + +- `-e ALT_URL=` + - For testing against an Enterprise environment or the staging environment. +- `-e FILENAME=` + - For testing a single example file instead of all the example files. + +To alter the behavior of the pre-configured Docker environment, you can see the Dockerfile source and entry-point +script [here](https://github.com/RosetteTextAnalytics/rapid-development-tools/tree/master/binding-dockerfiles). + +### Documentation Generation +The existing README for documentation generation is [here](docs/README.md). +The next time the API documentation is touched, please refresh the README and migrate it here. + +### Examples README +There's an old [Docker README](examples/docker) in the examples directory that might be a candidate for removal. + +### Building A Release +See the [instructions](https://github.com/RosetteTextAnalytics/rapid-development-tools/tree/master/publish) + +### TODOs +- Inconsistent references with `rosette_api` and `rosette-api` +- Doc generation README cleanup? +- Example Docker file still needed? +- `docker-compose.yaml` still needed? diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..58a6d77 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,35 @@ +properties([ + pipelineTriggers([[$class: "SCMTrigger", scmpoll_spec: "H/15 * * * *"]]) +]) + +node ("docker-light") { + def SOURCEDIR = pwd() + try { + stage("Clean up") { + step([$class: 'WsCleanup']) + } + stage("Checkout Code") { + checkout scm + } + stage("Test with Docker") { + echo "${env.ALT_URL}" + def useUrl = ("${env.ALT_URL}" == "null") ? "${env.BINDING_TEST_URL}" : "${env.ALT_URL}" + withEnv(["API_KEY=${env.ROSETTE_API_KEY}", "ALT_URL=${useUrl}"]) { + sh "docker pull rosette/docker-python" + sh "docker run --rm -e API_KEY=${API_KEY} -e ALT_URL=${ALT_URL} -v ${SOURCEDIR}:/source rosette/docker-python" + } + } + slack(true) + } catch (e) { + currentBuild.result = "FAILED" + slack(false) + throw e + } +} + +def slack(boolean success) { + def color = success ? "#00FF00" : "#FF0000" + def status = success ? "SUCCESSFUL" : "FAILED" + def message = status + ": Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})" + slackSend(color: color, channel: "#rapid", message: message) +} diff --git a/Jenkinsfile.examples b/Jenkinsfile.examples new file mode 100644 index 0000000..887147a --- /dev/null +++ b/Jenkinsfile.examples @@ -0,0 +1,38 @@ +node { + def SOURCEDIR = pwd() + def TEST_CONTAINER = 'examples/python-test' + def DOCKERFILE_DIR = './examples/docker' + try { + stage("Clean up") { + step([$class: 'WsCleanup']) + } + stage("Checkout Code") { + checkout scm + } + stage("Build Dockerfile") { + dir ("${DOCKERFILE_DIR}") { + sh "docker build --no-cache -t ${TEST_CONTAINER} ." + } + } + stage("Run Examples") { + echo "${env.ALT_URL}" + def useUrl = ("${env.ALT_URL}" == "null") ? "${env.BINDING_TEST_URL}" : "${env.ALT_URL}" + withEnv(["API_KEY=${env.ROSETTE_API_KEY}", "ALT_URL=${useUrl}"]) { + sh "docker run --rm -e API_KEY=${API_KEY} -e ALT_URL=${ALT_URL} -v ${SOURCEDIR}:/source ${TEST_CONTAINER}" + } + } + postToTeams(true) + } catch (e) { + currentBuild.result = "FAILED" + postToTeams(false) + throw e + } +} + +def postToTeams(boolean success) { + def webhookUrl = "${env.TEAMS_PNC_JENKINS_WEBHOOK_URL}" + def color = success ? "#00FF00" : "#FF0000" + def status = success ? "SUCCESSFUL" : "FAILED" + def message = "*" + status + ":* '${env.JOB_NAME}' - [${env.BUILD_NUMBER}] - ${env.BUILD_URL}" + office365ConnectorSend(webhookUrl: webhookUrl, color: color, message: message, status: status) +} diff --git a/LICENSE.txt b/LICENSE.txt index 8e1eca0..5723e02 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2014-2016 Basis Technology Corporation. +Copyright (c) 2014-2019 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 075fbf1..bc34f47 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,48 @@ -[![Build Status](https://travis-ci.org/rosette-api/python.svg?branch=master)](https://travis-ci.org/rosette-api/python) + + + + + Babel Street Logo + + -# This is the Python client binding for Rosette API. +# Analytics by Babel Street -Installation ------------- +[![PyPI version](https://badge.fury.io/py/rosette-api.svg)](https://badge.fury.io/py/rosette-api) +[![Python Versions](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions)](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions) -The Python binding requires Python 2.6 or greater and is available through pip: +Our product is a full text processing pipeline from data preparation to extracting the most relevant information and +analysis utilizing precise, focused AI that has built-in human understanding. Text Analytics provides foundational +linguistic analysis for identifying languages and relating words. The result is enriched and normalized text for +high-speed search and processing without translation. -`pip install rosette_api` - -Basic Usage ------------ - -```python -# 1. Set utf-8 encoding. -# -*- coding: utf-8 -*- - -# 2. Imports from rosette.api. -from rosette.api import API, DocumentParameters, MorphologyOutput - -# 3. Create API object. -api = API("[your_api-key]") - -# 4. Create parameters object -params = DocumentParameters() - -# 5. Set parameters. -params["content"] = "The quick brown fox jumped over the lazy dog. Yes he did." - -# 6. Make a call. -result = api.morphology(params) - -# result is a Python dictionary that contains - -{u'tokens': [u'The', u'quick', u'brown', u'fox', u'jumped', u'over', u'the', u'lazy', u'dog', u'.', u'Yes', u'he', u'did', u'.'], u'posTags': [u'DET', u'ADJ', u'ADJ', u'NOUN', u'VERB', u'ADP', u'DET', u'ADJ', u'NOUN', u'PUNCT', u'VERB', u'PRON', u'VERB', u'PUNCT'], u'compoundComponents': [None, None, None, None, None, None, None, None, None, None, None, None, None, None], u'lemmas': [u'the', u'quick', u'brown', u'fox', u'jump', u'over', u'the', u'lazy', u'dog', u'.', u'yes', u'he', u'do', u'.'], u'hanReadings': [None, None, None, None, None, None, None, None, None, None, None, None, None, None]} -``` +Text Analytics extracts events and entities — people, organizations, and places — from unstructured text and adds the +structure of associating those entities into events that deliver only the necessary information for near real-time +decision making. Accompanying tools shorten the process of training AI models to recognize domain-specific events. -The samples use the following procedure: +The product delivers a multitude of ways to sharpen and expand search results. Semantic similarity expands search +beyond keywords to words with the same meaning, even in other languages. Sentiment analysis and topic extraction help +filter results to what’s relevant. -1. If the application reads text in, set encoding to utf-8 in the first line of the script. +## Analytics API Access +- Analytics Cloud [Sign Up](https://developer.babelstreet.com/signup) -2. Import the `rosette.api` packages that your application needs. The `rosette.api` packages include - * `API` - * `DocumentParameters` - * `NameSimilarityParameters` - * `NameTranslationParameters` - * `MorphologyOutput` - * `DataFormat` +## Quick Start -3. Create an `API` object with the `user_key` parameter. - -4. Create a parameters object for your request input: - - | Parameter | Endpoint | - | ----|----| - | `NameSimilarityParameters` | for `/name-similarity` | - | `NameTranslationParameters` | for `/translated-name` | - | `DocumentParameters` | for all other endpoints | - - -5. Set the parameters required for your operation: "`content`" or "`contentUri`" for `DocumentParameters`; -"`name`" and "`targetLanguage`" for `NameTranslationParameters`; "`name1.text`" and "`name2.text`" for - `NameSimilarityParameters`; Other parameters are optional. - -6. Invoke the `API` method for the endpoint you are calling. The methods are - * `entities(linked)` where `linked` is `False` for entity extraction and `True` for entity linking. - * `categories()` - * `sentiment()` - * `language()` - * `morphology(tag)` where tag is a member of `MorphologyOutput`: `LEMMAS`, `PARTS_OF_SPEECH`, `COMPOUND_COMPONENTS`, `HAN_READINGS`, or `COMPLETE`. An empty tag is equivalent to `COMPLETE`. - * `sentences()` - * `tokens()` - * `relationships()` - * `name_translation()` - * `name_similarity()` - * `matched_name()` *deprecated - * `translated_name()` *deprecated - -7. The API will return a dictionary with the results. - -See [examples](examples) for more request samples. - -API Documentation ------------------ +#### Installation +`pip install rosette_api` -See [documentation](http://rosette-api.github.io/python) +#### Examples +View small example programs for each Analytics endpoint +in the [examples](https://github.com/rosette-api/python/tree/develop/examples) directory. -Additional Information ----------------------- +#### Documentation & Support +- [Binding API](https://rosette-api.github.io/python/) +- [Analytics Platform API](https://docs.babelstreet.com/API/en/index-en.html) +- [Binding Release Notes](https://github.com/rosette-api/python/wiki/Release-Notes) +- [Analytics Platform Release Notes](https://docs.babelstreet.com/Release/en/rosette-cloud.html) +- [Support](https://babelstreet.my.site.com/support/s/) +- [Binding License: Apache 2.0](https://github.com/rosette-api/python/blob/develop/LICENSE.txt) -Visit [Rosette API site](https://developer.rosette.com) +## Binding Developer Information +If you are modifying the binding code, please refer to the [developer README](https://github.com/rosette-api/python/tree/develop/DEVELOPER.md) file. diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1ca7e96 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +application: + image: rosette/docker-python + environment: + - API_KEY=$API_KEY + - HTTP_PROXY=http://squid:3128 + - HTTPS_PROXY=https://squid:3128 + volumes: + - .:/source + links: + - proxy:squid + +proxy: + image: datadog/squid + ports: + - 3128:3128 diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index f8b6975..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -FROM ubuntu:14.04 -MAINTAINER Fiona Hasanaj - -ENV DEBIAN_FRONTEND noninteractive -RUN locale-gen en_US.UTF-8 && /usr/sbin/update-locale LANG=en_US.UTF-8 -ENV LANG en_US.UTF-8 - -# proper init to handle signal propagation and zombie reaping -ADD https://github.com/krallin/tini/releases/download/v0.8.4/tini /tini -RUN chmod +x /tini -ENTRYPOINT ["/tini", "--"] - -RUN apt-get update && \ - apt-get -y install \ - wget \ - curl \ - libssl-dev \ - libffi-dev \ - python-pip \ - python-software-properties \ - software-properties-common && \ - add-apt-repository -y ppa:fkrull/deadsnakes && \ - apt-get update && \ - apt-get -y install \ - python2.6 \ - python2.7 \ - python3.3 \ - python3.4 \ - python3.5 \ - git\ - pypy && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -RUN mkdir /install && \ - wget -O /install/pypy3-2.4-linux_x86_64-portable.tar.bz2 \ - "https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3-2.4-linux_x86_64-portable.tar.bz2" && \ - tar jxf /install/pypy3-*.tar.bz2 -C /install && \ - rm /install/pypy3-*.tar.bz2 && \ - ln -s /install/pypy3-*/bin/pypy3 /usr/local/bin/pypy3 - -RUN pip install -U pip && pip install tox -RUN pip install --upgrade autopep8 - -# copy over the necessary files -COPY run_python.sh /python-dev/run_python.sh -RUN chmod 755 /python-dev/run_python.sh -COPY tox.ini /python-dev/tox.ini -WORKDIR /python-dev - -# allow interactive bash inside docker container -CMD ./run_python.sh - -VOLUME ["/source"] diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 3838653..0000000 --- a/docker/README.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -# Docker Image for Python Examples ---- -### Summary -To simplify the running of the Python examples, the Dockerfile will build an image where the examples can be tested against the development source. - -### Basic Usage -Build the docker image, e.g. `docker build -t basistech/python:1.1 .` - -Run an example as `docker run -e API_KEY=api-key -v "path-to-local-python-dir:/source" basistech/python:1.1` - -To test against a specific source file, add `-e FILENAME=filename` before the `-v`, to test against an alternate url, add `-e ALT_URL=alternate_url`. \ No newline at end of file diff --git a/docker/run_python.sh b/docker/run_python.sh deleted file mode 100644 index 04e8c50..0000000 --- a/docker/run_python.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash - -retcode=0 -ping_url="https://api.rosette.com/rest/v1" -errors=( "Exception" "processingFailure" "badRequest" "ParseError" "ValueError" "SyntaxError" "AttributeError" "ImportError" ) - -#------------------ Functions ---------------------------------------------------- -#Gets called when the user doesn't provide any args -function HELP { - echo -e "\nusage: --key API_KEY [--FILENAME filename] [--url ALT_URL]" - echo " API_KEY - Rosette API key (required)" - echo " FILENAME - Python source file (optional)" - echo " ALT_URL - Alternate service URL (optional)" - echo "Compiles and runs the source file(s) using the local development source." - exit 1 -} - -if [ ! -z ${ALT_URL} ]; then - ping_url=${ALT_URL} -fi - -#Checks if Rosette API key is valid -function checkAPI { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" | grep -o "forbidden") - if [ ! -z $match ]; then - echo -e "\nInvalid Rosette API Key" - exit 1 - fi -} - -function cleanURL() { - # strip the trailing slash off of the alt_url if necessary - if [ ! -z "${ALT_URL}" ]; then - case ${ALT_URL} in - */) ALT_URL=${ALT_URL::-1} - echo "Slash detected" - ;; - esac - ping_url=${ALT_URL} - fi -} - -function validateURL() { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" | grep -o "Rosette API") - if [ "${match}" = "" ]; then - echo -e "\n${ping_url} server not responding\n" - exit 1 - fi -} - -function runExample() { - echo -e "\n---------- ${1} start -------------" - result="" - if [ -z ${ALT_URL} ]; then - result="$(python ${1} --key ${API_KEY} 2>&1 )" - else - result="$(python ${1} --key ${API_KEY} --url ${ALT_URL} 2>&1 )" - fi - echo "${result}" - echo -e "\n---------- ${1} end -------------" - for err in "${errors[@]}"; do - if [[ ${result} == *"${err}"* ]]; then - retcode=1 - fi - done -} -#------------------ Functions End ------------------------------------------------ - -#Gets API_KEY, FILENAME and ALT_URL if present -while getopts ":API_KEY:FILENAME:ALT_URL" arg; do - case "${arg}" in - API_KEY) - API_KEY=${OPTARG} - ;; - ALT_URL) - ALT_URL=${OPTARG} - ;; - FILENAME) - FILENAME=${OPTARG} - ;; - esac -done - -cleanURL - -validateURL - - -#Copy the mounted content in /source to current WORKDIR -cp -r -n /source/* . - -#Run the examples -if [ ! -z ${API_KEY} ]; then - checkAPI - #Prerequisite - python /python-dev/setup.py install - cd /python-dev/examples - if [ ! -z ${FILENAME} ]; then - echo -e "\nRunning example against: ${ping_url}\n" - runExample ${FILENAME} - else - echo -e "\nRunning examples against: ${ping_url}\n" - for file in *.py; do - runExample ${file} - done - fi -else - HELP -fi - -#Run unit tests -cd /python-dev -tox - -exit ${retcode} diff --git a/docker/tox.ini b/docker/tox.ini deleted file mode 100644 index 5955b11..0000000 --- a/docker/tox.ini +++ /dev/null @@ -1,18 +0,0 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - -[tox] -skipsdist = True -envlist = py26, py27, py33, py34 - -[testenv] -commands = - py.test {toxinidir}/tests -s --pep8 -deps = - pytest - pytest-pep8 - httpretty - epydoc - requests diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d8f4f61 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python3 -msphinx +SPHINXPROJ = PythonBinding +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..b9f2584 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,23 @@ +## Generating documentation with Sphinx + +The best tutorial for generating documentation can be found [here](http://gisellezeno.com/tutorials/sphinx-for-python-documentation.html) + +Files in the source directory are generally static and should not need to be updated. If another python module is created in `../rosette`, then the source may need to be regenerated using + +`sphinx-apidoc -f -o source/ ../rosette/` + +This will overwrite the *.rst files, which may then require some editing to provide the desired look. Edits to date: +1. index.rst: Changed the `Welcome ...` title to `Python Binding` +1. index.rst: Added minor summary, "This is the API documentation for the Babel Street Analytics API Python Binding. For examples and usage, please refer to our `API Guide `_." +1. conf.py: removed blank line at end of file +1. conf.py: added Babel Street logo +1. conf.py: blank project (let logo handle it) +1. conf.py: added version (auto updated by publish) +1. conf.py: added author +1. conf.py: enabled `sys.path.insert(0, os.path.abspath('../..'))` + +To change the logo, edit conf.py, `html_logo` + +To generate the html run `make html`. The output will be written to `build/html`. This is the step that is run by the `publish.sh` script when publishing the Python binding. Note that the version, which is noted in `conf.py` is not displayed anywhere, but is updated during the publish phase. + +You can view the generated html locally, by navigating to `docs/build/html` and opening `index.html` \ No newline at end of file diff --git a/docs/source/_static/README.md b/docs/source/_static/README.md new file mode 100644 index 0000000..6fa98c9 --- /dev/null +++ b/docs/source/_static/README.md @@ -0,0 +1 @@ +### Place static sphinx files here if needed \ No newline at end of file diff --git a/docs/source/_templates/README.md b/docs/source/_templates/README.md new file mode 100644 index 0000000..055e18a --- /dev/null +++ b/docs/source/_templates/README.md @@ -0,0 +1,3 @@ +### Place sphinx templates here (if desired) + +Default is alabaster \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..2114fa5 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Python Binding documentation build configuration file, created by +# sphinx-quickstart on Fri Jul 28 09:16:12 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = '' +copyright = '2024, Babel Street' +author = 'Babel Street' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '1.31.0' +# The full version, including alpha/beta/rc tags. +release = '1.31.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] +html_logo = 'logo-400x113.png' +html_favicon = 'favicon-16x16.png' + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +html_sidebars = { + '**': [ + 'about.html', + 'relations.html', # needs 'show_related': True theme option to display + 'searchbox.html', + 'donate.html', + ] +} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'PythonBindingdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'PythonBinding.tex', 'Python Binding Documentation', + 'Basis Technology', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'pythonbinding', 'Python Binding Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'PythonBinding', 'Python Binding Documentation', + author, 'PythonBinding', 'One line description of project.', + 'Miscellaneous'), +] diff --git a/docs/source/favicon-16x16.png b/docs/source/favicon-16x16.png new file mode 100644 index 0000000..2750b93 Binary files /dev/null and b/docs/source/favicon-16x16.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..9436fde --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,22 @@ +.. Python Binding documentation master file, created by + sphinx-quickstart on Fri Jul 28 09:16:12 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +Python Binding +========================================== +This is the API documentation for the Babel Street Analytics API Python Binding. For examples and usage, please refer to our `API Guide `_. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/logo-400x113.png b/docs/source/logo-400x113.png new file mode 100644 index 0000000..b411943 Binary files /dev/null and b/docs/source/logo-400x113.png differ diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..7a6d415 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +rosette +======= + +.. toctree:: + :maxdepth: 4 + + rosette diff --git a/docs/source/rosette.rst b/docs/source/rosette.rst new file mode 100644 index 0000000..a6f70fb --- /dev/null +++ b/docs/source/rosette.rst @@ -0,0 +1,22 @@ +rosette package +=============== + +Submodules +---------- + +rosette\.api module +------------------- + +.. automodule:: rosette.api + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: rosette + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/README.md b/examples/README.md index 5413e59..80a19b0 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,42 +1,50 @@ -Python Examples -================== - -These examples are scripts that can be run independently to demonstrate the Rosette API functionality. - -Prerequisite: Either run `pip install rosette_api` or run `python setup.py install` in the python top level folder. - -Alternatively, you can run all the examples with the command line: -`find -maxdepth 1 -name "*.py" -exec tox -- {} --key api-key --url alternate_url \;` - -You can now run your desired _endpoint_.py file to see it in action. -For example, run `python/examples/categories.py` if you want to see the categories -functionality demonstrated. - -All files require you to input your Rosette API User Key after --key to run. -For example: `python ping.py --key 1234567890` -All also allow you to input your own service URL if desired. -For example: `python ping.py --key 1234567890 --service_url http://www.myurl.com` -Some (specified below) allow an additional input of either a file (.html or .txt) or a URL with `--file` or `--url` - -Each example, when run, prints its output to the console. - -| File Name | What it does | -| ------------- |------------- | -| categories.py | Gets the category of a document at a URL | -| entities.py | Gets the entities from a piece of text | -| entities_linked.py | Gets the linked (to Wikipedia) entities from a piece of text | -| info.py | Gets information about Rosette API | -| language.py | Gets the language of a piece of text | -| matched-name.py | Gets the similarity score of two names | -| morphology_complete.py | Gets the complete morphological analysis of a piece of text| -| morphology_compound-components.py | Gets the de-compounded words from a piece of text | -| morphology_han-readings.py | Gets the Chinese words from a piece of text | -| morphology_lemmas.py | Gets the lemmas of words from a piece of text | -| morphology_parts-of-speech.py | Gets the part-of-speech tags for words in a piece of text | -| ping.py | Pings the Rosette API to check for reachability | -| relationships.py | Gets the relationships between entities from a piece of text | -| sentences.py | Gets the sentences from a piece of text | -| sentiment.py | Gets the sentiment of a local file | -| tokens.py | Gets the tokens (words) from a piece of text | -| translated-name.py | Translates a name from one language to another | - +## Endpoint Examples + +Each example file demonstrates one of the capabilities of the Babel Street Analytics Platform. + +Here are some methods for running the examples. Each example will also accept an optional `--url` parameter for +overriding the default URL. + +A note on prerequisites. Analytics API only supports TLS 1.2 so ensure your toolchain also supports it. + +#### Virtualenv/Latest Release +``` +git clone git@github.com:rosette-api/python.git +cd python/examples +python -m venv analytics_venv +source analytics_venv/bin/activate +pip install rosette_api +python ping.py -k $API_KEY +``` + +#### Virtualenv/Local Source +``` +git clone git@github.com:rosette-api/python.git +cd python +python -m venv analytics_venv +source analytics_venv/bin/activate +python setup.py install +cd examples +python ping.py -k $API_KEY +``` + +#### Docker/Latest Release +``` +git clone git@github.com:rosette-api/python.git +cd python/examples +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim +cd /source +pip install rosette_api +python ping.py -k $API_KEY +``` + +#### Docker/Local Source +``` +git clone git@github.com:rosette-api/python.git +cd python +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim +cd /source +python setup.py install +cd examples +python ping.py -k $API_KEY +``` diff --git a/examples/address_similarity.py b/examples/address_similarity.py new file mode 100644 index 0000000..2175817 --- /dev/null +++ b/examples/address_similarity.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get match score (similarity) of two addresses. +""" + +import argparse +import json +import os + +from rosette.api import API, AddressSimilarityParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + params = AddressSimilarityParameters() + params["address1"] = {"houseNumber": "1600", "road": "Pennsylvania Ave NW", "city": "Washington", "state": "DC", "postCode": "20500"} + params["address2"] = "160 Pennsilvana Avenue, Washington, D.C., 20500" + #params["parameters"] = {"houseNumberAddressFieldWeight": "0.9"} + + try: + return api.address_similarity(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/categories.py b/examples/categories.py index 4731edb..6f09c75 100644 --- a/examples/categories.py +++ b/examples/categories.py @@ -1,33 +1,48 @@ # -*- coding: utf-8 -*- """ -Example code to call Rosette API to get the category of a document (at a given URL). +Example code to call Analytics API to get the category of a document (at a given URL). """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): - categories_url_data = "http://www.onlocationvacations.com/2015/03/05/the-new-ghostbusters-movie-begins-filming-in-boston-in-june/" - url = categories_url_data + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + categories_text_data = "If you are a fan of the British television series Downton Abbey and you are planning to be in New York anytime before April 2nd, there is a perfect stop for you while in town." # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#categorization + + # api.set_option('singleLabel', 'true') + # api.set_option('scoreThreshold',- 0.20) + params = DocumentParameters() - # Use a URL to input data instead of a string - params["contentUri"] = url - return api.categories(params) + params["content"] = categories_text_data + try: + return api.categories(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/docker/Dockerfile b/examples/docker/Dockerfile deleted file mode 100644 index 7b46dbe..0000000 --- a/examples/docker/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM python:2.7.11 -MAINTAINER Fiona Hasanaj -ENV MAINTENANCE_DATE 03.28.2016 - -# install necessary software -RUN apt-get -y update && apt-get install -y vim && apt-get install -y git && pip install rosette_api - -COPY run_python.sh /python/examples/run_python.sh -RUN chmod 755 /python/examples/run_python.sh -WORKDIR /python/examples - -# allow interactive bash inside docker container -CMD ./run_python.sh - -VOLUME ["/source"] diff --git a/examples/docker/README.md b/examples/docker/README.md deleted file mode 100644 index 8335d53..0000000 --- a/examples/docker/README.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -# Docker Image for Python Examples ---- -### Summary -To simplify the running of the Python examples, the Dockerfile will build an image and install the latest rosette-api library. - -### Basic Usage -Build the docker image, e.g. `docker build -t basistech/python:1.1 .` - -Run an example as `docker run -e API_KEY=api-key -v "path-to-example-source:/source" basistech/python:1.1` - -To test against a specific source file, add `-e FILENAME=filename` before the `-v` - -Also, to test against an alternate url, add `-e ALT_URL=alternate_url` before the `-v` \ No newline at end of file diff --git a/examples/docker/run_python.sh b/examples/docker/run_python.sh deleted file mode 100644 index 84ec092..0000000 --- a/examples/docker/run_python.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash - -retcode=0 -ping_url="https://api.rosette.com/rest/v1" -errors=( "Exception" "processingFailure" "badRequest" "ParseError" "ValueError" "SyntaxError" "AttributeError" "ImportError" ) - -#------------------ Functions ---------------------------------------------------- - -#Gets called when the user doesn't provide any args -function HELP { - echo -e "\nusage: source_file.py --key API_KEY [--url ALT_URL]" - echo " API_KEY - Rosette API key (required)" - echo " FILENAME - Python source file (optional)" - echo " ALT_URL - Alternate service URL (optional)" - echo "Compiles and runs the source file(s) using the published rosette-api" - exit 1 -} - -if [ ! -z ${ALT_URL} ]; then - ping_url=${ALT_URL} -fi - -#Checks if Rosette API key is valid -function checkAPI() { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" | grep -o "forbidden") - if [ ! -z $match ]; then - echo -e "\nInvalid Rosette API Key" - exit 1 - fi -} - -function cleanURL() { - # strip the trailing slash off of the alt_url if necessary - if [ ! -z "${ALT_URL}" ]; then - case ${ALT_URL} in - */) ALT_URL=${ALT_URL::-1} - echo "Slash detected" - ;; - esac - ping_url=${ALT_URL} - fi -} - -function validateURL() { - match=$(curl "${ping_url}/ping" -H "X-RosetteAPI-Key: ${API_KEY}" -H "user_key: ${API_KEY}" | grep -o "Rosette API") - if [ "${match}" = "" ]; then - echo -e "\n${ping_url} server not responding\n" - exit 1 - fi -} - -function runExample() { - echo -e "\n---------- ${1} start -------------" - result="" - if [ -z ${ALT_URL} ]; then - result="$(python ${1} --key ${API_KEY} 2>&1 )" - else - result="$(python ${1} --key ${API_KEY} --url ${ALT_URL} 2>&1 )" - fi - echo "${result}" - echo -e "\n---------- ${1} end -------------" - for err in "${errors[@]}"; do - if [[ ${result} == *"${err}"* ]]; then - retcode=1 - fi - done -} - -#------------------ Functions End ------------------------------------------------ - -#Gets API_KEY, FILENAME and ALT_URL if present -while getopts ":API_KEY:FILENAME:ALT_URL" arg; do - case "${arg}" in - API_KEY) - API_KEY=${OPTARG} - ;; - FILENAME) - FILENAME=${OPTARG} - ;; - ALT_URL) - ALT_URL=${OPTARG} - ;; - esac -done - -cleanURL - -validateURL - -#Copy the examples from the mounted content in /source to current WORKDIR -cp /source/examples/*.* . - -#Run the examples -if [ ! -z ${API_KEY} ]; then - checkAPI - if [ ! -z ${FILENAME} ]; then - echo -e "\nRunning example against: ${ping_url}\n" - runExample ${FILENAME} - else - echo -e "\nRunning examples against: ${ping_url}\n" - for file in *.py; do - runExample ${file} - done - fi -else - HELP -fi - -exit ${retcode} diff --git a/examples/entities.py b/examples/entities.py index 1029913..beba9de 100644 --- a/examples/entities.py +++ b/examples/entities.py @@ -1,30 +1,46 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get entities from a piece of text. +Example code to call Analytics API to get entities from a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) - entities_text_data = "Bill Murray will appear in new Ghostbusters film: Dr. Peter Venkman was spotted filming a cameo in Boston this… http://dlvr.it/BnsFfS" + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#entity-extraction-and-linking + + # api.set_option('calculateSalience','true') + # api.set_option('linkEntities','false') + # api.set_option('useIndocServer', True) + + entities_text_data = "The Securities and Exchange Commission today announced the leadership of the agency’s trial unit. Bridget Fitzpatrick has been named Chief Litigation Counsel of the SEC and David Gottesman will continue to serve as the agency’s Deputy Chief Litigation Counsel. Since December 2016, Ms. Fitzpatrick and Mr. Gottesman have served as Co-Acting Chief Litigation Counsel. In that role, they were jointly responsible for supervising the trial unit at the agency’s Washington D.C. headquarters as well as coordinating with litigators in the SEC’s 11 regional offices around the country." params = DocumentParameters() params["content"] = entities_text_data - params["genre"] = "social-media" - return api.entities(params) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') + try: + return api.entities(params) + except RosetteException as exception: + print(exception) + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/entities_linked.py b/examples/entities_linked.py deleted file mode 100644 index 512f7f5..0000000 --- a/examples/entities_linked.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Example code to call Rosette API to get linked (against Wikipedia) entities from a piece of text. -""" - -import argparse -import json -import os - -from rosette.api import API, DocumentParameters - - -def run(key, altUrl='https://api.rosette.com/rest/v1/'): - # Create an API instance - api = API(user_key=key, service_url=altUrl) - - entities_linked_text_data = "Last month director Paul Feig announced the movie will have an all-star female cast including Kristen Wiig, Melissa McCarthy, Leslie Jones and Kate McKinnon." - params = DocumentParameters() - params["content"] = entities_linked_text_data - params["genre"] = "social-media" - # This syntax is deprecated, call api.entities(params) - return api.entities(params, True) - - -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') - -if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) diff --git a/examples/events.py b/examples/events.py new file mode 100644 index 0000000..79ffc3e --- /dev/null +++ b/examples/events.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get events from a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + events_text_data = "I am looking for flights to Super Bowl 2022 in Inglewood, LA." + params = DocumentParameters() + params["content"] = events_text_data + + try: + return api.events(params) + except RosetteException as exception: + print(exception) + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/events_negation.py b/examples/events_negation.py new file mode 100644 index 0000000..becc731 --- /dev/null +++ b/examples/events_negation.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get events, based on a set negation option, from a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Double negative, meaning that the event should be skipped with "IGNORE" or "ONLY_NEGATIVE" + # and recognized under "BOTH" or "ONLY_POSITIVE" + events_text_data = "Sam didn't not take a flight to Boston." + params = DocumentParameters() + params["content"] = events_text_data + api.set_option('negation', 'ONLY_POSITIVE') + + + try: + return api.events(params) + except RosetteException as exception: + print(exception) + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/info.py b/examples/info.py index a4c0bd5..8fba621 100644 --- a/examples/info.py +++ b/examples/info.py @@ -1,28 +1,34 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get information such as version and build +Example code to call Analytics API to get information such as version and build """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) - return api.info() + try: + return api.info() + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/language.py b/examples/language.py index 30e886b..e4fa87a 100644 --- a/examples/language.py +++ b/examples/language.py @@ -1,32 +1,38 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to determine the language of a piece of text. +Example code to call Analytics API to determine the language of a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) language_data = "Por favor Señorita, says the man." params = DocumentParameters() params["content"] = language_data - api.setCustomHeaders("X-RosetteAPI-App", "python-app") - return api.language(params) + api.set_custom_headers("X-RosetteAPI-App", "python-app") + try: + return api.language(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/language_multilingual.py b/examples/language_multilingual.py new file mode 100644 index 0000000..36bd8e6 --- /dev/null +++ b/examples/language_multilingual.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to determine the language of a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + language_multilingual_data = "On Thursday, as protesters gathered in Washington D.C., the United States Federal Communications Commission under Chairman Ajit Pai voted 3-2 to overturn a 2015 decision, commonly called Net Neutrality, that forbade Internet service providers (ISPs) such as Verizon, Comcast, and AT&T from blocking individual websites or charging websites or customers more for faster load times. Quatre femmes ont été nommées au Conseil de rédaction de la loi du Qatar. Jeudi, le décret royal du Qatar a annoncé que 28 nouveaux membres ont été nommés pour le Conseil de la Choura du pays. ذكرت مصادر أمنية يونانية، أن 9 موقوفين من منظمة \"د هـ ك ب ج\" الذين كانت قد أوقفتهم الشرطة اليونانية في وقت سابق كانوا يخططون لاغتيال الرئيس التركي رجب طيب أردوغان." + params = DocumentParameters() + params["content"] = language_multilingual_data + api.set_custom_headers("X-RosetteAPI-App", "python-app") + api.set_option('multilingual', True) + + try: + return api.language(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_complete.py b/examples/morphology_complete.py index 6e3dc40..10b1004 100644 --- a/examples/morphology_complete.py +++ b/examples/morphology_complete.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get the complete morphological analysis of a piece of text. +Example code to call Analytics API to get the complete morphological analysis of a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only - morphology_complete_data = "The quick brown fox jumped over the lazy dog. Yes he did." + morphology_complete_data = "The quick brown fox jumped over the lazy dog. 👍🏾 Yes he did. B)" params = DocumentParameters() params["content"] = morphology_complete_data - return api.morphology(params) + try: + return api.morphology(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_compound-components.py b/examples/morphology_compound-components.py index 596f39d..5bacddb 100644 --- a/examples/morphology_compound-components.py +++ b/examples/morphology_compound-components.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get de-compounded words from a piece of text. +Example code to call Analytics API to get de-compounded words from a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only morphology_compound_components_data = "Rechtsschutzversicherungsgesellschaften" params = DocumentParameters() params["content"] = morphology_compound_components_data - return api.morphology(params, MorphologyOutput.COMPOUND_COMPONENTS) + try: + return api.morphology(params, api.morphology_output['COMPOUND_COMPONENTS']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_han-readings.py b/examples/morphology_han-readings.py index 535b314..f5c12f6 100644 --- a/examples/morphology_han-readings.py +++ b/examples/morphology_han-readings.py @@ -1,31 +1,43 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get Chinese readings of words in a piece of text. +Example code to call Analytics API to get Chinese readings of words in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only morphology_han_readings_data = "北京大学生物系主任办公室内部会议" params = DocumentParameters() params["content"] = morphology_han_readings_data - return api.morphology(params, MorphologyOutput.HAN_READINGS) + try: + return api.morphology(params, api.morphology_output['HAN_READINGS']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_lemmas.py b/examples/morphology_lemmas.py index 9617712..dc7bb8d 100644 --- a/examples/morphology_lemmas.py +++ b/examples/morphology_lemmas.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get lemmas for words in a piece of text. +Example code to call Analytics API to get lemmas for words in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only morphology_lemmas_data = "The fact is that the geese just went back to get a rest and I'm not banking on their return soon" params = DocumentParameters() params["content"] = morphology_lemmas_data - return api.morphology(params, MorphologyOutput.LEMMAS) + try: + return api.morphology(params, api.morphology_output['LEMMAS']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/morphology_parts-of-speech.py b/examples/morphology_parts-of-speech.py index 69dbcdb..f020ca2 100644 --- a/examples/morphology_parts-of-speech.py +++ b/examples/morphology_parts-of-speech.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get part-of-speech tags for words in a piece of text. +Example code to call Analytics API to get part-of-speech tags for words in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters, MorphologyOutput +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#morphological-analysis-introduction + + # api.set_option('modelType','perceptron') # Valid for Chinese and Japanese only morphology_parts_of_speech_data = "The fact is that the geese just went back to get a rest and I'm not banking on their return soon" params = DocumentParameters() params["content"] = morphology_parts_of_speech_data - return api.morphology(params, MorphologyOutput.PARTS_OF_SPEECH) + try: + return api.morphology(params, api.morphology_output['PARTS_OF_SPEECH']) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/name_deduplication.py b/examples/name_deduplication.py new file mode 100644 index 0000000..7c69e20 --- /dev/null +++ b/examples/name_deduplication.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to deduplicate a list of names. +""" + +import argparse +import json +import os + +from rosette.api import API, NameDeduplicationParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + name_dedupe_data = "Alice Terry,Alice Thierry,Betty Grable,Betty Gable,Norma Shearer,Norm Shearer,Brigitte Helm,Bridget Helem,Judy Holliday,Julie Halliday" + threshold = 0.75 + params = NameDeduplicationParameters() + params["names"] = name_dedupe_data.split(',') + params["threshold"] = threshold + try: + return api.name_deduplication(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/name_similarity.py b/examples/name_similarity.py index b4ed053..b8a51ec 100644 --- a/examples/name_similarity.py +++ b/examples/name_similarity.py @@ -1,33 +1,41 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get match score (similarity) of two names. +Example code to call Analytics API to get match score (similarity) of two names. """ import argparse import json import os -from rosette.api import API, NameSimilarityParameters +from rosette.api import API, NameSimilarityParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) matched_name_data1 = "Michael Jackson" matched_name_data2 = "迈克尔·杰克逊" params = NameSimilarityParameters() params["name1"] = {"text": matched_name_data1, "language": "eng", "entityType": "PERSON"} params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} - return api.name_similarity(params) + #params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.2"} + + try: + return api.name_similarity(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/name_translation.py b/examples/name_translation.py index c6704e4..455fc50 100644 --- a/examples/name_translation.py +++ b/examples/name_translation.py @@ -1,19 +1,19 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to translate a name from one language to another. +Example code to call Analytics API to translate a name from one language to another. """ import argparse import json import os -from rosette.api import API, NameTranslationParameters +from rosette.api import API, NameTranslationParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) translated_name_data = "معمر محمد أبو منيار القذاف" params = NameTranslationParameters() @@ -21,14 +21,20 @@ def run(key, altUrl='https://api.rosette.com/rest/v1/'): params["entityType"] = "PERSON" params["targetLanguage"] = "eng" params["targetScript"] = "Latn" - return api.name_translation(params) + try: + return api.name_translation(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/ping.py b/examples/ping.py index 98506f3..f908367 100644 --- a/examples/ping.py +++ b/examples/ping.py @@ -1,28 +1,34 @@ # -*- coding: utf-8 -*- - """ -Example code to send Rosette API a ping to check its reachability. +Example code to send Analytics API a ping to check its reachability. """ import argparse import json import os -from rosette.api import API +from rosette.api import API, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) - return api.ping() + try: + return api.ping() + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/record_similarity.py b/examples/record_similarity.py new file mode 100644 index 0000000..a2c1fe2 --- /dev/null +++ b/examples/record_similarity.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get similarity score between a list of records +""" + +import argparse +import json +import os + +from rosette.api import API, RecordSimilarityParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + fields = { + "primaryName": { + "type": "rni_name", + "weight": 0.5 + }, + "dob": { + "type": "rni_date", + "weight": 0.2 + }, + "addr": { + "type": "rni_address", + "weight": 0.5 + }, + "dob2": { + "type": "rni_date", + "weight": 0.1 + }, + "jobTitle": { + "type": "rni_string", + "weight": 0.2 + }, + "age": { + "type": "rni_number", + "weight": 0.4 + }, + "isRetired": { + "type": "rni_boolean", + "weight": 0.05 + } + } + properties = { + "threshold": 0.7, + "includeExplainInfo": True + } + records = { + "left": [ + { + "primaryName": { + "text": "Ethan R", + "entityType": "PERSON", + "language": "eng", + "languageOfOrigin": "eng", + "script": "Latn" + }, + "dob": "1993-04-16", + "addr": "123 Roadlane Ave", + "dob2": { + "date": "04161993", + "format": "MMddyyyy" + }, + "jobTitle": "software engineer" + }, + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Evan R" + }, + "age": 47, + "isRetired": False + } + ], + "right": [ + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Seth R", + "language": "eng" + }, + "jobTitle": "manager", + "isRetired": True + }, + { + "primaryName": "Ivan R", + "dob": { + "date": "1993-04-16" + }, + "addr": { + "houseNumber": "123", + "road": "Roadlane Ave" + }, + "dob2": { + "date": "1993/04/16" + }, + "age": 72, + "isRetired": True + } + ] + } + params = RecordSimilarityParameters() + params["fields"] = fields + params["properties"] = properties + params["records"] = records + + try: + return api.record_similarity(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/relationships.py b/examples/relationships.py index e117ac9..490a527 100644 --- a/examples/relationships.py +++ b/examples/relationships.py @@ -1,31 +1,36 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get entities's relationships from a piece of text. +Example code to call Analytics API to get entities's relationships from a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) - relationships_text_data = "The Ghostbusters movie was filmed in Boston." + api = API(user_key=key, service_url=alt_url) + relationships_text_data = "FLIR Systems is headquartered in Oregon and produces thermal imaging, night vision, and infrared cameras and sensor systems. According to the SEC’s order instituting a settled administrative proceeding, FLIR entered into a multi-million dollar contract to provide thermal binoculars to the Saudi government in November 2008. Timms and Ramahi were the primary sales employees responsible for the contract, and also were involved in negotiations to sell FLIR’s security cameras to the same government officials. At the time, Timms was the head of FLIR’s Middle East office in Dubai." params = DocumentParameters() params["content"] = relationships_text_data - api.setOption('accuracyMode', 'PRECISION') - return api.relationships(params) + try: + return api.relationships(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/run_all.sh b/examples/run_all.sh index 3f18bd7..ab797c9 100644 --- a/examples/run_all.sh +++ b/examples/run_all.sh @@ -1,5 +1,15 @@ #!/bin/bash + +if [ $# -eq 0 ]; then + echo "Usage: $0 API_KEY [ALT_URL]" 1>&2 + exit 1 +fi + for f in *.py do + if [ -n "$2" ]; then + python $f --key $1 --url $2 + else python $f --key $1 + fi done diff --git a/examples/semantic_vectors.py b/examples/semantic_vectors.py new file mode 100644 index 0000000..ef99e5b --- /dev/null +++ b/examples/semantic_vectors.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get text vectors from a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#semantic-vectors + + # api.set_option('perToken', 'true') + + semantic_vectors_data = "Cambridge, Massachusetts" + params = DocumentParameters() + params["content"] = semantic_vectors_data + try: + return api.semantic_vectors(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/sentences.py b/examples/sentences.py index b1b682d..f0c3e12 100644 --- a/examples/sentences.py +++ b/examples/sentences.py @@ -1,32 +1,38 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get sentences in a piece of text. +Example code to call Analytics API to get sentences in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) - sentences_data = "This land is your land. This land is my land\nFrom California to the New York island;\nFrom the red wood forest to the Gulf Stream waters\n\nThis land was made for you and Me.\n\nAs I was walking that ribbon of highway,\nI saw above me that endless skyway:\nI saw below me that golden valley:\nThis land was made for you and me." + sentences_data = "This land is your land. This land is my land, from California to the New York island; from the red wood forest to the Gulf Stream waters. This land was made for you and Me. As I was walking that ribbon of highway, I saw above me that endless skyway: I saw below me that golden valley: This land was made for you and me." params = DocumentParameters() params["content"] = sentences_data - return api.sentences(params) + try: + return api.sentences(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/sentiment.py b/examples/sentiment.py index c43074c..1a292d8 100644 --- a/examples/sentiment.py +++ b/examples/sentiment.py @@ -1,7 +1,7 @@ +#!/usr/bin/env python # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get the sentiment of a local file. +Example code to call Analytics API to get the sentiment of a local file. """ import argparse @@ -9,38 +9,51 @@ import os import tempfile -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create default file to read from - f = tempfile.NamedTemporaryFile(suffix=".html") + temp_file = tempfile.NamedTemporaryFile(suffix=".html") sentiment_file_data = "New Ghostbusters Film

Original Ghostbuster Dan Aykroyd, who also co-wrote the 1984 Ghostbusters film, couldn’t be more pleased with the new all-female Ghostbusters cast, telling The Hollywood Reporter, “The Aykroyd family is delighted by this inheritance of the Ghostbusters torch by these most magnificent women in comedy.”

" message = sentiment_file_data - f.write(message) - f.seek(0) + temp_file.write(message if isinstance(message, bytes) else message.encode()) + temp_file.seek(0) # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#sentiment-analysis + + # api.set_option('modelType','dnn') #Valid for English only params = DocumentParameters() params["language"] = "eng" # Use an HTML file to load data instead of a string - params.load_document_file(f.name) - result = api.sentiment(params) - - # Clean up the file - f.close() + params.load_document_file(temp_file.name) + try: + result = api.sentiment(params) + except RosetteException as exception: + print(exception) + finally: + # Clean up the file + temp_file.close() return result -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/similar_terms.py b/examples/similar_terms.py new file mode 100644 index 0000000..753e397 --- /dev/null +++ b/examples/similar_terms.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get similar terms for an input. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#similar-terms + + api.set_option("resultLanguages", ['spa', 'deu', 'jpn']) + + similar_terms_data = "spy" + params = DocumentParameters() + params["content"] = similar_terms_data + try: + return api.similar_terms(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/syntax_dependencies.py b/examples/syntax_dependencies.py new file mode 100644 index 0000000..e5e99a0 --- /dev/null +++ b/examples/syntax_dependencies.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get the syntactic dependencies of a document (at a given URL). +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + syntax_dependencies_data = "Yoshinori Ohsumi, a Japanese cell biologist, was awarded the Nobel Prize in Physiology or Medicine on Monday." + params = DocumentParameters() + params["content"] = syntax_dependencies_data + # Create an API instance + api = API(user_key=key, service_url=alt_url) + try: + return api.syntax_dependencies(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/tokens.py b/examples/tokens.py index 128a23c..e98601c 100644 --- a/examples/tokens.py +++ b/examples/tokens.py @@ -1,31 +1,44 @@ # -*- coding: utf-8 -*- - """ -Example code to call Rosette API to get the tokens (words) in a piece of text. +Example code to call Analytics API to get the tokens (words) in a piece of text. """ import argparse import json import os -from rosette.api import API, DocumentParameters +from rosette.api import API, DocumentParameters, RosetteException -def run(key, altUrl='https://api.rosette.com/rest/v1/'): +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ # Create an API instance - api = API(user_key=key, service_url=altUrl) + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#tokenization + + # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only tokens_data = "北京大学生物系主任办公室内部会议" params = DocumentParameters() params["content"] = tokens_data - return api.tokens(params) + try: + return api.tokens(params) + except RosetteException as exception: + print(exception) -parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Calls the ' + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') -parser.add_argument('-k', '--key', help='Rosette API Key', required=True) -parser.add_argument('-u', '--url', help="Alternative API URL", default='https://api.rosette.com/rest/v1/') +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') if __name__ == '__main__': - args = parser.parse_args() - result = run(args.key, args.url) - print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8")) + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/topics.py b/examples/topics.py new file mode 100644 index 0000000..e33a745 --- /dev/null +++ b/examples/topics.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to get the topics (key phrases and concepts) in a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#topic-extraction + + # api.set_option('keyphraseSalienceThreshold','.5') + # api.set_option('conceptSalienceThreshold','.1') + + topics_data = "Lily Collins is in talks to join Nicholas Hoult in Chernin Entertainment and Fox Searchlight's J.R.R. Tolkien biopic Tolkien. Anthony Boyle, known for playing Scorpius Malfoy in the British play Harry Potter and the Cursed Child, also has signed on for the film centered on the famed author. In Tolkien, Hoult will play the author of the Hobbit and Lord of the Rings book series that were later adapted into two Hollywood trilogies from Peter Jackson. Dome Karukoski is directing the project." + params = DocumentParameters() + params["content"] = topics_data + try: + return api.topics(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/examples/transliteration.py b/examples/transliteration.py new file mode 100644 index 0000000..bc7c5da --- /dev/null +++ b/examples/transliteration.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Analytics API to transliterate a piece of text. +""" + +import argparse +import json +import os + +from rosette.api import API, DocumentParameters, RosetteException + + +def run(key, alt_url='https://analytics.babelstreet.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + # Set selected API options. + # For more information on the functionality of these + # and other available options, see Analytics Features & Functions + # https://developer.babelstreet.com/features-and-functions#transliteration + + # To transliterate from native Arabic script to Arabizi add: + # api.set_option('reversed','True') + + transliteration_data = "ana r2ye7 el gam3a el sa3a 3 el 3asr" + params = DocumentParameters() + params["content"] = transliteration_data + + try: + return api.transliteration(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Analytics API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://analytics.babelstreet.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/pytest.ini b/pytest.ini index fc6bcc6..b37e476 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] -pep8ignore = E501 norecursedirs = .tox - target \ No newline at end of file + target diff --git a/rosette/__init__.py b/rosette/__init__.py index e6efe5e..5c89253 100644 --- a/rosette/__init__.py +++ b/rosette/__init__.py @@ -1,14 +1,10 @@ """ -Python client for the Rosette API. - -Copyright (c) 2014-2015 Basis Technology Corporation. - +Python client for the Babel Street Analytics API. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,4 +12,4 @@ limitations under the License. """ -__version__ = '1.2.0' +__version__ = '1.31.0' diff --git a/rosette/api.py b/rosette/api.py index 7149f4b..9dadf15 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -1,9 +1,9 @@ #!/usr/bin/env python """ -Python client for the Rosette API. +Python client for the Babel Street Analytics API. -Copyright (c) 2014-2015 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,35 +22,31 @@ import json import logging import sys -import time import os -from socket import gaierror -import requests import re -import warnings - -_BINDING_VERSION = '1.2.0' +import requests +import platform + +_APPLICATION_JSON = 'application/json' +_BINDING_LANGUAGE = 'python' +_BINDING_VERSION = '1.31.0' +# TODO Remove legacies in future release +_LEGACY_CONCURRENCY_HEADER = 'x-rosetteapi-concurrency' +_CONCURRENCY_HEADER = 'x-babelstreetapi-concurrency' +_LEGACY_CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-' +_CUSTOM_HEADER_PREFIX = "X-BabelStreetAPI-" +_CUSTOM_HEADER_PATTERN = re.compile('^(:?' + _CUSTOM_HEADER_PREFIX + '|' + _LEGACY_CUSTOM_HEADER_PREFIX + ')') _GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08]) -_IsPy3 = sys.version_info[0] == 3 - - -try: - import urlparse -except ImportError: - import urllib.parse as urlparse -try: - import httplib -except ImportError: - import http.client as httplib +_ISPY3 = sys.version_info[0] == 3 -if _IsPy3: +if _ISPY3: _GZIP_SIGNATURE = _GZIP_BYTEARRAY else: _GZIP_SIGNATURE = str(_GZIP_BYTEARRAY) -class _ReturnObject: +class _ReturnObject(object): def __init__(self, js, code): self._json = js @@ -61,69 +57,36 @@ def json(self): def _my_loads(obj, response_headers): - if _IsPy3: - d1 = json.loads(obj.decode("utf-8")).copy() - d1.update(response_headers) - return d1 # if py3, need chars. + if _ISPY3: + temp = json.loads(obj.decode("utf-8")).copy() + temp.update(response_headers) + return temp # if py3, need chars. else: - d2 = json.loads(obj).copy() - d2.update(response_headers) - return d2 + temp = json.loads(obj).copy() + temp.update(response_headers) + return temp class RosetteException(Exception): - """Exception thrown by all Rosette API operations for errors local and remote. + """Exception thrown by all Analytics API operations for errors local and remote. TBD. Right now, the only valid operation is conversion to __str__. """ def __init__(self, status, message, response_message): + super(RosetteException, self).__init__(message) self.status = status self.message = message self.response_message = response_message def __str__(self): sst = self.status - if not (isinstance(sst, str)): + if not isinstance(sst, str): sst = repr(sst) return sst + ": " + self.message + ":\n " + self.response_message -class _PseudoEnum: - - def __init__(self): - pass - - @classmethod - def validate(cls, value, name): - values = [] - for (k, v) in vars(cls).items(): - if not k.startswith("__"): - values += [v] - - # this is still needed to make sure that the parameter NAMES are known. - # If python didn't allow setting unknown values, this would be a - # language error. - if value not in values: - raise RosetteException( - "unknownVariable", - "The value supplied for " + - name + - " is not one of " + - ", ".join(values) + - ".", - repr(value)) - - -class MorphologyOutput(_PseudoEnum): - LEMMAS = "lemmas" - PARTS_OF_SPEECH = "parts-of-speech" - COMPOUND_COMPONENTS = "compound-components" - HAN_READINGS = "han-readings" - COMPLETE = "complete" - - -class _DocumentParamSetBase(object): +class _RequestParametersBase(object): def __init__(self, repertoire): self.__params = {} @@ -133,44 +96,46 @@ def __init__(self, repertoire): def __setitem__(self, key, val): if key not in self.__params: raise RosetteException( - "badKey", "Unknown Rosette parameter key", repr(key)) + "badKey", "Unknown Analytics parameter key", repr(key)) self.__params[key] = val def __getitem__(self, key): if key not in self.__params: raise RosetteException( - "badKey", "Unknown Rosette parameter key", repr(key)) + "badKey", "Unknown Analytics parameter key", repr(key)) return self.__params[key] def validate(self): + """validation""" pass def serialize(self, options): + """serialize keys with values""" self.validate() - v = {} - for (key, val) in self.__params.items(): + values = {} + for key, val in self.__params.items(): if val is None: - pass + continue else: - v[key] = val + values[key] = val if options is not None and len(options) > 0: - v['options'] = options + values['options'] = options - return v + return values -def _byteify(s): # py 3 only - l = len(s) - b = bytearray(l) - for ix in range(l): - oc = ord(s[ix]) - assert (oc < 256) - b[ix] = oc - return b +def _byteify(value): # py 3 only + length = len(value) + byte_array = bytearray(length) + for index in range(length): + ordinal = ord(value[index]) + assert ordinal < 256 + byte_array[index] = ordinal + return byte_array -class DocumentParameters(_DocumentParamSetBase): +class DocumentParameters(_RequestParametersBase): """Parameter object for all operations requiring input other than translated_name. Two fields, C{content} and C{inputUri}, are set via @@ -187,10 +152,10 @@ class DocumentParameters(_DocumentParamSetBase): def __init__(self): """Create a L{DocumentParameters} object.""" - _DocumentParamSetBase.__init__( - self, ("content", "contentUri", "language", "genre")) + _RequestParametersBase.__init__( + self, ("content", "contentUri", "language", "profileId")) self.file_name = "" - self.useMultipart = False + self.use_multipart = False def validate(self): """Internal. Do not use.""" @@ -219,33 +184,35 @@ def load_document_file(self, path): be determined by the server. @parameter path: Pathname of a file acceptable to the C{open} function. """ - self.useMultipart = True + self.use_multipart = True self.file_name = path - self.load_document_string(open(path, "rb").read()) + with open(path, "rb") as f: + self.load_document_string(f.read()) - def load_document_string(self, s): + def load_document_string(self, content_as_string): """Loads a string into the object. The string will be taken as bytes or as Unicode dependent upon its native python type. @parameter s: A string, possibly a unicode-string, to be loaded for subsequent analysis. """ - self["content"] = s + self["content"] = content_as_string -class NameTranslationParameters(_DocumentParamSetBase): +class NameTranslationParameters(_RequestParametersBase): """Parameter object for C{name-translation} endpoint. - The following values may be set by the indexing (i.e.,C{ parms["name"]}) operator. The values are all - strings (when not C{None}). + The following values may be set by the indexing (i.e.,C{ parms["name"]}) operator. + The values are all strings (when not C{None}). All are optional except C{name} and C{targetLanguage}. Scripts are in - ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for - more description of these terms, as well as the content of the return result. + ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name + Translation documentation for more description of these terms, as well as the + content of the return result. C{name} The name to be translated. C{targetLangauge} The language into which the name is to be translated. - C{entityType} The entity type (TBD) of the name. + C{entityType} The entity type of the name. PERSON (default), LOCATION, or ORGANIZATION C{sourceLanguageOfOrigin} The language of origin of the name. @@ -259,8 +226,8 @@ class NameTranslationParameters(_DocumentParamSetBase): """ def __init__(self): - self.useMultipart = False - _DocumentParamSetBase.__init__( + self.use_multipart = False + _RequestParametersBase.__init__( self, ("name", "targetLanguage", @@ -269,22 +236,59 @@ def __init__(self): "sourceLanguageOfUse", "sourceScript", "targetScript", - "targetScheme", - "genre")) + "targetScheme")) + + def validate(self): + """Internal. Do not use.""" + for option in "name", "targetLanguage": # required + if self[option] is None: + raise RosetteException( + "missingParameter", + "Required Name Translation parameter is missing: " + option, + repr(option)) + + +class AddressSimilarityParameters(_RequestParametersBase): + """Parameter object for C{address-similarity} endpoint. + + C{address1} and C{address2} are required. + + `parameters` is optional. + + C{address1} The address to be matched, a C{address} object or address string. + + C{address2} The address to be matched, a C{address} object or address string. + + The C{address} object contains these optional fields: + city, island, district, stateDistrict, state, countryRegion, country, worldRegion, postCode, poBox + + `parameters` is a dictionary listing any parameter overrides to include. For example, `postCodeAddressFieldWeight`. + Setting `parameters` is not cumulative. Define all overrides at once. If defined multiple times, only the + final declaration is used. + + See `examples/address_similarity.py` + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("address1", "address2", "parameters")) def validate(self): """Internal. Do not use.""" - for n in ("name", "targetLanguage"): # required - if self[n] is None: + for option in "address1", "address2": # required + if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Translation parameter not supplied", - repr(n)) + "Required Address Similarity parameter is missing: " + option, + repr(option)) -class NameSimilarityParameters(_DocumentParamSetBase): +class NameSimilarityParameters(_RequestParametersBase): """Parameter object for C{name-similarity} endpoint. - All are required. + + C{name1} and C{name2} are required. + + `parameters` is optional. C{name1} The name to be matched, a C{name} object. @@ -292,34 +296,83 @@ class NameSimilarityParameters(_DocumentParamSetBase): The C{name} object contains these fields: - C{text} Text of the name, required. + C{text} Text of the name, required. + + C{language} Language of the name in ISO639 three-letter code, optional. - C{language} Language of the name in ISO639 three-letter code, optional. + C{script} The ISO15924 code of the name, optional. - C{script} The ISO15924 code of the name, optional. + C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional. - C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional. + `parameters` is a dictionary listing any parameter overrides to include. For example, `deletionScore`. + Setting `parameters` is not cumulative. Define all overrides at once. If defined multiple times, only the + final declaration is used. + + See `examples/name_similarity.py` + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("name1", "name2", "parameters")) + + def validate(self): + """Internal. Do not use.""" + for option in "name1", "name2": # required + if self[option] is None: + raise RosetteException( + "missingParameter", + "Required Name Similarity parameter is missing: " + option, + repr(option)) + + +class NameDeduplicationParameters(_RequestParametersBase): + """Parameter object for C{name-deduplication} endpoint. + Required: + C{names} A list of C{name} objects + C{threshold} Threshold to use to restrict cluster size. Can be null to use default value. + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("names", "threshold")) + + def validate(self): + """Internal. Do not use.""" + if self["names"] is None: # required + raise RosetteException( + "missingParameter", + "Required Name De-Duplication parameter is missing: names", + repr("names")) + + +class RecordSimilarityParameters(_RequestParametersBase): + """Parameter object for C{record-similarity} endpoint. + Required: + C{records} The records to be compared; where each left record is compared to the associated right record. + C{properties} Parameters used in the call + C{fields} The definition of the fields used in the comparison. There must be a minimum of 1 field and + can have a maximum of 5 fields. """ def __init__(self): - self.useMultipart = False - _DocumentParamSetBase.__init__(self, ("name1", "name2")) + self.use_multipart = False + _RequestParametersBase.__init__(self, ("fields", "properties", "records")) def validate(self): """Internal. Do not use.""" - for n in ("name1", "name2"): # required - if self[n] is None: + for option in ["records","fields"]: # required + if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Similarity parameter not supplied", - repr(n)) + "Required Record Similarity parameter is missing: " + option, + repr(option)) -class EndpointCaller: +class EndpointCaller(object): """L{EndpointCaller} objects are invoked via their instance methods to obtain results - from the Rosette server described by the L{API} object from which they + from the Analytics server described by the L{API} object from which they are created. Each L{EndpointCaller} object communicates with a specific endpoint - of the Rosette server, specified at its creation. Use the specific + of the Analytics server, specified at its creation. Use the specific instance methods of the L{API} object to create L{EndpointCaller} objects bound to corresponding endpoints. @@ -329,7 +382,7 @@ class EndpointCaller: The results of all operations are returned as python dictionaries, whose keys and values correspond exactly to those of the corresponding - JSON return value described in the Rosette web service documentation. + JSON return value described in the Analytics web service documentation. """ def __init__(self, api, suburl): @@ -339,14 +392,14 @@ def __init__(self, api, suburl): self.service_url = api.service_url self.user_key = api.user_key self.logger = api.logger - self.useMultipart = False + self.use_multipart = False self.suburl = suburl self.debug = api.debug self.api = api - def __finish_result(self, r, ename): - code = r.status_code - the_json = r.json() + def __finish_result(self, response, ename): + code = response.status_code + the_json = response.json() if code == 200: return the_json else: @@ -360,32 +413,45 @@ def __finish_result(self, r, ename): complaint_url = ename + " " + self.suburl raise RosetteException(code, complaint_url + - " : failed to communicate with Rosette", msg) + " : failed to communicate with Babel Street Analytics API", msg) + + def __set_headers(self): + headers = {'Accept': _APPLICATION_JSON, + _CUSTOM_HEADER_PREFIX + 'Binding': _BINDING_LANGUAGE, + _CUSTOM_HEADER_PREFIX + 'Binding-Version': _BINDING_VERSION, + #TODO Remove in future release + _LEGACY_CUSTOM_HEADER_PREFIX + 'Binding': _BINDING_LANGUAGE, + _LEGACY_CUSTOM_HEADER_PREFIX + 'Binding-Version': _BINDING_VERSION} + + custom_headers = self.api.get_custom_headers() + if custom_headers is not None: + for key in custom_headers.keys(): + if _CUSTOM_HEADER_PATTERN.match(key) is not None: + headers[key] = custom_headers[key] + else: + raise RosetteException("badHeader", + "Custom header name must begin with \"" + _CUSTOM_HEADER_PREFIX + "\" or \"" + + _LEGACY_CUSTOM_HEADER_PREFIX + "\"", + key) + self.api.clear_custom_headers() + + if self.debug: + headers[_LEGACY_CUSTOM_HEADER_PREFIX + 'Devel'] = 'true' + + if self.user_key is not None: + headers["X-BabelStreetAPI-Key"] = self.user_key + + return headers def info(self): """Issues an "info" request to the L{EndpointCaller}'s specific endpoint. @return: A dictionary telling server version and other identifying data.""" - url = self.service_url + "info" - headers = {'Accept': 'application/json', 'X-RosetteAPI-Binding': 'python', 'X-RosetteAPI-Binding-Version': _BINDING_VERSION} - - customHeaders = self.api.getCustomHeaders() - pattern = re.compile('^X-RosetteAPI-') - if customHeaders is not None: - for key in customHeaders.keys(): - if pattern.match(key) is not None: - headers[key] = customHeaders[key] - else: - raise RosetteException("badHeader", "Custom header name must begin with \"X-RosetteAPI-\"", key) - self.api.clearCustomHeaders() - - if self.debug: - headers['X-RosetteAPI-Devel'] = 'true' + url = self.service_url + self.api.endpoints["INFO"] + headers = self.__set_headers() self.logger.info('info: ' + url) - if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key - r = self.api._get_http(url, headers=headers) - return self.__finish_result(r, "info") + response = self.api.get_http(url, headers=headers) + return self.__finish_result(response, "info") def ping(self): """Issues a "ping" request to the L{EndpointCaller}'s (server-wide) endpoint. @@ -393,83 +459,66 @@ def ping(self): or is not the right server or some other error occurs, it will be signalled.""" - url = self.service_url + 'ping' - headers = {'Accept': 'application/json', 'X-RosetteAPI-Binding': 'python', 'X-RosetteAPI-Binding-Version': _BINDING_VERSION} - - customHeaders = self.api.getCustomHeaders() - pattern = re.compile('^X-RosetteAPI-') - if customHeaders is not None: - for key in customHeaders.keys(): - if pattern.match(key) is not None: - headers[key] = customHeaders[key] - else: - raise RosetteException("badHeader", "Custom header name must begin with \"X-RosetteAPI-\"", key) - self.api.clearCustomHeaders() - - if self.debug: - headers['X-RosetteAPI-Devel'] = 'true' + url = self.service_url + self.api.endpoints['PING'] + headers = self.__set_headers() self.logger.info('Ping: ' + url) - if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key - r = self.api._get_http(url, headers=headers) - return self.__finish_result(r, "ping") + response = self.api.get_http(url, headers=headers) + return self.__finish_result(response, "ping") - def call(self, parameters): + def call(self, parameters, paramtype=None): """Invokes the endpoint to which this L{EndpointCaller} is bound. Passes data and metadata specified by C{parameters} to the server endpoint to which this L{EndpointCaller} object is bound. For all - endpoints except C{name-translation} and C{name-similarity}, it must be a L{DocumentParameters} - object or a string; for C{name-translation}, it must be an L{NameTranslationParameters} object; - for C{name-similarity}, it must be an L{NameSimilarityParameters} object. For relationships, + endpoints except C{name-translation} and C{name-similarity}, it must be + a L{DocumentParameters} object or a string; for C{name-translation}, it + must be an L{NameTranslationParameters} object; for C{name-similarity}, + it must be an L{NameSimilarityParameters} object. For relationships, it may be an L(DocumentParameters). In all cases, the result is returned as a python dictionary conforming to the JSON object described in the endpoint's entry - in the Rosette web service documentation. + in the Analytics web service documentation. @param parameters: An object specifying the data, and possible metadata, to be processed by the endpoint. See the details for those object types. - @type parameters: For C{name-translation}, L{NameTranslationParameters}, otherwise L{DocumentParameters} or L{str} + @type parameters: Parameters types or L{str} for document request. + @param paramtype: Required parameters type. @return: A python dictionary expressing the result of the invocation. """ + if paramtype and not isinstance(parameters, paramtype): + raise RosetteException( + "incompatible", + "The parameters must be " + str(paramtype), + self.suburl) - if not isinstance(parameters, _DocumentParamSetBase): - if self.suburl != "name-similarity" and self.suburl != "name-translation": - text = parameters - parameters = DocumentParameters() - parameters['content'] = text - else: - raise RosetteException( - "incompatible", - "Text-only input only works for DocumentParameter endpoints", - self.suburl) + if type(parameters) == str: + text = parameters + parameters = DocumentParameters() + parameters['content'] = text - self.useMultipart = parameters.useMultipart + if not paramtype and not isinstance(parameters, DocumentParameters): + raise RosetteException( + "incompatible", + "The parameters must be string or DocumentParameters", + self.suburl) + + self.use_multipart = parameters.use_multipart url = self.service_url + self.suburl params_to_serialize = parameters.serialize(self.api.options) headers = {} if self.user_key is not None: + headers = self.__set_headers() - customHeaders = self.api.getCustomHeaders() - pattern = re.compile('^X-RosetteAPI-') - if customHeaders is not None: - for key in customHeaders.keys(): - if pattern.match(key) is not None: - headers[key] = customHeaders[key] - else: - raise RosetteException("badHeader", "Custom header name must begin with \"X-RosetteAPI-\"", key) - self.api.clearCustomHeaders() + if self.use_multipart: + payload = None + if self.api.url_parameters: + payload = self.api.url_parameters - headers["X-RosetteAPI-Key"] = self.user_key - headers["X-RosetteAPI-Binding"] = "python" - headers["X-RosetteAPI-Binding-Version"] = _BINDING_VERSION - - if self.useMultipart: params = dict( (key, value) for key, - value in params_to_serialize.iteritems() if key == 'language') + value in params_to_serialize.items() if key == 'language') files = { 'content': ( os.path.basename( @@ -479,46 +528,46 @@ def call(self, parameters): 'request': ( 'request_options', json.dumps(params), - 'application/json')} + _APPLICATION_JSON)} request = requests.Request( - 'POST', url, files=files, headers=headers) - prepared_request = request.prepare() - session = requests.Session() - resp = session.send(prepared_request) - rdata = resp.content - response_headers = {"responseHeaders": dict(resp.headers)} - status = resp.status_code - r = _ReturnObject(_my_loads(rdata, response_headers), status) + 'POST', url, files=files, headers=headers, params=payload) + prepared_request = self.api.session.prepare_request(request) + settings = self.api.session.merge_environment_settings(prepared_request.url, {}, {}, None, None) + response = self.api.session.send(prepared_request, **settings) + rdata = response.content + response_headers = {"responseHeaders": dict(response.headers)} + status = response.status_code + response = _ReturnObject( + _my_loads(rdata, response_headers), status) else: if self.debug: - headers['X-RosetteAPI-Devel'] = True + headers[_LEGACY_CUSTOM_HEADER_PREFIX + 'Devel'] = 'true' self.logger.info('operate: ' + url) - headers['Accept'] = "application/json" + headers['Accept'] = _APPLICATION_JSON headers['Accept-Encoding'] = "gzip" - headers['Content-Type'] = "application/json" - r = self.api._post_http(url, params_to_serialize, headers) - return self.__finish_result(r, "operate") + headers['Content-Type'] = _APPLICATION_JSON + response = self.api.post_http(url, params_to_serialize, headers) + return self.__finish_result(response, "operate") -class API: +class API(object): """ - Rosette Python Client Binding API; representation of a Rosette server. + Analytics Python Client Binding API; representation of an Analytics server. Call instance methods upon this object to obtain L{EndpointCaller} objects - which can communicate with particular Rosette server endpoints. + which can communicate with particular Analytics server endpoints. """ def __init__( self, user_key=None, - service_url='https://api.rosette.com/rest/v1/', + service_url='https://analytics.babelstreet.com/rest/v1/', retries=5, - reuse_connection=True, refresh_duration=0.5, debug=False): """ Create an L{API} object. - @param user_key: (Optional; required for servers requiring authentication.) An authentication string to be sent - as user_key with all requests. The default Rosette server requires authentication. - to the server. + @param user_key: (Optional; required for servers requiring authentication.) + An authentication string to be sent as user_key with all requests. The + default Analytics server requires authentication to the server. """ # logging.basicConfig(filename="binding.log", filemode="w", level=logging.DEBUG) self.user_key = user_key @@ -528,90 +577,146 @@ def __init__( self.logger.info('Initialized on ' + self.service_url) self.debug = debug - if (retries < 1): + if retries < 1: retries = 1 - if (refresh_duration < 0): + if refresh_duration < 0: refresh_duration = 0 - self.num_retries = retries - self.reuse_connection = reuse_connection self.connection_refresh_duration = refresh_duration - self.http_connection = None self.options = {} - self.customHeaders = {} - - def _connect(self, parsedUrl): - """ Simple connection method - @param parsedUrl: The URL on which to process + self.custom_headers = {} + self.url_parameters = {} + self.max_pool_size = 1 + self.session = requests.Session() + self.user_agent_string = 'Babel-Street-Analytics-API-Python/' + _BINDING_VERSION + '/' + platform.python_version() + + self.morphology_output = { + 'LEMMAS': 'lemmas', + 'PARTS_OF_SPEECH': 'parts-of-speech', + 'COMPOUND_COMPONENTS': 'compound-components', + 'HAN_READINGS': 'han-readings', + 'COMPLETE': 'complete' + } + + self.endpoints = { + 'ADDRESS_SIMILARITY': 'address-similarity', + 'CATEGORIES': 'categories', + 'ENTITIES': 'entities', + 'INFO': 'info', + 'LANGUAGE': 'language', + 'MORPHOLOGY': 'morphology', + 'NAME_TRANSLATION': 'name-translation', + 'NAME_SIMILARITY': 'name-similarity', + 'NAME_DEDUPLICATION': 'name-deduplication', + 'PING': 'ping', + 'RELATIONSHIPS': 'relationships', + 'SEMANTIC_VECTORS': 'semantics/vector', + 'SENTENCES': 'sentences', + 'SENTIMENT': 'sentiment', + 'SIMILAR_TERMS': 'semantics/similar', + 'SYNTAX_DEPENDENCIES': 'syntax/dependencies', + 'TEXT_EMBEDDING': 'semantics/vector', + 'TOKENS': 'tokens', + 'TOPICS': 'topics', + 'TRANSLITERATION': 'transliteration', + 'EVENTS': 'events', + 'RECORD_SIMILARITY': 'record-similarity' + } + + def __del__(self): + try: + self.session.close() + except ReferenceError: + pass + + def get_binding_version(self): + """ Return the current binding version """ + return _BINDING_VERSION + + def get_user_agent_string(self): + """ Return the User-Agent string """ + return self.user_agent_string + + def set_pool_size(self, new_pool_size): + """Sets the connection pool size. + @parameter new_pool_size: pool size to set """ - if not self.reuse_connection or self.http_connection is None: - loc = parsedUrl.netloc - if parsedUrl.scheme == "https": - self.http_connection = httplib.HTTPSConnection(loc) - else: - self.http_connection = httplib.HTTPConnection(loc) + self.max_pool_size = new_pool_size + adapter = requests.adapters.HTTPAdapter( + pool_maxsize=new_pool_size) + if 'https:' in self.service_url: + self.session.mount('https://', adapter) + else: + self.session.mount('http://', adapter) # NOSONAR - def _make_request(self, op, url, data, headers): - """ - Handles the actual request, retrying if a 429 is encountered + def __adjust_concurrency(self, dict_headers): + if _CONCURRENCY_HEADER in dict_headers: + if dict_headers[_CONCURRENCY_HEADER] != self.max_pool_size: + self.set_pool_size(dict_headers[_CONCURRENCY_HEADER]) + elif _LEGACY_CONCURRENCY_HEADER in dict_headers: + if dict_headers[_LEGACY_CONCURRENCY_HEADER] != self.max_pool_size: + self.set_pool_size(dict_headers[_LEGACY_CONCURRENCY_HEADER]) - @param op: POST or GET + def _make_request(self, operation, url, data, headers): + """ + @param operation: POST or GET @param url: endpoing URL @param data: request data @param headers: request headers """ - headers['User-Agent'] = "RosetteAPIPython/" + _BINDING_VERSION - parsedUrl = urlparse.urlparse(url) - - self._connect(parsedUrl) + headers['User-Agent'] = self.get_user_agent_string() message = None code = "unknownError" rdata = None response_headers = {} - for i in range(self.num_retries + 1): - try: - self.http_connection.request(op, url, data, headers) - response = self.http_connection.getresponse() - status = response.status - rdata = response.read() - response_headers["responseHeaders"] = ( - dict(response.getheaders())) - if status == 200: - if not self.reuse_connection: - self.http_connection.close() - return rdata, status, response_headers - if status == 429: - code = status - message = "{0} ({1})".format(rdata, i) - time.sleep(self.connection_refresh_duration) - self.http_connection.close() - self._connect(parsedUrl) - continue - if rdata is not None: - try: - the_json = _my_loads(rdata, response_headers) - if 'message' in the_json: - message = the_json['message'] - if "code" in the_json: - code = the_json['code'] - else: - code = status - raise RosetteException(code, message, url) - except: - raise - except (httplib.BadStatusLine, gaierror): - raise RosetteException( - "ConnectionError", - "Unable to establish connection to the Rosette API server", - url) - if not self.reuse_connection: - self.http_connection.close() + payload = None + if self.url_parameters: + payload = self.url_parameters + + request = requests.Request( + operation, url, data=data, headers=headers, params=payload) + prepared_request = self.session.prepare_request(request) + # Take into account environment settings, e.g. HTTP_PROXY and HTTPS_PROXY + settings = self.session.merge_environment_settings(prepared_request.url, {}, {}, None, None) + + try: + response = self.session.send(prepared_request, **settings) + status = response.status_code + rdata = response.content + dict_headers = dict(response.headers) + self.__adjust_concurrency(dict_headers) + response_headers = {"responseHeaders": dict_headers} + + if status == 200: + return rdata, status, response_headers + if rdata is not None: + try: + the_json = _my_loads(rdata, response_headers) + if 'message' in the_json: + message = the_json['message'] + if "code" in the_json: + code = the_json['code'] + else: + code = status + if not message: + message = rdata + raise RosetteException(code, message, url) + except json.JSONDecodeError as exception: + raise RosetteException( + exception, + "Problem decoding JSON", + rdata) + except requests.exceptions.RequestException as exception: + raise RosetteException( + exception, + "Unable to establish connection to the Analytics API server", + url) raise RosetteException(code, message, url) - def _get_http(self, url, headers): + def get_http(self, url, headers): """ Simple wrapper for the GET request @@ -622,7 +727,7 @@ def _get_http(self, url, headers): "GET", url, None, headers) return _ReturnObject(_my_loads(rdata, response_headers), status) - def _post_http(self, url, data, headers): + def post_http(self, url, data, headers): """ Simple wrapper for the POST request @@ -644,7 +749,13 @@ def _post_http(self, url, data, headers): return _ReturnObject(_my_loads(rdata, response_headers), status) - def setOption(self, name, value): + def get_pool_size(self): + """ + Returns the maximum pool size, which is the returned x-rosetteapi-concurrency value + """ + return int(self.max_pool_size) + + def set_option(self, name, value): """ Sets an option @@ -656,7 +767,7 @@ def setOption(self, name, value): else: self.options[name] = value - def getOption(self, name): + def get_option(self, name): """ Gets an option @@ -669,35 +780,66 @@ def getOption(self, name): else: return None - def clearOptions(self): + def clear_options(self): """ Clears all options """ self.options.clear() - def setCustomHeaders(self, name, value): + def set_url_parameter(self, name, value): + """ + Sets a URL parameter + + @param name: name of parameter + @param value: value of parameter + """ + if value is None: + self.url_parameters.pop(name, None) + else: + self.url_parameters[name] = value + + def get_url_parameter(self, name): + """ + Gets a URL parameter + + @param name: name of parameter + + @return: value of parameter + """ + if name in self.url_parameters.keys(): + return self.url_parameters[name] + else: + return None + + def clear_url_parameters(self): + """ + Clears all options + """ + self.url_parameters.clear() + + def set_custom_headers(self, name, value): """ Sets custom headers @param headers: array of custom headers to be set """ if value is None: - self.customHeaders.pop(name, None) + self.custom_headers.pop(name, None) else: - self.customHeaders[name] = value + self.custom_headers[name] = value - def getCustomHeaders(self): + def get_custom_headers(self): """ Get custom headers """ - return self.customHeaders + return self.custom_headers - def clearCustomHeaders(self): + def clear_custom_headers(self): """ Clears custom headers """ - self.customHeaders.clear() + self.custom_headers.clear() def ping(self): """ @@ -721,7 +863,7 @@ def language(self, parameters): @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of language identification.""" - return EndpointCaller(self, "language").call(parameters) + return EndpointCaller(self, self.endpoints['LANGUAGE']).call(parameters) def sentences(self, parameters): """ @@ -730,7 +872,7 @@ def sentences(self, parameters): and possible metadata, to be processed by the sentence identifier. @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of sentence identification.""" - return EndpointCaller(self, "sentences").call(parameters) + return EndpointCaller(self, self.endpoints['SENTENCES']).call(parameters) def tokens(self, parameters): """ @@ -739,9 +881,9 @@ def tokens(self, parameters): and possible metadata, to be processed by the tokens identifier. @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of tokenization.""" - return EndpointCaller(self, "tokens").call(parameters) + return EndpointCaller(self, self.endpoints['TOKENS']).call(parameters) - def morphology(self, parameters, facet=MorphologyOutput.COMPLETE): + def morphology(self, parameters, facet=""): """ Create an L{EndpointCaller} to returns a specific facet of the morphological analyses of texts to which it is applied and call it. @@ -751,25 +893,20 @@ def morphology(self, parameters, facet=MorphologyOutput.COMPLETE): @param facet: The facet desired, to be returned by the created L{EndpointCaller}. @type facet: An element of L{MorphologyOutput}. @return: A python dictionary containing the results of morphological analysis.""" - return EndpointCaller(self, "morphology/" + facet).call(parameters) + if facet == "": + facet = self.morphology_output['COMPLETE'] + return EndpointCaller(self, self.endpoints['MORPHOLOGY'] + "/" + facet).call(parameters) - def entities(self, parameters, resolve_entities=False): + def entities(self, parameters): """ Create an L{EndpointCaller} to identify named entities found in the texts - to which it is applied and call it. Linked entity information is optional, and - its need must be specified at the time the operator is created. + to which it is applied and call it. @param parameters: An object specifying the data, and possible metadata, to be processed by the entity identifier. @type parameters: L{DocumentParameters} or L{str} - @param resolve_entities: Specifies whether or not linked entity information will - be wanted. - @type resolve_entities: Boolean @return: A python dictionary containing the results of entity extraction.""" - if resolve_entities: - warnings.warn("entities(params,resolve_entities) is deprecated and replaced by entities(params).", DeprecationWarning) - return EndpointCaller(self, "entities/linked").call(parameters) - else: - return EndpointCaller(self, "entities").call(parameters) + + return EndpointCaller(self, self.endpoints['ENTITIES']).call(parameters) def categories(self, parameters): """ @@ -779,7 +916,7 @@ def categories(self, parameters): and possible metadata, to be processed by the category identifier. @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of categorization.""" - return EndpointCaller(self, "categories").call(parameters) + return EndpointCaller(self, self.endpoints['CATEGORIES']).call(parameters) def sentiment(self, parameters): """ @@ -793,7 +930,7 @@ def sentiment(self, parameters): to which is applied. @return: An L{EndpointCaller} object which can return sentiments of texts to which it is applied.""" - return EndpointCaller(self, "sentiment").call(parameters) + return EndpointCaller(self, self.endpoints['SENTIMENT']).call(parameters) def relationships(self, parameters): """ @@ -803,7 +940,16 @@ def relationships(self, parameters): and possible metadata, to be processed by the relationships identifier. @type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of relationship extraction.""" - return EndpointCaller(self, "relationships").call(parameters) + return EndpointCaller(self, self.endpoints['RELATIONSHIPS']).call(parameters) + + def address_similarity(self, parameters): + """ + Create an L{EndpointCaller} to perform address similarity scoring and call it. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the name matcher. + @type parameters: L{AddressSimilarityParameters} + @return: A python dictionary containing the results of name matching.""" + return EndpointCaller(self, self.endpoints['ADDRESS_SIMILARITY']).call(parameters, AddressSimilarityParameters) def name_translation(self, parameters): """ @@ -813,7 +959,7 @@ def name_translation(self, parameters): and possible metadata, to be processed by the name translator. @type parameters: L{NameTranslationParameters} @return: A python dictionary containing the results of name translation.""" - return EndpointCaller(self, "name-translation").call(parameters) + return EndpointCaller(self, self.endpoints['NAME_TRANSLATION']).call(parameters, NameTranslationParameters) def translated_name(self, parameters): """ deprecated @@ -832,7 +978,7 @@ def name_similarity(self, parameters): and possible metadata, to be processed by the name matcher. @type parameters: L{NameSimilarityParameters} @return: A python dictionary containing the results of name matching.""" - return EndpointCaller(self, "name-similarity").call(parameters) + return EndpointCaller(self, self.endpoints['NAME_SIMILARITY']).call(parameters, NameSimilarityParameters) def matched_name(self, parameters): """ deprecated @@ -842,3 +988,79 @@ def matched_name(self, parameters): @type parameters: L{NameSimilarityParameters} @return: A python dictionary containing the results of name matching.""" return self.name_similarity(parameters) + + def name_deduplication(self, parameters): + """ + Fuzzy de-duplication of a list of names + @param parameters: An object specifying a list of names as well + as a threshold + @type parameters: L{NameDeduplicationParameters} + @return: A python dictionary containing the results of de-duplication""" + return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters, NameDeduplicationParameters) + + def record_similarity(self, parameters): + """ + Create an L{EndpointCaller} to get similarity core between a list of records and call it. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the record matcher. + @type parameters: L{RecordSimilarityParameters} + @return: A python dictionary containing the results of record matching.""" + return EndpointCaller(self, self.endpoints['RECORD_SIMILARITY']).call(parameters, RecordSimilarityParameters) + + def text_embedding(self, parameters): + """ deprecated + Create an L{EndpointCaller} to identify text vectors found in the texts + to which it is applied and call it. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of text embedding.""" + return self.semantic_vectors(parameters) + + def semantic_vectors(self, parameters): + """ + Create an L{EndpointCaller} to identify text vectors found in the texts + to which it is applied and call it. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of semantic vectors.""" + return EndpointCaller(self, self.endpoints['SEMANTIC_VECTORS']).call(parameters) + + def syntax_dependencies(self, parameters): + """ + Create an L{EndpointCaller} to identify the syntactic dependencies in the texts + to which it is applied and call it. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of syntactic dependencies + identification""" + return EndpointCaller(self, self.endpoints['SYNTAX_DEPENDENCIES']).call(parameters) + + def transliteration(self, parameters): + """ + Transliterate given context + @type parameters: L{DocumentParameters} + @return: A python dictionary containing the results of the transliteration""" + return EndpointCaller(self, self.endpoints['TRANSLITERATION']).call(parameters) + + def topics(self, parameters): + """ + Topics returns keyphrases and concepts related to the provided content + @type parameters: DocumentParameters + @return; A python dictionary containing the results""" + return EndpointCaller(self, self.endpoints['TOPICS']).call(parameters) + + def similar_terms(self, parameters): + """ + Create an L{EndpointCaller} to identify terms most similar to the input in + the requested languages + :param parameters: DocumentParameters + :return: A python dictionary containing the similar terms and their similarity + """ + return EndpointCaller(self, self.endpoints['SIMILAR_TERMS']).call(parameters) + + def events(self, parameters): + """ + Create an L{EndpointCaller} to identify events found in the texts. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the 'events' identifier. + @type parameters: L{DocumentParameters} or L{str} + @return: A python dictionary containing the results of event extraction. + """ + return EndpointCaller(self, self.endpoints['EVENTS']).call(parameters) diff --git a/setup.py b/setup.py index 25f8d39..1bcd653 100755 --- a/setup.py +++ b/setup.py @@ -1,48 +1,59 @@ #!/usr/bin/env python -from setuptools import setup -import rosette +"""setup.py""" import os import io +from setuptools import setup +import rosette NAME = "rosette_api" -DESCRIPTION = "Rosette API Python client SDK" -AUTHOR = "Basis Technology Corp." -AUTHOR_EMAIL = "rosette_api@basistech.com" -HOMEPAGE = "https://developer.rosette.com" +DESCRIPTION = "Babel Street Analytics API Python client SDK" +AUTHOR = "Analytics by Babel Street" +AUTHOR_EMAIL = "analyticssupport@babelstreet.com" +HOMEPAGE = "https://github.com/rosette-api/python" VERSION = rosette.__version__ -here = os.path.abspath(os.path.dirname(__file__)) +HERE = os.path.abspath(os.path.dirname(__file__)) def read(*filenames, **kwargs): + """read function""" encoding = kwargs.get('encoding', 'utf-8') sep = kwargs.get('sep', '\n') buf = [] for filename in filenames: - with io.open(filename, encoding=encoding) as f: - buf.append(f.read()) + with io.open(filename, encoding=encoding) as the_file: + buf.append(the_file.read()) return sep.join(buf) -long_description = read('README.md') -setup(name=NAME, - author=AUTHOR, - author_email=AUTHOR_EMAIL, - description=DESCRIPTION, - license='Apache License', - long_description=long_description, - packages=['rosette'], - install_requires=['requests'], - platforms='any', - url=HOMEPAGE, - version=VERSION, - classifiers=[ - 'Programming Language :: Python', - 'Development Status :: 4 - Beta', - 'Natural Language :: English', - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Topic :: Software Development :: Libraries :: Python Modules'] - ) +LONG_DESCRIPTION = read('README.md') + +setup( + name=NAME, + author=AUTHOR, + author_email=AUTHOR_EMAIL, + description=DESCRIPTION, + license='Apache License', + long_description=LONG_DESCRIPTION, + long_description_content_type='text/markdown', + packages=['rosette'], + install_requires=['requests'], + platforms='any', + url=HOMEPAGE, + version=VERSION, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', + 'Topic :: Software Development :: Libraries :: Python Modules' + ] +) diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 0000000..2bdb883 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,5 @@ +sonar.projectKey=rosette-api-python-binding +sonar.sources=rosette +sonar.exclusions=**/tests/**,**/docs/**,**/examples/** +sonar.python.coverage.reportPaths=coverage.xml +#sonar.branch.name=RCB-596-pool-size diff --git a/tests/__init__.py b/tests/__init__.py index 35f570e..4256e37 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2015 Basis Technology Corporation. +Copyright (c) 2014-2022 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index f17209c..ca8085c 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2015 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,546 +18,691 @@ # To run tests, run `py.test test_rosette_api.py` -import glob -import httpretty import json -import os -import pytest -import re import sys -try: - from StringIO import StringIO as streamIO -except ImportError: - from io import BytesIO as streamIO -import gzip -from rosette.api import API, DocumentParameters, NameTranslationParameters, NameSimilarityParameters, RosetteException +import platform +import pook +import pytest +from rosette.api import (AddressSimilarityParameters, + API, + DocumentParameters, + NameTranslationParameters, + NameSimilarityParameters, + NameDeduplicationParameters, + RecordSimilarityParameters, + RosetteException) -_IsPy3 = sys.version_info[0] == 3 +_ISPY3 = sys.version_info[0] == 3 -@pytest.fixture -def json_response(scope="module"): - body = json.dumps({'name': 'Rosette API', 'versionChecked': True}) - return body +def get_base_url(): + return "https://analytics.babelstreet.com/rest/" @pytest.fixture -def api(): - api = API('bogus_key') - return api +def json_response(): + """ fixture to return info body""" + body = json.dumps({'name': 'Babel Street Analytics', + 'versionChecked': True}) + return body @pytest.fixture -def json_429(scope="module"): - body = json.dumps({'message': 'too many requests', 'versionChecked': True}) - return body +def api(): + """ fixture to return api key""" + tmp_api = API('bogus_key') + return tmp_api @pytest.fixture -def json_409(scope="module"): - body = json.dumps({'code': 'incompatibleClientVersion', 'message': 'the version of client library used is not compatible with this server', 'versionChecked': True}) +def json_409(): + """ fixture to return 409 body""" + body = json.dumps({'code': 'incompatibleClientVersion', + 'message': 'the version of client library used' + ' is not compatible with this server', + 'versionChecked': True}) return body @pytest.fixture -def doc_params(scope="module"): +def doc_params(): + """ fixture to return basic DocumentParameters""" params = DocumentParameters() params['content'] = 'Sample test string' return params -# Of Note: httpretty provides a short hand decorator, @httpretty.activate, that wraps the decorated -# function with httpretty.enable() and ends it with httpretty.disable(). However, when combined with -# pytest fixtures, the passed in fixture arguments are ignored, resulting in a TypeError. Use the old -# enable/disable to avoid this. -# Test the option set/get/clear +@pytest.fixture +def doc_map(): + """ fixture for a simple map of doc request """ + return {'content': 'Simple test string'} def test_option_get_set_clear(api): - api.setOption('test', 'foo') - assert 'foo' == api.getOption('test') + """Tests the get/set/clear methods""" + api.set_option('test', 'foo') + assert api.get_option('test') == 'foo' - api.clearOptions() - assert api.getOption('test') is None + api.clear_options() + assert api.get_option('test') is None def test_option_clear_single_option(api): - api.setOption('test', 'foo') - assert 'foo' == api.getOption('test') + """Test the clear single option""" + api.set_option('test', 'foo') + assert api.get_option('test') == 'foo' - api.setOption('test', None) - assert api.getOption('test') is None + api.set_option('test', None) + assert api.get_option('test') is None -# Test the custom header set/get/clear +def test_url_parameter_getsetclear(api): + """Tests get/set/clear url parameter""" + api.set_url_parameter('test', 'foo') + assert api.get_url_parameter('test') == 'foo' + + api.clear_url_parameters() + assert api.get_url_parameter('test') is None -def test_custom_header_get_set_clear(api): - key = 'X-RosetteAPI-Test' - value = 'foo' - api.setCustomHeaders(key, value) - assert value == api.getCustomHeaders()[key] - api.clearCustomHeaders() - assert len(api.getCustomHeaders()) is 0 +def test_url_parameter_clear_single(api): + """Test the clearing of a single url parameter""" + api.set_url_parameter('test', 'foo') + assert api.get_url_parameter('test') == 'foo' -# Test for invalid header name + api.set_url_parameter('test', None) + assert api.get_url_parameter('test') is None + + +def test_custom_header_props(api): + """Test custom header get/set/clear""" + key = 'X-BabelStreetAPI-Test' + value = 'foo' + api.set_custom_headers(key, value) + assert value == api.get_custom_headers()[key] + + api.clear_custom_headers() + assert len(api.get_custom_headers()) == 0 def test_invalid_header(api): + """Test for invalid header""" key = 'test' value = 'foo' - api.setCustomHeaders(key, value) + api.set_custom_headers(key, value) with pytest.raises(RosetteException) as e_rosette: - result = api.info() + api.info() assert e_rosette.value.status == 'badHeader' -# Test that pinging the API is working properly -# @httpretty.activate +def test_user_agent(api): + """ Test user agent """ + value = ("Babel-Street-Analytics-API-Python/" + + api.get_binding_version() + "/" + platform.python_version()) + assert value == api.get_user_agent_string() -def test_ping(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/ping", - body=json_response, status=200, content_type="application/json") - result = api.ping() - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_ping_pook(api, json_response): + pook.get(url=get_base_url() + "v1/ping", + response_json=json_response, + reply=200) -# Test that getting the info about the API is being called correctly + result = api.ping() + assert result["name"] == "Babel Street Analytics" +@pook.on def test_info(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") + pook.get(url=get_base_url() + "v1/info", + response_json=json_response, + reply=200) result = api.info() - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() - -# Test for 429 - - -def test_for_429(api, json_429): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=json_429, status=429, content_type="application/json") - - with pytest.raises(RosetteException) as e_rosette: - result = api.info() - - assert e_rosette.value.status == 429 - httpretty.disable() - httpretty.reset() - -# Test for 429 + assert result["name"] == "Babel Street Analytics" +@pook.on def test_for_409(api, json_409): - httpretty.enable() - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=json_409, status=409, content_type="application/json") + pook.get(url=get_base_url() + "v1/info", + response_json=json_409, + reply=409) with pytest.raises(RosetteException) as e_rosette: result = api.info() assert e_rosette.value.status == 'incompatibleClientVersion' - httpretty.disable() - httpretty.reset() -# Test the language endpoint +@pook.on +@pytest.mark.parametrize("header_key", + ['x-rosetteapi-concurrency', + 'x-babelstreetapi-concurrency']) +def test_the_max_pool_size_header(json_response, doc_params, header_key): + pook.post(url=get_base_url() + "v1/language", + response_json=json_response, + reply=200, + response_headers={header_key: 5}) -def test_the_language_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/language", - body=json_response, status=200, content_type="application/json") - + api = API('bogus_key') + assert api.get_pool_size() == 1 result = api.language(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() - -# Test the sentences endpoint + assert result["name"] == "Babel Street Analytics" + assert api.get_pool_size() == 5 + api.set_pool_size(11) + assert api.get_pool_size() == 11 -def test_the_sentences_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/sentences", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_the_max_pool_size_both(json_response, doc_params): + pook.post(url=get_base_url() + "v1/language", + response_json=json_response, + reply=200, + response_headers={'x-rosetteapi-concurrency': 5, + 'x-babelstreetapi-concurrency': 8}) - result = api.sentences(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + api = API('bogus_key') + assert api.get_pool_size() == 1 + result = api.language(doc_params) + assert result["name"] == "Babel Street Analytics" + assert api.get_pool_size() == 8 + api.set_pool_size(11) + assert api.get_pool_size() == 11 -# Test the tokens endpoint +@pook.on +def test_a_doc_endpoint_fails_on_map(api, json_response, doc_map): + pook.post(url=get_base_url() + "v1/language", + response_json=json_response, + reply=200) -def test_the_tokens_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/tokens", - body=json_response, status=200, content_type="application/json") + with pytest.raises(RosetteException) as e_rosette: + result = api.language(doc_map) + assert e_rosette.value.status == 'incompatible' + + +@pook.on +@pytest.mark.parametrize("endpoint", + ['categories', + 'entities', + 'events', + 'language', + 'morphology/complete', + 'morphology/compound-components', + 'morphology/han-readings', + 'morphology/lemmas', + 'morphology/parts-of-speech', + 'relationships', + 'semantics/similar', + 'semantics/vector', + 'sentences', + 'sentiment', + 'syntax/dependencies', + 'tokens', + 'topics', + 'transliteration']) +def test_document_endpoints(api, json_response, doc_params, endpoint): + pook.post(url=get_base_url() + "v1/" + endpoint, + response_json=json_response, + reply=200) + + # TODO: Convert to match-case when minimum supported version is 3.10 + if endpoint == "categories": + result = api.categories(doc_params) + elif endpoint == "entities": + result = api.entities(doc_params) + elif endpoint == "events": + result = api.events(doc_params) + elif endpoint == "language": + result = api.language(doc_params) + elif endpoint == "morphology/complete": + result = api.morphology(doc_params) + elif endpoint == "morphology/compound-components": + result = api.morphology(doc_params, "compound-components") + elif endpoint == "morphology/han-readings": + result = api.morphology(doc_params, "han-readings") + elif endpoint == "morphology/lemmas": + result = api.morphology(doc_params, "lemmas") + elif endpoint == "morphology/parts-of-speech": + result = api.morphology(doc_params, "parts-of-speech") + elif endpoint == "relationships": + api.set_option('accuracyMode', 'PRECISION') + result = api.relationships(doc_params) + elif endpoint == "semantics/similar": + result = api.similar_terms(doc_params) + elif endpoint == "semantics/vector": + result = api.semantic_vectors(doc_params) + elif endpoint == "sentences": + result = api.sentences(doc_params) + elif endpoint == "sentiment": + result = api.sentiment(doc_params) + elif endpoint == "syntax/dependencies": + result = api.syntax_dependencies(doc_params) + elif endpoint == "tokens": + result = api.tokens(doc_params) + elif endpoint == "topics": + result = api.topics(doc_params) + elif endpoint == "transliteration": + result = api.transliteration(doc_params) + else: + raise Exception("Unknown endpoint.") + + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_multipart_operation(api, json_response, doc_params, tmpdir): + pook.post(url=get_base_url() + "v1/sentiment", + response_json=json_response, + reply=200) - result = api.tokens(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + tmp_file = tmpdir.mkdir("sub").join("testfile.txt") + tmp_file.write(json_response) + doc_params.load_document_file = tmp_file + result = api.sentiment(doc_params) + assert result["name"] == "Babel Street Analytics" -# Test the morphology complete endpoint +@pook.on +def test_incompatible_type(api, json_response): + pook.post(url=get_base_url() + "v1/sentences", + response_json=json_response, + reply=200) -def test_the_morphology_complete_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/complete", - body=json_response, status=200, content_type="application/json") + params = NameTranslationParameters() + params["name"] = "some data to translate" + params["entityType"] = "PERSON" + params["targetLanguage"] = "eng" + params["targetScript"] = "Latn" - result = api.morphology(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + # oops, called sentences + with pytest.raises(RosetteException) as e_rosette: + api.sentences(params) -# Test the morphology lemmas endpoint +@pook.on +def test_the_name_translation_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/name-translation", + response_json=json_response, + reply=200) -def test_the_morphology_lemmas_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/lemmas", - body=json_response, status=200, content_type="application/json") + params = NameTranslationParameters() + params["name"] = "some data to translate" + params["entityType"] = "PERSON" + params["targetLanguage"] = "eng" + params["targetScript"] = "Latn" + result = api.name_translation(params) + assert result["name"] == "Babel Street Analytics" - result = api.morphology(doc_params, 'lemmas') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() -# Test the morphology parts-of-speech endpoint +@pook.on +def test_the_name_requests_with_text(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) + with pytest.raises(RosetteException) as e_rosette: + result = api.name_similarity("should fail") + assert e_rosette.value.status == 'incompatible' -def test_the_morphology_parts_of_speech_endpoint( - api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/parts-of-speech", - body=json_response, status=200, content_type="application/json") + with pytest.raises(RosetteException) as e_rosette: + result = api.name_translation("should fail") + assert e_rosette.value.status == 'incompatible' - result = api.morphology(doc_params, 'parts-of-speech') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + with pytest.raises(RosetteException) as e_rosette: + result = api.name_deduplication("should fail") + assert e_rosette.value.status == 'incompatible' -# Test the morphology compound-components endpoint + with pytest.raises(RosetteException) as e_rosette: + result = api.address_similarity("should fail") + assert e_rosette.value.status == 'incompatible' + with pytest.raises(RosetteException) as e_rosette: + result = api.record_similarity("should fail") + assert e_rosette.value.status == 'incompatible' -def test_the_morphology_compound_components_endpoint( - api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/compound-components", - body=json_response, status=200, content_type="application/json") - result = api.morphology(doc_params, 'compound-components') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_the_name_similarity_single_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) -# Test the morphology han-readings endpoint + matched_name_data1 = "John Mike Smith" + matched_name_data2 = "John Joe Smith" + params = NameSimilarityParameters() + params["name1"] = {"text": matched_name_data1} + params["name2"] = {"text": matched_name_data2} + params["parameters"] = {"conflictScore": "0.9"} + result = api.name_similarity(params) + assert result["name"] == "Babel Street Analytics" -def test_the_morphology_han_readings_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/morphology/han-readings", - body=json_response, status=200, content_type="application/json") - result = api.morphology(doc_params, 'han-readings') - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_the_name_similarity_multiple_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) -# Test the entities endpoint + matched_name_data1 = "John Mike Smith" + matched_name_data2 = "John Joe Smith" + params = NameSimilarityParameters() + params["name1"] = {"text": matched_name_data1} + params["name2"] = {"text": matched_name_data2} + params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.5"} + result = api.name_similarity(params) + assert result["name"] == "Babel Street Analytics" -def test_the_entities_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities", - body=json_response, status=200, content_type="application/json") - result = api.entities(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_the_name_similarity_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) -# Test the entities/linked endpoint + matched_name_data1 = "Michael Jackson" + matched_name_data2 = "迈克尔·杰克逊" + params = NameSimilarityParameters() + params["name1"] = { + "text": matched_name_data1, + "language": "eng", + "entityType": "PERSON"} + params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} + result = api.name_similarity(params) + assert result["name"] == "Babel Street Analytics" -def test_the_entities_linked_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities/linked", - body=json_response, status=200, content_type="application/json") - result = api.entities(doc_params, True) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_name_deduplication_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/name-deduplication", + response_json=json_response, + reply=200) -# Test the categories endpoint + params = NameDeduplicationParameters() + with pytest.raises(RosetteException) as e_rosette: + api.name_deduplication(params) -def test_the_categories_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/categories", - body=json_response, status=200, content_type="application/json") + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Name De-Duplication parameter is missing: names') - result = api.categories(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + params["names"] = ["John Smith", "Johnathon Smith", "Fred Jones"] -# Test the sentiment endpoint + result = api.name_deduplication(params) + assert result["name"] == "Babel Street Analytics" -def test_the_sentiment_endpoint(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/sentiment", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_the_name_deduplication_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/name-deduplication", + response_json=json_response, + reply=200) - result = api.sentiment(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + dedup_list = ["John Smith", "Johnathon Smith", "Fred Jones"] + threshold = 0.75 + params = NameDeduplicationParameters() + params["names"] = dedup_list + params["threshold"] = threshold -# Test the multipart operation + result = api.name_deduplication(params) + assert result["name"] == "Babel Street Analytics" -def test_the_multipart_operation(api, json_response, doc_params, tmpdir): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/sentiment", - body=json_response, status=200, content_type="application/json") - - p = tmpdir.mkdir("sub").join("testfile.txt") - p.write(json_response) - doc_params.load_document_file = p - result = api.sentiment(doc_params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_for_404(api): + pook.get(url=get_base_url() + "v1/info", + response_json={'message': 'not found'}, + reply=404) -# Test the name translation endpoint + with pytest.raises(RosetteException) as e_rosette: + api.info() + assert e_rosette.value.status == 404 + assert e_rosette.value.message == 'not found' -def test_the_name_translation_endpoint(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-translation", - body=json_response, status=200, content_type="application/json") - params = NameTranslationParameters() - params["name"] = "some data to translate" - params["entityType"] = "PERSON" - params["targetLanguage"] = "eng" - params["targetScript"] = "Latn" - result = api.name_translation(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() +@pook.on +def test_both_content_and_content_uri(api, json_response, doc_params): + pook.post(url=get_base_url() + "v1/entities", + response_json=json_response, + reply=200) -# Test the name similarity endpoint + doc_params['contentUri'] = 'https://example.com' + with pytest.raises(RosetteException) as e_rosette: + api.entities(doc_params) + assert e_rosette.value.status == 'badArgument' + assert (e_rosette.value.message == + 'Cannot supply both Content and ContentUri') -def test_the_name_similarity_endpoint(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity", - body=json_response, status=200, content_type="application/json") - matched_name_data1 = "Michael Jackson" - matched_name_data2 = "迈克尔·杰克逊" - params = NameSimilarityParameters() - params["name1"] = { - "text": matched_name_data1, - "language": "eng", - "entityType": "PERSON"} - params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} +@pook.on +def test_for_no_content_or_content_uri(api, json_response, doc_params): + pook.post(url=get_base_url() + "v1/entities", + response_json=json_response, + reply=200) - result = api.name_similarity(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + doc_params['content'] = None + with pytest.raises(RosetteException) as e_rosette: + api.entities(doc_params) -# Test the relationships endpoint + assert e_rosette.value.status == 'badArgument' + assert (e_rosette.value.message == + 'Must supply one of Content or ContentUri') -def test_the_relationships_endpoint(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/relationships", - body=json_response, status=200, content_type="application/json") +@pook.on +def test_for_address_similarity_required_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/address-similarity", + response_json=json_response, + reply=200) - params = DocumentParameters() - params["content"] = "some text data" - api.setOption('accuracyMode', 'PRECISION') - result = api.relationships(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() + params = AddressSimilarityParameters() -# Test for non 200 + with pytest.raises(RosetteException) as e_rosette: + api.address_similarity(params) + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Address Similarity parameter is missing: address1') -def test_for_404(api, json_response): - httpretty.enable() - body = json.dumps({'message': 'not found'}) - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.GET, "https://api.rosette.com/rest/v1/info", - body=body, status=404, content_type="application/json") + params["address1"] = {"houseNumber": "1600", + "road": "Pennsylvania Ave NW", + "city": "Washington", + "state": "DC", + "postCode": "20500"} with pytest.raises(RosetteException) as e_rosette: - result = api.info() - - assert e_rosette.value.status == 404 - assert e_rosette.value.message == 'not found' - httpretty.disable() - httpretty.reset() - -# Test for content and contentUri + api.address_similarity(params) + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Address Similarity parameter is missing: address2') -def test_for_content_and_contentUri(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities", - body=json_response, status=200, content_type="application/json") + params["address2"] =\ + {"text": "160 Pennsilvana Avenue, Washington, D.C., 20500"} - doc_params['contentUri'] = 'http://google.com' - with pytest.raises(RosetteException) as e_rosette: - result = api.entities(doc_params) + result = api.address_similarity(params) + assert result["name"] == "Babel Street Analytics" - assert e_rosette.value.status == 'badArgument' - assert e_rosette.value.message == 'Cannot supply both Content and ContentUri' - httpretty.disable() - httpretty.reset() -# Test for content and contentUri +@pook.on +def test_for_address_similarity_optional_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/address-similarity", + response_json=json_response, + reply=200) + params = AddressSimilarityParameters() -def test_for_no_content_or_contentUri(api, json_response, doc_params): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/entities", - body=json_response, status=200, content_type="application/json") + params["address1"] = {"houseNumber": "1600", + "road": "Pennsylvania Ave NW", + "city": "Washington", + "state": "DC", + "postCode": "20500"} - doc_params['content'] = None - with pytest.raises(RosetteException) as e_rosette: - result = api.entities(doc_params) + params["address2"] =\ + {"text": "160 Pennsilvana Avenue, Washington, D.C., 20500"} - assert e_rosette.value.status == 'badArgument' - assert e_rosette.value.message == 'Must supply one of Content or ContentUri' - httpretty.disable() - httpretty.reset() + params["parameters"] = {"houseNumberAddressFieldWeight": "0.9"} -# Test for required Name Similarity parameters + result = api.address_similarity(params) + assert result["name"] == "Babel Street Analytics" +@pook.on def test_for_name_similarity_required_parameters(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity", - body=json_response, status=200, content_type="application/json") + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) matched_name_data1 = "Michael Jackson" matched_name_data2 = "迈克尔·杰克逊" params = NameSimilarityParameters() with pytest.raises(RosetteException) as e_rosette: - result = api.name_similarity(params) + api.name_similarity(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Similarity parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Similarity parameter is missing: name1') params["name1"] = { "text": matched_name_data1, "language": "eng", "entityType": "PERSON"} with pytest.raises(RosetteException) as e_rosette: - result = api.name_similarity(params) + api.name_similarity(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Similarity parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Similarity parameter is missing: name2') params["name2"] = {"text": matched_name_data2, "entityType": "PERSON"} result = api.name_similarity(params) - assert result["name"] == "Rosette API" - httpretty.disable() - httpretty.reset() - -# Test for required Name Translation parameters + assert result["name"] == "Babel Street Analytics" +@pook.on def test_for_name_translation_required_parameters(api, json_response): - httpretty.enable() - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", - body=json_response, status=200, content_type="application/json") - httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-translation", - body=json_response, status=200, content_type="application/json") + pook.post(url=get_base_url() + "v1/name-translation", + response_json=json_response, + reply=200) params = NameTranslationParameters() params["entityType"] = "PERSON" params["targetScript"] = "Latn" with pytest.raises(RosetteException) as e_rosette: - result = api.name_translation(params) + api.name_translation(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Translation parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Translation parameter is missing: name') params["name"] = "some data to translate" with pytest.raises(RosetteException) as e_rosette: - result = api.name_translation(params) + api.name_translation(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Translation parameter not supplied' + assert (e_rosette.value.message == + 'Required Name Translation parameter is missing: targetLanguage') params["targetLanguage"] = "eng" result = api.name_translation(params) - assert result["name"] == "Rosette API" + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_deprecated_endpoints(api, json_response, doc_params): + # TEXT_EMBEDDING calls SEMANTIC_VECTORS + pook.post(url=get_base_url() + "v1/semantics/vector", + response_json=json_response, + reply=200) + + result = api.text_embedding(doc_params) + assert result["name"] == "Babel Street Analytics" + + # MATCHED_NAME calls NAME_SIMILARITY + pook.post(url=get_base_url() + "v1/name-similarity", + response_json=json_response, + reply=200) + + name_similarity_params = NameSimilarityParameters() + + name_similarity_params["name1"] = { + "text": "Michael Jackson", + "language": "eng", + "entityType": "PERSON"} + + name_similarity_params["name2"] =\ + {"text": "迈克尔·杰克逊", "entityType": "PERSON"} + + result = api.matched_name(name_similarity_params) + assert result["name"] == "Babel Street Analytics" + + # TRANSLATED_NAME calls NAME_TRANSLATION + pook.post(url=get_base_url() + "v1/name-translation", + response_json=json_response, + reply=200) + + name_translation_params = NameTranslationParameters() + name_translation_params["entityType"] = "PERSON" + name_translation_params["targetScript"] = "Latn" + name_translation_params["name"] = "some data to translate" + name_translation_params["targetLanguage"] = "eng" + + result = api.translated_name(name_translation_params) + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_the_record_similarity_endpoint(api, json_response): + pook.post(url=get_base_url() + "v1/record-similarity", + response_json=json_response, + reply=200) + + params = RecordSimilarityParameters() + params["fields"] = {} + params["properties"] = {} + params["records"] = {} + result = api.record_similarity(params) + assert result["name"] == "Babel Street Analytics" + + +@pook.on +def test_for_record_similarity_required_parameters(api, json_response): + pook.post(url=get_base_url() + "v1/record-similarity", + response_json=json_response, + reply=200) + + params = RecordSimilarityParameters() + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Record Similarity parameter is missing: records') + + params["records"] = {} + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert (e_rosette.value.message == + 'Required Record Similarity parameter is missing: fields') + + params["fields"] = {} - httpretty.disable() - httpretty.reset() + result = api.record_similarity(params) + assert result["name"] == "Babel Street Analytics" diff --git a/tests/tox.ini b/tests/tox.ini new file mode 100644 index 0000000..ed9b966 --- /dev/null +++ b/tests/tox.ini @@ -0,0 +1,12 @@ +[tox] +skipsdist = True +envlist = py2, py3 + +[testenv] +commands = + pytest -s +deps = + pytest + pook + epydoc + requests diff --git a/tox.ini b/tox.ini index bc1dadf..7f53adf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,20 +1,21 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - [tox] +envlist = py3 skipsdist = True -envlist = py26, py27, py33, py34 [testenv] -commands = - {envpython} setup.py install - {envbindir}/py.test --pep8 deps = pytest pep8 - pytest-pep8 - httpretty==0.8.14 + pook epydoc requests + coverage + build + +commands = + python -m build + coverage run -m pytest + coverage xml + +[coverage:run] +relative_files = True