diff --git a/README.md b/README.md index 36cf54b..5f2a805 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,17 @@ -Python Diffbot API Client -========= +# Python Diffbot API Client -Currently supports calls to the article endpoint. + +## Preface +Identify and extract the important parts of any web page in Python! This client currently supports calls to the automatic APIs. Installation ------------ To install activate a new virtual environment and run the following command: $ pip install -r requirements.txt +## Configuration + To run the example, you must first configure a working API token in config.py: $ cp config.py.example config.py; vim config.py; @@ -18,27 +20,75 @@ Then replace the string "SOME_TOKEN" with your API token. Finally, to run the e $ python example.py -Example use --------------- -An example of how to use the client in your code: +## Usage + +### Article API +An example call to the Article API: ``` diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 url = "http://shichuan.github.io/javascript-patterns/" -token = "YOUR_TOKEN_HERE" api = "article" +response = diffbot.request(url, token, api, version=2) +``` + +### Frontpage API +An example call to the Frontpage API: + +``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" version = 2 +url = "http://www.huffingtonpost.com/" +api = "frontpage" +response = diffbot.request(url, token, api, version=version) +``` + +### Product API +An example call to the Product API: -response = diffbot.request(url, token, api, version=2) ``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 +url = "http://www.overstock.com/Home-Garden/iRobot-650-Roomba-Vacuuming-Robot/7886009/product.html" +api = "product" +response = diffbot.request(url, token, api, version=version) +``` + +### Image API +An example call to the Image API: + +``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 +url = "http://www.google.com/" +api = "image" +response = diffbot.request(url, token, api, version=version) +``` + 
+### Classifier API +An example call to the Classifier API: + +``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 +url = "http://www.twitter.com/" +api = "analyze" +response = diffbot.request(url, token, api, version=version) +``` + -Testing ------------- +## Testing First install the test requirements with the following command: $ pip install -r test_requirements.txt -Unit and functional tests are configured to run using nose. From the project directory, simply run: +Currently there are some simple unit tests that mock the API calls and return data from fixtures in the filesystem. From the project directory, simply run: - $ nosetests + $ nosetests \ No newline at end of file diff --git a/client.py b/client.py index fd842b0..6fce288 100644 --- a/client.py +++ b/client.py @@ -5,24 +5,17 @@ class DiffbotClient(object): def __init__(self): self.base_url = "http://api.diffbot.com/" - def request(self, url, token, api, fields=[], version=2): + def request(self, url, token, api, fields=[], version=2, **kwargs): """ Returns a python object containing the requested resource from the diffbot api """ params = {"url": url, "token": token} + if fields: + params['fields'] = fields + if kwargs: + params.update(kwargs) response = requests.get(self.compose_url(api, version), params=params) obj = response.json() - obj = self.select_fields_from_response(obj, fields) - return obj - - @staticmethod - def select_fields_from_response(obj, fields): - """ - Returns the response object with the specified fields or all fields if - the fields list is empty - """ - if fields: - obj = dict((x, obj[x]) for x in fields) return obj def compose_url(self, api, version_number): diff --git a/example.py b/example.py index e69f3d3..574adec 100644 --- a/example.py +++ b/example.py @@ -2,14 +2,74 @@ from config import API_TOKEN import pprint + +print "Calling article API endpoint on the url: http://shichuan.github.io/javascript-patterns/...\n" diffbot = DiffbotClient() +token = API_TOKEN 
+version = 2 url = "http://shichuan.github.io/javascript-patterns/" +api = "article" +response = diffbot.request(url, token, api, version=2) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) + +print +print "Calling article API endpoint with fields specified on the url: http://shichuan.github.io/javascript-patterns/...\n" +diffbot = DiffbotClient() token = API_TOKEN +version = 2 +url = "http://shichuan.github.io/javascript-patterns/" api = "article" +response = diffbot.request(url, token, api, fields=['title', 'type'], version=2) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) + +print +print "Calling frontpage API endpoint on the url: http://www.huffingtonpost.com/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.huffingtonpost.com/" +api = "frontpage" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) + +print +print "Calling product API endpoint on the url: http://www.overstock.com/Home-Garden/iRobot-650-Roomba-Vacuuming-Robot/7886009/product.html...\n" +diffbot = DiffbotClient() +token = API_TOKEN version = 2 +url = "http://www.overstock.com/Home-Garden/iRobot-650-Roomba-Vacuuming-Robot/7886009/product.html" +api = "product" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) -print "Calling article API endpoint on the url: http://shichuan.github.io/javascript-patterns/...\n" -response = diffbot.request(url, token, api, version=2) +print +print "Calling image API endpoint on the url: http://www.google.com/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.google.com/" +api = "image" +response = diffbot.request(url, token, api, version=version) print "\nPrinting response:\n" pp = 
pprint.PrettyPrinter(indent=4) print pp.pprint(response) + +print +print "Calling classifier API endpoint on the url: http://www.twitter.com/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.twitter.com/" +api = "analyze" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) \ No newline at end of file