From 44f4cd498d1c8ba0f764e74db0cfdf018ff3f161 Mon Sep 17 00:00:00 2001 From: Matt Banister Date: Tue, 4 Feb 2014 12:04:47 -0500 Subject: [PATCH 1/3] Fixed problem with filtering fields after API call. Fields are now sent in query string --- client.py | 13 ++----------- example.py | 6 ++++++ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/client.py b/client.py index fd842b0..7ad2bcd 100644 --- a/client.py +++ b/client.py @@ -10,19 +10,10 @@ def request(self, url, token, api, fields=[], version=2): Returns a python object containing the requested resource from the diffbot api """ params = {"url": url, "token": token} + if fields: + params['fields'] = fields response = requests.get(self.compose_url(api, version), params=params) obj = response.json() - obj = self.select_fields_from_response(obj, fields) - return obj - - @staticmethod - def select_fields_from_response(obj, fields): - """ - Returns the response object with the specified fields or all fields if - the fields list is empty - """ - if fields: - obj = dict((x, obj[x]) for x in fields) return obj def compose_url(self, api, version_number): diff --git a/example.py b/example.py index e69f3d3..143b0aa 100644 --- a/example.py +++ b/example.py @@ -13,3 +13,9 @@ print "\nPrinting response:\n" pp = pprint.PrettyPrinter(indent=4) print pp.pprint(response) + +print "Calling article API endpoint with fields specified on the url: http://shichuan.github.io/javascript-patterns/...\n" +response = diffbot.request(url, token, api, fields=['title', 'type'], version=2) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) From 11c736b0b58b2c0d1126f141f2126c65217d184f Mon Sep 17 00:00:00 2001 From: Matt Banister Date: Tue, 4 Feb 2014 13:09:19 -0500 Subject: [PATCH 2/3] Updated README and added some more example calls --- README.md | 76 ++++++++++++++++++++++++++++++++++++++++++++---------- example.py | 62 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 121 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 36cf54b..5f2a805 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,17 @@ -Python Diffbot API Client -========= +#Python Diffbot API Client -Currently supports calls to the article endpoint. + +##Preface +Identify and extract the important parts of any web page in Python! This client currently supports calls to the automatic APIs. Installation ------------ To install activate a new virtual environment and run the following command: $ pip install -r requirements.txt +##Configuration + To run the example, you must first configure a working API token in config.py: $ cp config.py.example config.py; vim config.py; @@ -18,27 +20,75 @@ Then replace the string "SOME_TOKEN" with your API token. Finally, to run the e $ python example.py -Example use --------------- -An example of how to use the client in your code: +##Usage + +###Article API +An example call to the Article API: ``` diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 url = "http://shichuan.github.io/javascript-patterns/" -token = "YOUR_TOKEN_HERE" api = "article" +response = diffbot.request(url, token, api, version=2) +``` + +###Frontpage API +An example call to the Frontpage API: + +``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" version = 2 +url = "http://www.huffingtonpost.com/" +api = "frontpage" +response = diffbot.request(url, token, api, version=version) +``` + +###Product API +An example call to the Product API: -response = diffbot.request(url, token, api, version=2) ``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 +url = "http://www.overstock.com/Home-Garden/iRobot-650-Roomba-Vacuuming-Robot/7886009/product.html" +api = "product" +response = diffbot.request(url, token, api, version=version) +``` + +###Image API +An example call to the Image API: + +``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 +url = "http://www.google.com/" +api = "image" +response = diffbot.request(url, token, api, version=version) +``` + +###Classifier API +An example call to the Classifier API: + +``` +diffbot = DiffbotClient() +token = "SOME_TOKEN" +version = 2 +url = "http://www.twitter.com/" +api = "analyze" +response = diffbot.request(url, token, api, version=version) +``` + -Testing ------------- +##Testing First install the test requirements with the following command: $ pip install -r test_requirements.txt -Unit and functional tests are configured to run using nose. From the project directory, simply run: +Currently there are some simple unit tests that mock the API calls and return data from fixtures in the filesystem. From the project directory, simply run: - $ nosetests + $ nosetests \ No newline at end of file diff --git a/example.py b/example.py index 143b0aa..574adec 100644 --- a/example.py +++ b/example.py @@ -2,20 +2,74 @@ from config import API_TOKEN import pprint + +print "Calling article API endpoint on the url: http://shichuan.github.io/javascript-patterns/...\n" diffbot = DiffbotClient() -url = "http://shichuan.github.io/javascript-patterns/" token = API_TOKEN -api = "article" version = 2 - -print "Calling article API endpoint on the url: http://shichuan.github.io/javascript-patterns/...\n" +url = "http://shichuan.github.io/javascript-patterns/" +api = "article" response = diffbot.request(url, token, api, version=2) print "\nPrinting response:\n" pp = pprint.PrettyPrinter(indent=4) print pp.pprint(response) +print print "Calling article API endpoint with fields specified on the url: http://shichuan.github.io/javascript-patterns/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://shichuan.github.io/javascript-patterns/" +api = "article" response = diffbot.request(url, token, api, fields=['title', 'type'], version=2) print "\nPrinting response:\n" pp = pprint.PrettyPrinter(indent=4) print pp.pprint(response) + +print +print "Calling frontpage API endpoint on the url: http://www.huffingtonpost.com/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.huffingtonpost.com/" +api = "frontpage" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) + +print +print "Calling product API endpoint on the url: http://www.overstock.com/Home-Garden/iRobot-650-Roomba-Vacuuming-Robot/7886009/product.html...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.overstock.com/Home-Garden/iRobot-650-Roomba-Vacuuming-Robot/7886009/product.html" +api = "product" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) + +print +print "Calling image API endpoint on the url: http://www.google.com/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.google.com/" +api = "image" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) + +print +print "Calling classifier API endpoint on the url: http://www.twitter.com/...\n" +diffbot = DiffbotClient() +token = API_TOKEN +version = 2 +url = "http://www.twitter.com/" +api = "analyze" +response = diffbot.request(url, token, api, version=version) +print "\nPrinting response:\n" +pp = pprint.PrettyPrinter(indent=4) +print pp.pprint(response) \ No newline at end of file From 7567ba4f7031eed64909b07e2a09babc97798e6f Mon Sep 17 00:00:00 2001 From: Matt Banister Date: Tue, 4 Feb 2014 13:26:26 -0500 Subject: [PATCH 3/3] Add kwargs parameter to Diffbot.request that can be used to pass optional parameters to API --- client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client.py b/client.py index 7ad2bcd..6fce288 100644 --- a/client.py +++ b/client.py @@ -5,13 +5,15 @@ class DiffbotClient(object): def __init__(self): self.base_url = "http://api.diffbot.com/" - def request(self, url, token, api, fields=[], version=2): + def request(self, url, token, api, fields=[], version=2, **kwargs): """ Returns a python object containing the requested resource from the diffbot api """ params = {"url": url, "token": token} if fields: params['fields'] = fields + if kwargs: + params.update(kwargs) response = requests.get(self.compose_url(api, version), params=params) obj = response.json() return obj