From a63d129a3d9d8246334d3be0416c2c6c9b200264 Mon Sep 17 00:00:00 2001 From: Jeremy Bowers Date: Thu, 28 Feb 2013 15:09:29 -0500 Subject: [PATCH 1/8] Adds a tiny little Web app. --- projects/fdic/app.py | 34 +++++++++++++++++++++++++++++ projects/fdic/models.py | 18 +++++++++++++++ projects/fdic/requirements.txt | 2 ++ projects/fdic/templates/base.html | 13 +++++++++++ projects/fdic/templates/detail.html | 18 +++++++++++++++ projects/fdic/templates/list.html | 9 ++++++++ 6 files changed, 94 insertions(+) create mode 100644 projects/fdic/app.py create mode 100644 projects/fdic/models.py create mode 100644 projects/fdic/templates/base.html create mode 100644 projects/fdic/templates/detail.html create mode 100644 projects/fdic/templates/list.html diff --git a/projects/fdic/app.py b/projects/fdic/app.py new file mode 100644 index 0000000..832e5d6 --- /dev/null +++ b/projects/fdic/app.py @@ -0,0 +1,34 @@ +from flask import Flask, render_template + +from models import Bank + +app = Flask(__name__) + + +@app.route('/', methods=['GET']) +def failed_banks_list(): + + context = { + 'banks': Bank.select() + } + + return render_template('list.html', **context) + + +@app.route('/bank//', methods=['GET']) +def failed_bank_detail(cert_num): + this_bank = Bank.select()\ + .where(Bank.cert_num == int(cert_num)).get() + same_state_banks = Bank.select()\ + .where(Bank.state == this_bank.state)\ + .where(Bank.cert_num != int(cert_num)) + + context = { + 'bank': this_bank, + 'same_state_banks': same_state_banks + } + + return render_template('detail.html', **context) + +if __name__ == "__main__": + app.run(host='0.0.0.0', debug=True) diff --git a/projects/fdic/models.py b/projects/fdic/models.py new file mode 100644 index 0000000..8dff6d6 --- /dev/null +++ b/projects/fdic/models.py @@ -0,0 +1,18 @@ +from peewee import * + +db = SqliteDatabase('fdic.sqlite') + + +class Bank(Model): + bank = CharField() + city = CharField() + state = CharField() + cert_num = PrimaryKeyField() + acq_inst 
= CharField() + closed = DateField() + updated = DateField() + url = CharField() + + class Meta: + database = db + db_table = 'failed_banks' diff --git a/projects/fdic/requirements.txt b/projects/fdic/requirements.txt index d0e040a..9da0aba 100644 --- a/projects/fdic/requirements.txt +++ b/projects/fdic/requirements.txt @@ -1,2 +1,4 @@ beautifulsoup4 python-dateutil<=1.5 +flask-peewee +flask diff --git a/projects/fdic/templates/base.html b/projects/fdic/templates/base.html new file mode 100644 index 0000000..02178ec --- /dev/null +++ b/projects/fdic/templates/base.html @@ -0,0 +1,13 @@ + + + + + + +
+
+
{% block content %}{% endblock %}
+
+
+ + \ No newline at end of file diff --git a/projects/fdic/templates/detail.html b/projects/fdic/templates/detail.html new file mode 100644 index 0000000..6c58098 --- /dev/null +++ b/projects/fdic/templates/detail.html @@ -0,0 +1,18 @@ +{% extends 'base.html' %} + +{% block content %} +

{{ bank.bank }}

+

{{ bank.city}}, {{ bank.state }}

+

Closed {{ bank.closed }}

+

{{ bank.bank }} has the certification number {{ bank.cert_num }} and was closed on {{ bank.closed }}. +
It was acquired by {{ bank.acq_inst }}. +
See this bank on the FDIC Web site.

+ {% if same_state_banks > 0 %} +

{{ same_state_banks.count() }} more banks in {{ bank.state }}

+ +

+ {% endif %} + +{% endblock %} \ No newline at end of file diff --git a/projects/fdic/templates/list.html b/projects/fdic/templates/list.html new file mode 100644 index 0000000..0deed53 --- /dev/null +++ b/projects/fdic/templates/list.html @@ -0,0 +1,9 @@ +{% extends 'base.html' %} + +{% block content %} +

Failed banks

+ +{% endblock %} \ No newline at end of file From 2a5765c30e337bace2b5f09c8cf16f015cf6e6cb Mon Sep 17 00:00:00 2001 From: Jeremy Bowers Date: Thu, 28 Feb 2013 15:15:33 -0500 Subject: [PATCH 2/8] Comments added. --- projects/fdic/app.py | 21 +++++++++++++++++++++ projects/fdic/models.py | 13 +++++++++++++ 2 files changed, 34 insertions(+) diff --git a/projects/fdic/app.py b/projects/fdic/app.py index 832e5d6..65a51f9 100644 --- a/projects/fdic/app.py +++ b/projects/fdic/app.py @@ -1,34 +1,55 @@ +# Flask is what makes everything work. Import it. from flask import Flask, render_template +# Import our bank model. from models import Bank +# Flask needs to run! This gives it legs. app = Flask(__name__) +# Routes! @app.route('/', methods=['GET']) def failed_banks_list(): + """ + This route is for a list of ALL banks. + """ + # The context for this page is just "banks", a list of all banks. context = { 'banks': Bank.select() } + # Render the template to list.html and with the context from above. return render_template('list.html', **context) @app.route('/bank//', methods=['GET']) def failed_bank_detail(cert_num): + """ + This route is for a single bank. + We're going to do TWO things. + a.) We're going to get the one bank. + b.) We're going to get all banks EXCEPT this bank in the same state. + """ + # a.) Get this bank. this_bank = Bank.select()\ .where(Bank.cert_num == int(cert_num)).get() + + # b.) Get the other banks in this state. same_state_banks = Bank.select()\ .where(Bank.state == this_bank.state)\ .where(Bank.cert_num != int(cert_num)) + # Set up the context; include both this bank and other banks from this state. context = { 'bank': this_bank, 'same_state_banks': same_state_banks } + # Render the template to detail.html and with that context. return render_template('detail.html', **context) +# Last bit! Just need to get flask to run when we run it. 
if __name__ == "__main__": app.run(host='0.0.0.0', debug=True) diff --git a/projects/fdic/models.py b/projects/fdic/models.py index 8dff6d6..60b3115 100644 --- a/projects/fdic/models.py +++ b/projects/fdic/models.py @@ -1,9 +1,15 @@ +# Import our library. from peewee import * +# Connect to the DB. db = SqliteDatabase('fdic.sqlite') +# Set up a bank. class Bank(Model): + """ + This defines a bank and all of the fields a bank has. + """ bank = CharField() city = CharField() state = CharField() @@ -13,6 +19,13 @@ class Bank(Model): updated = DateField() url = CharField() + # What is this thing? class Meta: + """ + It's a class INSIDE a class. + Don't let that bother you. + We need to attach this model to a database. + Also, we need to point to Schnaars's table. + """ database = db db_table = 'failed_banks' From 5d208c08691f315214019cbd49092206618a4fbe Mon Sep 17 00:00:00 2001 From: Jeremy Bowers Date: Thu, 28 Feb 2013 15:21:06 -0500 Subject: [PATCH 3/8] Fix requirements. Remove URL field. --- projects/fdic/models.py | 1 - projects/fdic/requirements.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/fdic/models.py b/projects/fdic/models.py index 60b3115..5a5de33 100644 --- a/projects/fdic/models.py +++ b/projects/fdic/models.py @@ -17,7 +17,6 @@ class Bank(Model): acq_inst = CharField() closed = DateField() updated = DateField() - url = CharField() # What is this thing? class Meta: diff --git a/projects/fdic/requirements.txt b/projects/fdic/requirements.txt index 9da0aba..aaba7ef 100644 --- a/projects/fdic/requirements.txt +++ b/projects/fdic/requirements.txt @@ -1,4 +1,4 @@ beautifulsoup4 python-dateutil<=1.5 -flask-peewee +peewee flask From 08a00110d5853010e2582a9a794ce8d315271246 Mon Sep 17 00:00:00 2001 From: Jeremy Bowers Date: Thu, 28 Feb 2013 15:21:32 -0500 Subject: [PATCH 4/8] Adds more stuff. 
--- projects/fdic/templates/detail.html | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/fdic/templates/detail.html b/projects/fdic/templates/detail.html index 6c58098..34a7450 100644 --- a/projects/fdic/templates/detail.html +++ b/projects/fdic/templates/detail.html @@ -6,7 +6,6 @@

{{ bank.city}}, {{ bank.state }}

Closed {{ bank.closed }}

{{ bank.bank }} has the certification number {{ bank.cert_num }} and was closed on {{ bank.closed }}.
It was acquired by {{ bank.acq_inst }}. -
See this bank on the FDIC Web site.

{% if same_state_banks > 0 %}

{{ same_state_banks.count() }} more banks in {{ bank.state }}

    {% for bank in same_state_banks %} From d8a11e8e45f9a623195edece6978a783b81bb38e Mon Sep 17 00:00:00 2001 From: Jeremy Bowers Date: Thu, 28 Feb 2013 15:23:43 -0500 Subject: [PATCH 5/8] Template tweaking. --- projects/fdic/templates/detail.html | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/fdic/templates/detail.html b/projects/fdic/templates/detail.html index 34a7450..d9eacdf 100644 --- a/projects/fdic/templates/detail.html +++ b/projects/fdic/templates/detail.html @@ -4,14 +4,14 @@

    {{ bank.bank }}

    {{ bank.city}}, {{ bank.state }}

    Closed {{ bank.closed }}

    -

    {{ bank.bank }} has the certification number {{ bank.cert_num }} and was closed on {{ bank.closed }}. -
    It was acquired by {{ bank.acq_inst }}. - {% if same_state_banks > 0 %} -

    {{ same_state_banks.count() }} more banks in {{ bank.state }}

    +
    {{ bank.bank }} has the certification number {{ bank.cert_num }} and was closed on {{ bank.closed }}. +
    It was acquired by {{ bank.acq_inst }}.
    + {% if same_state_banks.count() > 0 %} +

    {{ same_state_banks.count() }} more banks in {{ bank.state }}

    -

    - {% endif %} +
    + {% endif %}
    {% endblock %} \ No newline at end of file From b9fe101507a12591c8502490dd8fdfa6104cefa1 Mon Sep 17 00:00:00 2001 From: Jeremy Bowers Date: Thu, 28 Feb 2013 15:24:47 -0500 Subject: [PATCH 6/8] Bold some stuff. --- projects/fdic/templates/detail.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/fdic/templates/detail.html b/projects/fdic/templates/detail.html index d9eacdf..f5cd5f4 100644 --- a/projects/fdic/templates/detail.html +++ b/projects/fdic/templates/detail.html @@ -4,7 +4,7 @@

    {{ bank.bank }}

    {{ bank.city}}, {{ bank.state }}

    Closed {{ bank.closed }}

    -
    {{ bank.bank }} has the certification number {{ bank.cert_num }} and was closed on {{ bank.closed }}. +
    {{ bank.bank }} has the certification number {{ bank.cert_num }} and was closed on {{ bank.closed }}.
    It was acquired by {{ bank.acq_inst }}.
    {% if same_state_banks.count() > 0 %}

    {{ same_state_banks.count() }} more banks in {{ bank.state }}

    From 5eeed910da450741f711d026c07fbd6ee777578d Mon Sep 17 00:00:00 2001 From: jackiekazil Date: Sat, 12 Oct 2013 10:27:37 -0500 Subject: [PATCH 7/8] Fixing a bug w/ the url on the first accessed url. This is odd, but the url pattern was changed for just the first url. --- tutorials/webscraping101/fec_efiles_scrape.py | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tutorials/webscraping101/fec_efiles_scrape.py b/tutorials/webscraping101/fec_efiles_scrape.py index b2c3c8b..f267903 100644 --- a/tutorials/webscraping101/fec_efiles_scrape.py +++ b/tutorials/webscraping101/fec_efiles_scrape.py @@ -94,6 +94,7 @@ # To get at the raw data for each filing, we'll combine the above BASE_URL with # unique FEC report numbers (found in the download_links that we extracted above). + for link in download_links: # Below, we use a single line of code to extract the unique FEC report number: @@ -124,7 +125,32 @@ # The first row in the FEC data contains useful info about the format of # the remaining rows in the file. - version = data[0][2] # e.g., 8.0 + # However, after the initial creation of this scraper, there is at least one bad + # link that we have to handle. + + # First we try to extract the version. If it is successful, then continue. + # If not, we move to the exception handling section. + try: + version = data[0][2] # e.g., 8.0 + # This exception handling section looks for our bad link which causes the program + # to throw an IndexError. We are going to define a special url for this case. + except IndexError: + # If you look at the code below, you will notice that it repeats what we had above. + # However, the csv_download link is redefined. + # For the best practice, we would pull out this pattern into a function. + # Then we would call the function above then again if the error occurs. + # We encourage you to try to turn this piece of code into a function that is + # called twice. 
+ ALT_BASE_URL = 'http://query.nictusa.com/showcsv/nicweb26502/%s.fec' + csv_download_link = ALT_BASE_URL % fec_num + response = requests.get(csv_download_link) + data_rows = response.text.split('\n') + data = list(csv.reader(data_rows)) + version = data[0][2] # e.g., 8.0 + # If the program has another index error at this point, this means that our + # catch/fix didn't work. More troubleshooting and exception handling might + # be needed. + print "Downloaded Electronic filing with File Format Version %s" % version ### WHAT'S NEXT? ### From 5ce017faaab23361fb6e3b29728cf8966b7673f8 Mon Sep 17 00:00:00 2001 From: jackiekazil Date: Sat, 12 Oct 2013 10:28:33 -0500 Subject: [PATCH 8/8] Adding a friendly space. --- tutorials/webscraping101/fec_efiles_scrape.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tutorials/webscraping101/fec_efiles_scrape.py b/tutorials/webscraping101/fec_efiles_scrape.py index f267903..5cd68de 100644 --- a/tutorials/webscraping101/fec_efiles_scrape.py +++ b/tutorials/webscraping101/fec_efiles_scrape.py @@ -134,6 +134,7 @@ version = data[0][2] # e.g., 8.0 # This exception handling section looks for our bad link which causes the program # to throw an IndexError. We are going to define a special url for this case. + except IndexError: # If you look at the code below, you will notice that it repeats what we had above. # However, the csv_download link is redefined.