forked from Global19-atlassian-net/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathvalidate.py
More file actions
executable file
·69 lines (58 loc) · 2.03 KB
/
validate.py
File metadata and controls
executable file
·69 lines (58 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
"""usage: %prog [options] url-or-filename
Validate an HTML5 document using a non-schema-based conformance checker"""
#RELEASE move ./examples/
import sys
import os
from optparse import OptionParser
#RELEASE remove
sys.path.insert(0,os.path.abspath(os.path.join(__file__,'../src')))
#END RELEASE
from html5lib import html5parser#, liberalxmlparser
from html5lib import treebuilders
from html5lib import constants
from html5lib.filters import validator
def parse():
    """Validate the document named by the last command-line argument.

    The argument is resolved as follows:

    * ``http://`` / ``https://`` URL -- fetched with ``urllib.urlopen``
      (the Python 2 API this script is written against); a ``charset``
      parameter of the Content-Type header, if any, is passed on to the
      parser as the encoding.
    * ``-`` -- standard input is read.
    * anything else -- opened as a local file.

    If the URL cannot be retrieved or the file cannot be opened, the
    argument string itself is handed to the parser (presumably to be
    treated as literal markup -- that behaviour lives in html5lib, not
    here).

    The collected parse errors are reported through ``printOutput``.
    Exits with status 1 when no argument was given.
    """
    optParser = getOptParser()
    opts, args = optParser.parse_args()

    # Check explicitly instead of catching IndexError around the whole
    # body, so unrelated IndexErrors are not reported as a missing name.
    if not args:
        sys.stderr.write("No filename provided. Use -h for help\n")
        sys.exit(1)

    source = args[-1]
    encoding = None
    opened = None  # handle opened here; closed again once parsing is done

    if source.startswith(('http://', 'https://')):
        # Try opening from the internet
        try:
            import urllib
            import cgi
            opened = source = urllib.urlopen(source)
            contentType = source.headers.get('content-type')
            if contentType:
                params = cgi.parse_header(contentType)[1]
                encoding = params.get('charset')
        except Exception:
            # Stay best-effort, but say what went wrong instead of hiding
            # it; Exception (not a bare except) lets Ctrl-C and sys.exit
            # through. sys.exc_info() keeps this valid on old Python 2.
            sys.stderr.write("Warning: problem retrieving %s: %s\n"
                             % (args[-1], sys.exc_info()[1]))
    elif source == '-':
        source = sys.stdin
    else:
        try:
            # Try opening from file system
            opened = source = open(source)
        except IOError:
            # Deliberate fallback: pass the raw argument string on.
            pass

    try:
        treebuilder = treebuilders.getTreeBuilder("simpleTree")
        p = html5parser.HTMLParser(tree=treebuilder,
                                   tokenizer=validator.HTMLConformanceChecker)
        document = p.parse(source, encoding=encoding)
    finally:
        # Only close what this function opened -- never sys.stdin.
        if opened is not None:
            opened.close()

    printOutput(p, document, opts)
def printOutput(parser, document, opts):
    """Write the errors recorded on ``parser`` to standard output.

    Every entry of ``parser.errors`` is unpacked as
    ``(pos, errorcode, datavars)``.  ``pos`` fills the
    ``"Line %i Col %i"`` prefix; the message template is looked up in
    ``constants.E`` by ``errorcode`` (falling back to an
    ``Unknown error`` text) and then interpolated with ``datavars``.

    ``document`` and ``opts`` are accepted but not used here.
    """
    def describe(entry):
        # One report line: "<location> <interpolated message>".
        position, code, variables = entry
        location = "Line %i Col %i" % position
        template = constants.E.get(code, 'Unknown error "%s"' % code)
        return location + " " + template % variables

    report = "\n".join([describe(entry) for entry in parser.errors])
    sys.stdout.write("\nValidation errors:\n" + report + "\n")
def getOptParser():
    """Return the command-line parser for this script.

    The module docstring is used as the usage text; no options are
    registered here, so only optparse's built-in ones are available.
    """
    return OptionParser(usage=__doc__)
# Run the validator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    parse()