diff --git a/publish b/publish
index 6fc7cbd..603c870 100755
--- a/publish
+++ b/publish
@@ -25,6 +25,11 @@ rm -f examples/*.pyc
 cp -R examples build/
 cp .htaccess build/
 
+echo "validating HTML"
+for f in *.html; do
+    python3 util/validate.py "$f" > /dev/null || die "Failed to validate $f"
+done
+
 echo "building HTML distribution"
 htmlbasedir=diveintopython3-r"$revision"-"$today"
 htmldir=build/"$htmlbasedir"
diff --git a/regular-expressions.html b/regular-expressions.html
index f384049..5be0d59 100755
--- a/regular-expressions.html
+++ b/regular-expressions.html
@@ -84,7 +84,6 @@ body{counter-reset:h1 5}
The following are some general rules for constructing Roman numerals:
I is 1, II is 2, and III is 3. VI is 6 (literally, “5 and 1”), VII is 7, and VIII is 8.
The tens characters (I, X, C, and M) can be repeated up to three times. At 4, you need to subtract from the next highest fives character. You can't represent 4 as IIII; instead, it is represented as IV (“1 less than 5”). 40 is written as XL (“10 less than 50”), 41 as XLI, 42 as XLII, 43 as XLIII, and then 44 as XLIV (“10 less than 50, then 1 less than 5”).
At 9, you need to subtract from the next highest tens character: 8 is VIII, but 9 is IX (“1 less than 10”), not VIIII (since the I character cannot be repeated four times). 90 is XC, 900 is CM.
diff --git a/util/validate.py b/util/validate.py
new file mode 100644
index 0000000..403c063
--- /dev/null
+++ b/util/validate.py
@@ -0,0 +1,17 @@
+import sys
+try:
+ import html5lib
+except ImportError:
+ sys.path.insert(0, '/Users/pilgrim/code/html5lib/python3/src/')
+ import html5lib
+
+input_filename = sys.argv[1]
+parser = html5lib.HTMLParser()
+with open(input_filename, encoding='utf-8') as stream:
+ data = stream.read()
+html5doc = parser.parse(data, encoding='utf-8')
+if parser.errors:
+ for ((line, column), errtype, params) in parser.errors:
+ print("Error: {} {} on line {} of {}".format(errtype, repr(params), line, input_filename), file=sys.stderr)
+ sys.exit(1)
+