| 1 |
# Very old interpreters have no True/False builtins; when the name lookup
# fails, install integer stand-ins so the rest of the module can use them.
try:
    True
except NameError:
    for _name, _value in (('True', 1), ('False', 0)):
        setattr(__builtins__, _name, _value)
| 2 |
def has_key(x, y):
    """Return whether container *x* holds key/member *y*.

    Uses ``x.has_key(y)`` when *x* provides that method and falls back to
    the ``in`` operator otherwise.
    """
    if not hasattr(x, 'has_key'):
        return y in x
    return x.has_key(y)
| 3 |
try: import htmlentitydefs import urlparse import HTMLParser except ImportError: #Python3 import html.entities as htmlentitydefs import urllib.parse as urlparse import html.parser as HTMLParser try: #Python3 import urllib.request as urllib except: import urllib import optparse, re, sys, codecs, types |
| 4 |
try: from textwrap import wrap except: pass |
| 5 |
# Use Unicode characters instead of their ASCII pseudo-replacements
UNICODE_SNOB = 0

# Put the links after each paragraph instead of at the end.
LINKS_EACH_PARAGRAPH = 0

# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
BODY_WIDTH = 78

# Don't show internal links (href="#local-anchor") -- corresponding link targets
# won't be visible in the plain text file anyway.
SKIP_INTERNAL_LINKS = True

# Use inline, rather than reference, formatting for images and links
INLINE_LINKS = True

# Number of pixels Google indents nested lists
GOOGLE_LIST_INDENT = 36

# NOTE(review): presumably these switch off output for anchors and images;
# the code that reads them is not visible in this section -- confirm.
IGNORE_ANCHORS = False
IGNORE_IMAGES = False
| 12 |
### Entity Nonsense ### |
| 13 |
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly without consulting the entity tables.
    Otherwise ``htmlentitydefs.name2codepoint`` is used when the module
    provides it; if not, the value is derived from
    ``htmlentitydefs.entitydefs``.

    Raises KeyError when *k* is not present in the table consulted.
    """
    if k == 'apos':
        return ord("'")

    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Fallback for interpreters without name2codepoint (added in 2.3).
        definition = htmlentitydefs.entitydefs[k]
        if definition.startswith("&#") and definition.endswith(";"):
            # Not in latin-1: the table stores a "&#NNN;" reference.
            return int(definition[2:-1])
        return ord(codecs.latin_1_decode(definition)[0])

    return htmlentitydefs.name2codepoint[k]
| 14 |
# Plain-ASCII stand-ins for named entities, keyed by entity name.
unifiable = {
    'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"',
    'copy':'(C)', 'mdash':'--', 'nbsp':' ',
    'rarr':'->', 'larr':'<-', 'middot':'*', 'ndash':'-',
    'oelig':'oe', 'aelig':'ae',
    'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a',
    'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e',
    'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i',
    'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o',
    'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u',
    'lrm':'', 'rlm':'',
}

# The same replacements keyed by numeric code point, so that numeric
# character references can be looked up as well.
unifiable_n = {}

for k in unifiable:
    unifiable_n[name2cp(k)] = unifiable[k]
| 17 |
def charref(name):
    """Convert the body of a numeric character reference to text.

    *name* is the part between ``&#`` and ``;``: decimal digits, or ``x``/``X``
    followed by hexadecimal digits.

    Returns the replacement from ``unifiable_n`` when ``UNICODE_SNOB`` is
    false and the code point has one; otherwise the character itself.

    A reference that cannot be decoded (digits that do not parse, or a code
    point the interpreter cannot represent) is returned unchanged as
    ``"&#" + name + ";"`` rather than raising, mirroring how unknown named
    entities are passed through.
    """
    literal = "&#" + name + ";"

    try:
        if name[:1] in ('x', 'X'):
            c = int(name[1:], 16)
        else:
            c = int(name)
    except ValueError:
        # e.g. "&#abc;" or "&#x;" -- leave malformed input as it was.
        return literal

    if not UNICODE_SNOB and c in unifiable_n:
        return unifiable_n[c]

    try:
        to_char = unichr
    except NameError:  # Python 3
        to_char = chr

    try:
        return to_char(c)
    except (ValueError, OverflowError):
        # Code point outside the range this interpreter supports.
        return literal
| 18 |
def entityref(c):
    """Convert a named entity reference (the text between ``&`` and ``;``).

    Returns the replacement from ``unifiable`` when ``UNICODE_SNOB`` is
    false and the name has one. Otherwise the name is resolved through
    ``name2cp`` and the corresponding character is returned. A name that
    ``name2cp`` rejects with KeyError is passed through unchanged as
    ``"&" + c + ";"``.
    """
    if not UNICODE_SNOB and c in unifiable:
        return unifiable[c]

    # Resolve the name once and reuse the result below.
    try:
        codepoint = name2cp(c)
    except KeyError:
        return "&" + c + ';'

    try:
        return unichr(codepoint)
    except NameError:  # Python 3
        return chr(codepoint)
| 19 |
def replaceEntities(s):
    """Substitution callback: turn one matched reference into its text.

    *s* is a regex match object whose group 1 is the reference body without
    the surrounding ``&`` and ``;``. Bodies starting with ``#`` are handed
    to ``charref`` (minus the ``#``); all others go to ``entityref``.
    """
    body = s.group(1)
    if body[0] != "#":
        return entityref(body)
    return charref(body[1:])
| 20 |
# Matches "&...;" references; group 1 captures the body between "&" and ";".
r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")


def unescape(s):
    """Return *s* with every reference matched by ``r_unescape`` replaced.

    Each match is converted by ``replaceEntities``.
    """
    converted = r_unescape.sub(replaceEntities, s)
    return converted
| 21 |
### End Entity Nonsense ### |
| 22 |
def onlywhite(line):
    """Return true if the line does only consist of whitespace characters.

    Only the space character counts as whitespace here. Returns False as
    soon as any other character is found; otherwise returns *line* itself,
    so a non-empty all-space line is truthy and an empty line is falsy.
    """
    for c in line:
        # Compare by value: identity checks against string literals depend
        # on interning and raise SyntaxWarning on current Python versions.
        if c != ' ':
            return False
    return line
Комментарии