Project

General

Profile

Statistics
| Branch: | Revision:

root / env / lib / python2.7 / site-packages / django / utils / html_parser.py @ 1a305335

History | View | Annotate | Download (4.27 KB)

1
import HTMLParser as _HTMLParser
2
import re
3
import sys
4

    
5
# Version of the running interpreter; compared against plain tuples below.
current_version = sys.version_info

# True for interpreters older than 2.7.3, and for the 3.x line from 3.0 up to
# (but not including) 3.2.3. The flag decides whether the patched HTMLParser
# subclass or the unmodified stdlib class is exported by this module.
use_workaround = (
    current_version < (2, 7, 3) or
    (3, 0) <= current_version < (3, 2, 3)
)
11

    
12
if not use_workaround:
    # The interpreter is outside the affected version ranges: export the
    # stdlib parser unchanged.
    HTMLParser = _HTMLParser.HTMLParser
else:
    class HTMLParser(_HTMLParser.HTMLParser):
        """
        Patched version of stdlib's HTMLParser with patch from:
        http://bugs.python.org/issue670664

        The patch remembers which CDATA-content element is currently open
        (``cdata_tag``) so that, while inside it, any end tag other than the
        matching one is reported through ``handle_data()`` instead of
        ``handle_endtag()``.

        NOTE(review): ``self.__starttag_text`` is name-mangled to
        ``_HTMLParser__starttag_text`` because this class is itself named
        ``HTMLParser``; presumably that is what keeps it in sync with the
        attribute the stdlib base class uses -- confirm before renaming the
        class.
        """

        def __init__(self, *args, **kwargs):
            """
            Initialise the stdlib parser and start outside CDATA mode.

            Any positional/keyword arguments are forwarded unchanged to the
            base class constructor, so this subclass accepts exactly what the
            stdlib class of the running interpreter accepts.
            """
            _HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
            # Lower-cased name of the open CDATA element, or None.
            self.cdata_tag = None

        def set_cdata_mode(self, tag):
            """
            Enter CDATA mode for the element named ``tag``.

            Uses the stdlib module's ``interesting_cdata`` pattern when the
            module provides one; otherwise builds a case-insensitive pattern
            matching the element's own end tag. The lower-cased tag name is
            recorded in ``cdata_tag``.
            """
            lowered = tag.lower()
            try:
                self.interesting = _HTMLParser.interesting_cdata
            except AttributeError:
                # Escape the name: tag names may contain regex
                # metacharacters such as '.', which must match literally.
                self.interesting = re.compile(
                    r'</\s*%s\s*>' % re.escape(lowered), re.I)
            self.cdata_tag = lowered

        def clear_cdata_mode(self):
            """Leave CDATA mode and restore the normal scanning pattern."""
            self.interesting = _HTMLParser.interesting_normal
            self.cdata_tag = None

        # Internal -- handle starttag, return end or -1 if not terminated
        def parse_starttag(self, i):
            """
            Parse the start tag beginning at index ``i`` of ``self.rawdata``.

            Returns the index just past the tag, or the negative value
            returned by ``check_for_whole_start_tag()`` when the tag is not
            complete yet. Dispatches to ``handle_startendtag()`` for
            ``<tag ... />`` and to ``handle_starttag()`` otherwise; in the
            latter case CDATA mode is entered when the tag is listed in
            ``CDATA_CONTENT_ELEMENTS``. Calls ``self.error()`` when the tag
            ends in something other than ``>`` or ``/>``.
            """
            self.__starttag_text = None
            endpos = self.check_for_whole_start_tag(i)
            if endpos < 0:
                return endpos
            rawdata = self.rawdata
            self.__starttag_text = rawdata[i:endpos]

            # Now parse the data between i+1 and endpos into a tag and attrs
            attrs = []
            match = _HTMLParser.tagfind.match(rawdata, i + 1)
            assert match, 'unexpected call to parse_starttag()'
            k = match.end()
            self.lasttag = tag = rawdata[i + 1:k].lower()

            while k < endpos:
                m = _HTMLParser.attrfind.match(rawdata, k)
                if not m:
                    break
                attrname, rest, attrvalue = m.group(1, 2, 3)
                if not rest:
                    # Attribute given without a value part.
                    attrvalue = None
                elif (attrvalue[:1] == attrvalue[-1:] and
                        attrvalue[:1] in ('\'', '"')):
                    # Only values wrapped in matching quotes are unquoted
                    # and unescaped; other values are kept as matched.
                    attrvalue = self.unescape(attrvalue[1:-1])
                attrs.append((attrname.lower(), attrvalue))
                k = m.end()

            end = rawdata[k:endpos].strip()
            if end not in (">", "/>"):
                self.error("junk characters in start tag: %r"
                           % (rawdata[k:endpos][:20],))
            if end.endswith('/>'):
                # XHTML-style empty tag: <span attr="value" />
                self.handle_startendtag(tag, attrs)
            else:
                self.handle_starttag(tag, attrs)
                if tag in self.CDATA_CONTENT_ELEMENTS:
                    # Pass the tag name so parse_endtag() can tell the
                    # matching end tag apart from any other end tag.
                    self.set_cdata_mode(tag)
            return endpos

        # Internal -- parse endtag, return end or -1 if incomplete
        def parse_endtag(self, i):
            """
            Parse the end tag beginning at index ``i`` of ``self.rawdata``.

            Returns the index just past the tag's ``>``, or -1 when no ``>``
            has been seen yet. While in CDATA mode (``cdata_tag`` set), a
            malformed end tag or one whose name differs from ``cdata_tag`` is
            passed verbatim to ``handle_data()``. Otherwise
            ``handle_endtag()`` is called with the lower-cased tag name and
            CDATA mode is cleared.
            """
            rawdata = self.rawdata
            assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag"
            match = _HTMLParser.endendtag.search(rawdata, i + 1)  # >
            if not match:
                return -1
            j = match.end()
            match = _HTMLParser.endtagfind.match(rawdata, i)  # </ + tag + >
            if not match:
                if self.cdata_tag is not None:
                    # Inside a CDATA element a malformed end tag is just
                    # text.
                    self.handle_data(rawdata[i:j])
                    return j
                # The code below relies on self.error() raising, since
                # ``match`` is None at this point.
                self.error("bad end tag: %r" % (rawdata[i:j],))
            tag = match.group(1).strip()
            if self.cdata_tag is not None and tag.lower() != self.cdata_tag:
                # An end tag for some other element inside CDATA content is
                # reported as data, and CDATA mode stays active.
                self.handle_data(rawdata[i:j])
                return j
            self.handle_endtag(tag.lower())
            self.clear_cdata_mode()
            return j