root / env / lib / python2.7 / site-packages / django / utils / http.py @ 1a305335
History | View | Annotate | Download (8 KB)
1 |
import calendar |
---|---|
2 |
import datetime |
3 |
import re |
4 |
import sys |
5 |
import urllib |
6 |
import urlparse |
7 |
from email.utils import formatdate |
8 |
|
9 |
from django.utils.datastructures import MultiValueDict |
10 |
from django.utils.encoding import smart_str, force_unicode |
11 |
from django.utils.functional import allow_lazy |
12 |
|
13 |
# Matches a single entity tag: an optional weak-validator prefix (W/) followed
# by a double-quoted string in which backslash escapes are allowed. The one
# capturing group holds the text between the quotes, escapes still in place.
ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
|
14 |
|
15 |
# Lower-case three-letter month abbreviations in calendar order; the position
# of an entry (plus one) is the month number.
MONTHS = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]

# Named-group regex fragments shared by the three date patterns below.
__D = r'(?P<day>\d{2})'        # two-digit day
__D2 = r'(?P<day>[ \d]\d)'     # day whose first position may be a space
__M = r'(?P<mon>\w{3})'        # three-character month abbreviation
__Y = r'(?P<year>\d{4})'       # four-digit year
__Y2 = r'(?P<year>\d{2})'      # two-digit year
__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})'

# e.g. 'Sun, 06 Nov 1994 08:49:37 GMT'
RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
# e.g. 'Sunday, 06-Nov-94 08:49:37 GMT'
RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
# e.g. 'Sun Nov  6 08:49:37 1994'
ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
|
25 |
|
26 |
def urlquote(url, safe='/'):
    """
    Unicode-aware counterpart of Python's urllib.quote().

    Both ``url`` and ``safe`` are passed through smart_str() before quoting
    (the url is UTF-8 encoded first), and the quoted result is returned via
    force_unicode(). The returned string can be used as part of an argument
    to a later iri_to_uri() call without being quoted twice.
    """
    encoded_url = smart_str(url)
    encoded_safe = smart_str(safe)
    return force_unicode(urllib.quote(encoded_url, encoded_safe))
urlquote = allow_lazy(urlquote, unicode)
|
35 |
|
36 |
def urlquote_plus(url, safe=''):
    """
    Unicode-aware counterpart of Python's urllib.quote_plus().

    Both ``url`` and ``safe`` are passed through smart_str() before quoting
    (the url is UTF-8 encoded first), and the quoted result is returned via
    force_unicode(). The returned string can be used as part of an argument
    to a later iri_to_uri() call without being quoted twice.
    """
    encoded_url = smart_str(url)
    encoded_safe = smart_str(safe)
    return force_unicode(urllib.quote_plus(encoded_url, encoded_safe))
urlquote_plus = allow_lazy(urlquote_plus, unicode)
|
45 |
|
46 |
def urlunquote(quoted_url):
    """
    Reverse django.utils.http.urlquote() by wrapping Python's
    urllib.unquote(): the input goes through smart_str() and the unquoted
    result is returned via force_unicode().
    """
    encoded = smart_str(quoted_url)
    return force_unicode(urllib.unquote(encoded))
urlunquote = allow_lazy(urlunquote, unicode)
|
53 |
|
54 |
def urlunquote_plus(quoted_url):
    """
    Reverse django.utils.http.urlquote_plus() by wrapping Python's
    urllib.unquote_plus(): the input goes through smart_str() and the
    unquoted result is returned via force_unicode().
    """
    encoded = smart_str(quoted_url)
    return force_unicode(urllib.unquote_plus(encoded))
urlunquote_plus = allow_lazy(urlunquote_plus, unicode)
|
61 |
|
62 |
def urlencode(query, doseq=0):
    """
    A version of Python's urllib.urlencode() function that can operate on
    unicode strings. The parameters are first cast to UTF-8 encoded strings
    and then encoded as per normal.

    ``query`` may be a MultiValueDict (its lists() are used), any object with
    an ``items()`` method, or an iterable of ``(key, value)`` pairs.
    ``doseq`` is passed straight through to urllib.urlencode().
    """
    if isinstance(query, MultiValueDict):
        query = query.lists()
    elif hasattr(query, 'items'):
        query = query.items()
    pairs = []
    for k, v in query:
        # A real conditional is required here: the old ``cond and a or b``
        # form treated an empty list/tuple as false and fell through to
        # smart_str(v), so with doseq set an empty sequence was encoded as
        # the literal text "[]" / "()" instead of producing no values.
        if isinstance(v, (list, tuple)):
            value = [smart_str(i) for i in v]
        else:
            value = smart_str(v)
        pairs.append((smart_str(k), value))
    return urllib.urlencode(pairs, doseq)
77 |
|
78 |
def cookie_date(epoch_seconds=None):
    """
    Format a timestamp for compatibility with Netscape's cookie standard.

    ``epoch_seconds`` is a floating point number of seconds since the epoch,
    in UTC, such as the value returned by time.time(). When it is None the
    current time is used.

    Returns a string in the format 'Wdy, DD-Mon-YYYY HH:MM:SS GMT'.
    """
    stamp = formatdate(epoch_seconds)
    # formatdate() yields 'Wdy, DD Mon YYYY HH:MM:SS -0000'; pick out the
    # pieces and rejoin the date parts with dashes.
    weekday_and_day = stamp[:7]
    month = stamp[8:11]
    year_and_time = stamp[12:25]
    return '%s-%s-%s GMT' % (weekday_and_day, month, year_and_time)
90 |
|
91 |
def http_date(epoch_seconds=None):
    """
    Format a timestamp in the RFC1123 date format required by HTTP
    (RFC2616 section 3.3.1).

    ``epoch_seconds`` is a floating point number of seconds since the epoch,
    in UTC, such as the value returned by time.time(). When it is None the
    current time is used.

    Returns a string in the format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    # Keep the first 25 characters of formatdate()'s output (everything up to
    # the seconds) and replace the numeric zone with the literal 'GMT'.
    date_and_time = formatdate(epoch_seconds)[:25]
    return '%s GMT' % date_and_time
104 |
|
105 |
def parse_http_date(date):
    """
    Parse a date in any of the formats specified by HTTP RFC2616 section
    3.3.1.

    All three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Returns the number of seconds since the epoch, in UTC. Raises ValueError
    when ``date`` matches none of the formats or does not describe a valid
    date.
    """
    # email.utils.parsedate does the job for RFC1123 dates; unfortunately
    # RFC2616 makes it mandatory to support RFC850 dates too. So we roll
    # our own RFC-compliant parsing.
    match = None
    for pattern in (RFC1123_DATE, RFC850_DATE, ASCTIME_DATE):
        match = pattern.match(date)
        if match is not None:
            break
    if match is None:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        year = int(match.group('year'))
        # Years below 100 are expanded: 0-69 -> 2000-2069, 70-99 -> 1970-1999.
        if year < 70:
            year += 2000
        elif year < 100:
            year += 1900
        month = MONTHS.index(match.group('mon').lower()) + 1
        day = int(match.group('day'))
        hour = int(match.group('hour'))
        minute = int(match.group('min'))
        second = int(match.group('sec'))
        moment = datetime.datetime(year, month, day, hour, minute, second)
        return calendar.timegm(moment.utctimetuple())
    except Exception:
        raise ValueError("%r is not a valid date" % date)
140 |
|
141 |
def parse_http_date_safe(date):
    """
    Like parse_http_date(), but return None instead of raising when the
    input is invalid.
    """
    try:
        parsed = parse_http_date(date)
    except Exception:
        # Deliberately best-effort: any failure means "no usable date".
        return None
    return parsed
|
149 |
|
150 |
# Base 36 functions: useful for generating compact URLs
|
151 |
|
152 |
def base36_to_int(s):
    """
    Convert a base 36 string to an ``int``. Raises ``ValueError`` if the
    input won't fit into an int.
    """
    # To prevent overconsumption of server resources, only strings of at most
    # 13 base36 digits are converted at all (13 digits is sufficient to
    # base36-encode any 64-bit integer) ...
    if len(s) <= 13:
        number = int(s, 36)
        # ... and the converted value must still fit into an int.
        if number <= sys.maxint:
            return number
    raise ValueError("Base36 input too large")
|
167 |
|
168 |
def int_to_base36(i):
    """
    Convert a non-negative integer to its base36 string representation,
    using the digits 0-9 followed by the lower-case letters a-z.

    Raises ValueError when ``i`` is outside the range 0..sys.maxint.
    """
    if not 0 <= i <= sys.maxint:
        raise ValueError("Base36 conversion input too large or incorrect type.")
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    # Find the exponent of the most significant base36 digit (0 for i < 36).
    exponent = 0
    while i >= 36 ** (exponent + 1):
        exponent += 1
    # Emit digits from most to least significant.
    chars = []
    for power in range(exponent, -1, -1):
        place = 36 ** power
        chars.append(alphabet[i // place])
        i = i % place
    return ''.join(chars)
190 |
|
191 |
def parse_etags(etag_str):
    """
    Parse a string holding one or several etags, as passed in If-None-Match
    and If-Match headers, by the rules in RFC 2616. Returns a list of etags
    without surrounding double quotes (") and unescaped from \<CHAR>.

    When nothing in ``etag_str`` looks like an etag, the whole string is
    returned as the single element of the list.
    """
    found = ETAG_MATCH.findall(etag_str)
    if found:
        return [raw.decode('string_escape') for raw in found]
    # etag_str has wrong format, treat it as an opaque string then
    return [etag_str]
|
203 |
|
204 |
def quote_etag(etag):
    """
    Wrap a string in double quotes, escaping its contents as necessary.
    """
    # Backslashes must be doubled first so that the backslashes added for
    # the quotes in the next step are not escaped again.
    escaped = etag.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
209 |
|
210 |
if sys.version_info >= (2, 6):
    def same_origin(url1, url2):
        """
        Return whether two URLs are 'same-origin', i.e. share the same
        scheme, hostname and port.
        """
        parsed = (urlparse.urlparse(url1), urlparse.urlparse(url2))
        origins = [(p.scheme, p.hostname, p.port) for p in parsed]
        return origins[0] == origins[1]
else:
    # Python 2.5 compatibility. This actually works for Python 2.6 and above,
    # but the above definition is much more obviously correct and so is
    # preferred going forward.
    def same_origin(url1, url2):
        """
        Return whether two URLs are 'same-origin', comparing the first two
        components (scheme and network location) of the parsed URLs.
        """
        first = urlparse.urlparse(url1)
        second = urlparse.urlparse(url2)
        return first[0:2] == second[0:2]
227 |
|
228 |
def is_safe_url(url, host=None):
    """
    Return ``True`` if the url is a safe redirection (i.e. it doesn't point to
    a different host and uses a safe scheme).

    ``url`` is the redirect target to check and ``host`` is the network
    location that absolute URLs are allowed to point to.

    Always returns ``False`` on an empty url.
    """
    if url is not None:
        url = url.strip()
    if not url:
        return False
    # Some browsers treat a backslash exactly like a forward slash, so
    # normalise before parsing; otherwise '\\\\evil.com' parses as a relative
    # path here but would be followed as '//evil.com'.
    url = url.replace('\\', '/')
    # urlparse sees no network location in a URL starting with three slashes,
    # but browsers may still resolve it to another host. Reject it outright.
    if url.startswith('///'):
        return False
    parts = urlparse.urlparse(url)
    scheme, netloc = parts[0], parts[1]
    if scheme:
        # A scheme without a network location ('javascript:...',
        # 'http:///evil.com', 'mailto:...') is never a same-host redirect.
        if not netloc:
            return False
        # Only plain web schemes are acceptable redirect targets.
        if scheme not in ('http', 'https'):
            return False
    return not netloc or netloc == host