root/env/lib/python2.7/site-packages/distribute-0.6.19-py2.7.egg/setuptools/package_index.py @ 1a305335
"""PyPI and direct package downloading"""
import sys, os.path, re, urlparse, urllib, urllib2, shutil, random, socket, cStringIO
import httplib
from pkg_resources import *
from distutils import log
from distutils.errors import DistutilsError
try:
    from hashlib import md5
except ImportError:
    from md5 import md5
from fnmatch import translate

EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
PYPI_MD5 = re.compile(
    '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
    'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\\)'
)
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]

_SOCKET_TIMEOUT = 15

def parse_bdist_wininst(name):
    """Return (base, py_ver, plat) or (None, None, None) for a possible .exe name"""

    lower = name.lower()
    base, py_ver, plat = None, None, None

    if lower.endswith('.exe'):
        if lower.endswith('.win32.exe'):
            base = name[:-10]
            plat = 'win32'
        elif lower.startswith('.win32-py', -16):
            py_ver = name[-7:-4]
            base = name[:-16]
            plat = 'win32'
        elif lower.endswith('.win-amd64.exe'):
            base = name[:-14]
            plat = 'win-amd64'
        elif lower.startswith('.win-amd64-py', -20):
            py_ver = name[-7:-4]
            base = name[:-20]
            plat = 'win-amd64'
    return base, py_ver, plat

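# For illustration (hypothetical inputs, not part of the original file):
#
#     parse_bdist_wininst("foo-1.0.win32.exe")       -> ("foo-1.0", None, "win32")
#     parse_bdist_wininst("foo-1.0.win32-py2.7.exe") -> ("foo-1.0", "2.7", "win32")
#     parse_bdist_wininst("foo-1.0.tar.gz")          -> (None, None, None)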

def egg_info_for_url(url):
    scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
    base = urllib2.unquote(path.split('/')[-1])
    if '#' in base: base, fragment = base.split('#', 1)
    return base, fragment

def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    for dist in distros_for_location(url, base, metadata): yield dist
    if fragment:
        match = EGG_FRAGMENT.match(fragment)
        if match:
            for dist in interpret_distro_name(
                url, match.group(1), metadata, precedence=CHECKOUT_DIST
            ):
                yield dist

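# For illustration (a hypothetical example, not part of the original file): a
# source-checkout URL such as
#     http://svn.example.com/Foo/trunk#egg=Foo-1.2dev
# has base "trunk" and fragment "egg=Foo-1.2dev"; the fragment supplies the
# project name and version that the URL itself lacks.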
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]    # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]

    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )

    # Try source distro extensions (.zip, .tgz, etc.)
    #
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            basename = basename[:-len(ext)]
            return interpret_distro_name(location, basename, metadata)
    return []   # no extension matched

def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    return distros_for_location(
        normalize_path(filename), os.path.basename(filename), metadata
    )


def interpret_distro_name(location, basename, metadata,
    py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Some package names are ambiguous as to where the name ends and the
    # version begins (e.g. "adns-python-1.1.0", "egenix-mx-commercial"), so we
    # generate each possible interpretation (e.g. "adns, python-1.1.0",
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it.  It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    if not py_version:
        for i, p in enumerate(parts[2:]):
            if len(p) == 5 and p.startswith('py2.'):
                return  # It's a bdist_dumb, not an sdist -- bail out

    for p in range(1, len(parts) + 1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence=precedence,
            platform=platform
        )

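# For illustration (a worked example, not part of the original file): for the
# basename "adns-python-1.1.0" the loop above yields Distributions with
# (project, version) splits ("adns", "python-1.1.0"), ("adns-python", "1.1.0"),
# and ("adns-python-1.1.0", ""); version comparison then makes the spurious
# splits sort below any real numeric version.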
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting

def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = map(str.strip, rel.lower().split(','))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urlparse.urljoin(url, htmldecode(match.group(1)))

    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urlparse.urljoin(url, htmldecode(match.group(1)))

user_agent = "Python-urllib/%s distribute/%s" % (
    sys.version[:3], require('distribute')[0].version
)


class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',),
        *args, **kw
    ):
        Environment.__init__(self, *args, **kw)
        # append a trailing slash only if one is missing:
        # "/"[:True] == "/", "/"[:False] == ""
        self.index_url = index_url + "/"[:not index_url.endswith('/')]
        self.scanned_urls = {}
        self.fetched_urls = {}
        self.package_pages = {}
        # each `hosts` glob is fnmatch-translated, then joined into a single
        # alternation regex used to screen download hosts
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []

    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            map(self.add, dists)
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        f = self.open_url(url, "Download error: %s -- Some packages may not be found!")
        if f is None: return
        self.fetched_urls[url] = self.fetched_urls[f.url] = True

        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()   # not html, we can't process it
            return

        base = f.url    # handle redirects
        page = f.read()
        if not isinstance(page, str):  # We are in Python 3 and got bytes. We want str.
            if isinstance(f, urllib2.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        for match in HREF.finditer(page):
            link = urlparse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            map(self.add, dists)

    def url_ok(self, url, fatal=False):
        s = URL_SCHEME(url)
        if (s and s.group(1).lower() == 'file') or self.allows(urlparse.urlparse(url)[1]):
            return True
        msg = "\nLink to %s ***BLOCKED*** by --allow-hosts\n"
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def scan_egg_links(self, search_path):
        for item in search_path:
            if os.path.isdir(item):
                for entry in os.listdir(item):
                    if entry.endswith('.egg-link'):
                        self.scan_egg_link(item, entry)

    def scan_egg_link(self, path, entry):
        lines = filter(None, map(str.strip, open(os.path.join(path, entry))))
        if len(lines) == 2:
            for dist in find_distributions(os.path.join(path, lines[0])):
                dist.location = os.path.join(path, *lines)
                dist.precedence = SOURCE_DIST
                self.add(dist)

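    # For illustration (an assumption, not part of the original file): an
    # .egg-link file holds two non-blank lines -- a project directory and a
    # path relative to it (often just ".") -- so os.path.join(path, *lines)
    # above reconstructs the development egg's actual location.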
    def process_index(self, url, page):
        """Process the contents of a PyPI page"""
        def scan(link):
            # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
                parts = map(
                    urllib2.unquote, link[len(self.index_url):].split('/')
                )
                if len(parts) == 2 and '#' not in parts[1]:
                    # it's a package page, sanitize and index it
                    pkg = safe_name(parts[0])
                    ver = safe_version(parts[1])
                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
                    return to_filename(pkg), to_filename(ver)
            return None, None

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                scan(urlparse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = scan(url)    # ensure this page is in the page index
        if pkg:
            # process individual package page
            for new_url in find_external_links(url, page):
                # Process the found URL
                base, frag = egg_info_for_url(new_url)
                if base.endswith('.py') and not frag:
                    if ver:
                        new_url += '#egg=%s-%s' % (pkg, ver)
                    else:
                        self.need_version_info(url)
                self.scan_url(new_url)

            return PYPI_MD5.sub(
                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
            )
        else:
            return ""   # no sense double-scanning non-package pages

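    # For illustration (a hypothetical example, not part of the original
    # file): with index_url "http://pypi.python.org/simple/", a link such as
    # "http://pypi.python.org/simple/Foo/1.0" splits into ['Foo', '1.0'], so
    # scan() records the page under the key 'foo' and returns ('Foo', '1.0').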

    def need_version_info(self, url):
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.", url
        )

    def scan_all(self, msg=None, *args):
        if self.index_url not in self.fetched_urls:
            if msg: self.warn(msg, *args)
            self.info(
                "Scanning index of all packages (this may take a while)"
            )
        self.scan_url(self.index_url)

    def find_packages(self, requirement):
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key, ())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_md5(self, cs, info, filename, tfp):
        if re.match('md5=[0-9a-f]{32}$', info):
            self.debug("Validating md5 checksum for %s", filename)
            if cs.hexdigest() != info[4:]:
                tfp.close()
                os.unlink(filename)
                raise DistutilsError(
                    "MD5 validation failed for " + os.path.basename(filename) +
                    "; possible download problem?"
                )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None           # if we have already "gone online"
                or not URL_SCHEME(url)         # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))  # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            map(self.scan_url, self.to_scan)
        self.to_scan = None     # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        if self[requirement.key]:   # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:   # no distros seen for this name, might be misspelled
            meth, msg = (self.warn,
                "Couldn't find index page for %r (maybe misspelled?)")
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object).  If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged.  If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned.  Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                try:
                    spec = Requirement.parse(spec)
                except ValueError:
                    raise DistutilsError(
                        "Not a URL, existing file, or requirement spec: %r" %
                        (spec,)
                    )
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)

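    # For illustration (a hypothetical usage sketch, not part of the original
    # file):
    #
    #     pi = PackageIndex()
    #     path = pi.download('SomeProject==1.0', tmpdir)    # requirement spec
    #     path = pi.download('http://example.com/SomeProject-1.0.tar.gz', tmpdir)
    #
    # Both calls return a local filename under `tmpdir` (or None when no match
    # is found for the requirement form).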
    def fetch_distribution(self,
        requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
        local_index=None
    ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages.  If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename.  If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered.  Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """

        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn("Skipping development or system egg: %s", dist)
                        skipped[dist] = 1
                    continue

                if dist in req and (dist.precedence <= SOURCE_DIST or not source):
                    self.info("Best match: %s", dist)
                    return dist.clone(
                        location=self.download(dist.location, tmpdir)
                    )

        if force_scan:
            self.prescan()
            self.find_packages(requirement)
        dist = find(requirement)

        if local_index is not None:
            dist = dist or find(requirement, local_index)

        if dist is None and self.to_scan is not None:
            self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        return dist

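    # For illustration (a summary, not part of the original file): the search
    # order above is (1) distributions already known, after a forced scan when
    # `force_scan` is set, (2) the optional `local_index`, (3) any deferred
    # --find-links pages via prescan(), and (4) a package-page scan as a last
    # resort before giving up.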
    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead.  For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        match = EGG_FRAGMENT.match(fragment)
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists) == 1:     # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                from setuptools.command.easy_install import samefile
                if not samefile(filename, dst):
                    shutil.copy2(filename, dst)
                    filename = dst

            file = open(os.path.join(tmpdir, 'setup.py'), 'w')
            file.write(
                "from setuptools import setup\n"
                "setup(name=%r, version=%r, py_modules=[%r])\n"
                % (
                    dists[0].project_name, dists[0].version,
                    os.path.splitext(basename)[0]
                )
            )
            file.close()
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    dl_blocksize = 8192

    def _download_to(self, url, filename):
        self.info("Downloading %s", url)
        # Download the file
        fp, tfp, info = None, None, None
        try:
            if '#' in url:
                url, info = url.split('#', 1)
            fp = self.open_url(url)
            if isinstance(fp, urllib2.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            cs = md5()
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                content_length = headers.get("Content-Length")
                size = int(content_length)
            self.reporthook(url, filename, blocknum, bs, size)
            tfp = open(filename, 'wb')
            while True:
                block = fp.read(bs)
                if block:
                    cs.update(block)
                    tfp.write(block)
                    blocknum += 1
                    self.reporthook(url, filename, blocknum, bs, size)
                else:
                    break
            if info: self.check_md5(cs, info, filename, tfp)
            return headers
        finally:
            if fp: fp.close()
            if tfp: tfp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        pass    # no-op

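    # For illustration (an assumption, not part of the original file):
    # subclasses may override reporthook() to get progress output, e.g.
    #
    #     class VerbosePackageIndex(PackageIndex):
    #         def reporthook(self, url, filename, blocknum, blksize, size):
    #             if size > 0:
    #                 pct = min(blocknum * blksize * 100 // size, 100)
    #                 log.info("%s: %d%%", filename, pct)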
    def open_url(self, url, warning=None):
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url)
        except (ValueError, httplib.InvalidURL), v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg))
        except urllib2.HTTPError, v:
            return v
        except urllib2.URLError, v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason))
        except httplib.BadStatusLine, v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError('%s returned a bad status line. '
                                     'The server might be down, %s' %
                                     (url, v.line))
        except httplib.HTTPException, v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v))

    def _download_url(self, scheme, url, tmpdir):
        # Determine download filename
        #
        name = filter(None, urlparse.urlparse(url)[2].split('/'))
        if name:
            name = name[-1]
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"     # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]    # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'file':
            return urllib.url2pathname(urlparse.urlparse(url)[2])
        else:
            self.url_ok(url, True)      # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type', '').lower():
            return self._download_html(url, headers, filename)
        else:
            return filename

    def _download_html(self, url, headers, filename):
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break   # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)

    def _download_svn(self, url, filename):
        url = url.split('#', 1)[0]  # remove any fragment for svn's sake
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout -q %s %s" % (url, filename))
        return filename

    def debug(self, msg, *args):
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)

# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub

def uchr(c):
    if not isinstance(c, int):
        return c
    if c > 255: return unichr(c)
    return chr(c)

def decode_entity(match):
    what = match.group(1)
    if what.startswith('#x'):
        what = int(what[2:], 16)
    elif what.startswith('#'):
        what = int(what[1:])
    else:
        from htmlentitydefs import name2codepoint
        what = name2codepoint.get(what, match.group(0))
    return uchr(what)

def htmldecode(text):
    """Decode HTML entities in the given text."""
    return entity_sub(decode_entity, text)

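# For illustration (not part of the original file):
#
#     htmldecode('a &amp; b')        -> 'a & b'
#     htmldecode('&#8216;hi&#8217;') -> u'\u2018hi\u2019'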


def socket_timeout(timeout=15):
    def _socket_timeout(func):
        def _socket_timeout(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)
        return _socket_timeout
    return _socket_timeout

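# For illustration (a summary, not part of the original file): socket_timeout
# wraps a function so that the global socket default timeout is set for the
# duration of each call and restored afterwards; it is applied to
# open_with_auth below.
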
def open_with_auth(url):
    """Open a urllib2 request, handling HTTP authentication"""

    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)

    if scheme in ('http', 'https'):
        auth, host = urllib2.splituser(netloc)
    else:
        auth = None

    if auth:
        auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
        new_url = urlparse.urlunparse((scheme, host, path, params, query, frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = urllib2.urlopen(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlparse.urlunparse((s2, netloc, path2, param2, query2, frag2))

    return fp

# adding a timeout to avoid freezing package_index
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
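
# For illustration (a hypothetical example, not part of the original file): a
# URL such as http://user:secret@example.com/simple/ is requested as
# http://example.com/simple/ with the credentials sent in a Basic
# Authorization header; the credentials are then spliced back into fp.url so
# that relative links found on the page keep working.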


def fix_sf_url(url):
    return url      # backward compatibility

def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urlparse.urlparse(url)
    filename = urllib.url2pathname(path)
    if os.path.isfile(filename):
        return urllib2.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            if f == 'index.html':
                fp = open(os.path.join(filename, f), 'rb')
                body = fp.read()
                fp.close()
                break
            elif os.path.isdir(os.path.join(filename, f)):
                f += '/'
            files.append("<a href=%r>%s</a>" % (f, f))
        else:
            body = ("<html><head><title>%s</title>" % url) + \
                   "</head><body>%s</body></html>" % '\n'.join(files)
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    return urllib2.HTTPError(url, status, message,
                             {'content-type': 'text/html'}, cStringIO.StringIO(body))



# this line is a kludge to keep the trailing blank lines for pje's editor