root / env / lib / python2.7 / site-packages / distribute-0.6.19-py2.7.egg / setuptools / package_index.py @ 1a305335
History | View | Annotate | Download (29.7 KB)
1 | 1a305335 | officers | """PyPI and direct package downloading"""
|
---|---|---|---|
2 | import sys, os.path, re, urlparse, urllib, urllib2, shutil, random, socket, cStringIO |
||
3 | import httplib |
||
4 | from pkg_resources import * |
||
5 | from distutils import log |
||
6 | from distutils.errors import DistutilsError |
||
7 | try:
|
||
8 | from hashlib import md5 |
||
9 | except ImportError: |
||
10 | from md5 import md5 |
||
11 | from fnmatch import translate |
||
12 | |||
# Matches an "#egg=name-version" URL fragment used to identify a checkout.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
# Extracts the target of an href attribute (quoted or unquoted).
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
# Matches PyPI's '<a>file</a> (<a ...show_md5...>md5</a>)' link pairs so the
# digest can be folded into the download link as a '#md5=' fragment.
PYPI_MD5 = re.compile(
    '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
    'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\\)'
)
# Matches a URL scheme prefix such as "http:" or "svn+ssh:".
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
# Archive extensions recognized as source distributions.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout (seconds) applied around network operations.
_SOCKET_TIMEOUT = 15
||
29 | |||
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""

    lowered = name.lower()
    base = py_ver = plat = None

    if lowered.endswith('.exe'):
        # Four bdist_wininst filename shapes; the '-pyX.Y' variants embed the
        # Python version just before the '.exe' suffix.
        if lowered.endswith('.win32.exe'):
            base, plat = name[:-10], 'win32'
        elif lowered.startswith('.win32-py', -16):
            base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
        elif lowered.endswith('.win-amd64.exe'):
            base, plat = name[:-14], 'win-amd64'
        elif lowered.startswith('.win-amd64-py', -20):
            base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'

    return base, py_ver, plat
||
52 | |||
53 | |||
def egg_info_for_url(url):
    """Return (basename, fragment) extracted from a download URL."""
    parsed = urlparse.urlparse(url)
    base = urllib2.unquote(parsed[2].split('/')[-1])
    fragment = parsed[5]
    # Unquoting may have re-introduced a '#' into the basename itself.
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
||
59 | |||
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    for dist in distros_for_location(url, base, metadata):
        yield dist
    if not fragment:
        return
    # An '#egg=name-version' fragment names a source checkout explicitly.
    match = EGG_FRAGMENT.match(fragment)
    if not match:
        return
    for dist in interpret_distro_name(
        url, match.group(1), metadata, precedence=CHECKOUT_DIST
    ):
        yield dist
||
71 | |||
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]    # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]

    if basename.endswith('.exe'):
        exe_base, exe_py_ver, exe_plat = parse_bdist_wininst(basename)
        if exe_base is not None:
            return interpret_distro_name(
                location, exe_base, metadata, exe_py_ver, BINARY_DIST, exe_plat
            )

    # Try source distro extensions (.zip, .tgz, etc.)
    for extension in EXTENSIONS:
        if basename.endswith(extension):
            stripped = basename[:-len(extension)]
            return interpret_distro_name(location, stripped, metadata)

    return []  # no extension matched
94 | |||
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
||
100 | |||
101 | |||
def interpret_distro_name(location, basename, metadata,
    py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Generate alternative interpretations of a source distro name
    # Because some packages are ambiguous as to name/versions split
    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
    # So, we generate each possible interepretation (e.g. "adns, python-1.1.0"
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it. It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    if not py_version:
        # A five-character part beginning with 'py2.' among the trailing
        # parts marks a bdist_dumb archive, which is not interpretable here.
        # (The enumerate index `i` is unused; only the parts are examined.)
        for i,p in enumerate(parts[2:]):
            if len(p)==5 and p.startswith('py2.'):
                return # It's a bdist_dumb, not an sdist -- bail out

    # Yield one Distribution per possible name/version split point; the
    # final iteration (p == len(parts)) yields an empty version string.
    for p in range(1,len(parts)+1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence = precedence,
            platform = platform
        )
135 | |||
# Matches any tag carrying a rel="..." attribute; group 1 is the whole tag
# text, group 2 the rel value (used to find homepage/download links).
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting
138 | |||
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        rels = [part.strip() for part in rel.lower().split(',')]
        if 'homepage' in rels or 'download' in rels:
            for href in HREF.finditer(tag):
                yield urlparse.urljoin(url, htmldecode(href.group(1)))

    # Older PyPI pages expose these as plain table headings instead.
    for heading in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(heading)
        if pos == -1:
            continue
        href = HREF.search(page, pos)
        if href:
            yield urlparse.urljoin(url, htmldecode(href.group(1)))
||
155 | |||
# User-Agent header sent with every request: the Python version plus the
# installed distribute version (looked up through pkg_resources.require).
user_agent = "Python-urllib/%s distribute/%s" % (
    sys.version[:3], require('distribute')[0].version
)
||
159 | |||
160 | |||
class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',),
        *args, **kw
    ):
        # `index_url`: base URL of a PyPI "simple"-style index.
        # `hosts`: fnmatch patterns of hosts downloads are allowed from
        # ('*' allows every host); remaining args go to Environment.
        Environment.__init__(self,*args,**kw)
        # Normalize index_url to always end in exactly one '/'.
        self.index_url = index_url + "/"[:not index_url.endswith('/')]
        self.scanned_urls = {}    # URLs already examined (used as a set)
        self.fetched_urls = {}    # URLs whose pages were actually retrieved
        self.package_pages = {}   # project key -> {package page URL: True}
        # Host allow-list matcher built from fnmatch-translated patterns.
        self.allows = re.compile('|'.join(map(translate,hosts))).match
        self.to_scan = []         # find-links URLs deferred until needed
    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        # Skip URLs we've seen, unless this call explicitly wants retrieval.
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            # No scheme: treat it as a local file or directory.
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        # If the URL itself names distributions, or we weren't asked to
        # retrieve, or the page was fetched before, register and stop.
        if dists or not retrieve or url in self.fetched_urls:
            map(self.add, dists)
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        f = self.open_url(url, "Download error: %s -- Some packages may not be found!")
        if f is None: return
        # Record both the requested and the final (post-redirect) URL.
        self.fetched_urls[url] = self.fetched_urls[f.url] = True

        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()   # not html, we can't process it
            return

        base = f.url     # handle redirects
        page = f.read()
        if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
            if isinstance(f, urllib2.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # Recursively evaluate every link on the page.
        for match in HREF.finditer(page):
            link = urlparse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        # Index pages get additional package-page processing (but not 404s).
        if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
            page = self.process_index(url, page)
225 | def process_filename(self, fn, nested=False): |
||
226 | # process filenames or directories
|
||
227 | if not os.path.exists(fn): |
||
228 | self.warn("Not found: %s", fn) |
||
229 | return
|
||
230 | |||
231 | if os.path.isdir(fn) and not nested: |
||
232 | path = os.path.realpath(fn) |
||
233 | for item in os.listdir(path): |
||
234 | self.process_filename(os.path.join(path,item), True) |
||
235 | |||
236 | dists = distros_for_filename(fn) |
||
237 | if dists:
|
||
238 | self.debug("Found: %s", fn) |
||
239 | map(self.add, dists) |
||
240 | |||
241 | def url_ok(self, url, fatal=False): |
||
242 | s = URL_SCHEME(url) |
||
243 | if (s and s.group(1).lower()=='file') or self.allows(urlparse.urlparse(url)[1]): |
||
244 | return True |
||
245 | msg = "\nLink to % s ***BLOCKED*** by --allow-hosts\n"
|
||
246 | if fatal:
|
||
247 | raise DistutilsError(msg % url)
|
||
248 | else:
|
||
249 | self.warn(msg, url)
|
||
250 | |||
251 | def scan_egg_links(self, search_path): |
||
252 | for item in search_path: |
||
253 | if os.path.isdir(item):
|
||
254 | for entry in os.listdir(item): |
||
255 | if entry.endswith('.egg-link'): |
||
256 | self.scan_egg_link(item, entry)
|
||
257 | |||
258 | def scan_egg_link(self, path, entry): |
||
259 | lines = filter(None, map(str.strip, open(os.path.join(path, entry)))) |
||
260 | if len(lines)==2: |
||
261 | for dist in find_distributions(os.path.join(path, lines[0])): |
||
262 | dist.location = os.path.join(path, *lines) |
||
263 | dist.precedence = SOURCE_DIST |
||
264 | self.add(dist)
|
||
265 | |||
    def process_index(self,url,page):
        """Process the contents of a PyPI page"""
        def scan(link):
            # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
                parts = map(
                    urllib2.unquote, link[len(self.index_url):].split('/')
                )
                if len(parts)==2 and '#' not in parts[1]:
                    # it's a package page, sanitize and index it
                    pkg = safe_name(parts[0])
                    ver = safe_version(parts[1])
                    self.package_pages.setdefault(pkg.lower(),{})[link] = True
                    return to_filename(pkg), to_filename(ver)
            return None, None

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                scan( urlparse.urljoin(url, htmldecode(match.group(1))) )
            except ValueError:
                pass

        pkg, ver = scan(url)   # ensure this page is in the page index
        if pkg:
            # process individual package page
            for new_url in find_external_links(url, page):
                # Process the found URL
                base, frag = egg_info_for_url(new_url)
                if base.endswith('.py') and not frag:
                    # Bare .py downloads need a version; tag the URL with an
                    # egg fragment when we know one, otherwise force a full
                    # index scan to find version info.
                    if ver:
                        new_url+='#egg=%s-%s' % (pkg,ver)
                    else:
                        self.need_version_info(url)
                self.scan_url(new_url)

            # Rewrite PyPI's md5 links into '#md5=' download fragments.
            return PYPI_MD5.sub(
                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1,3,2), page
            )
        else:
            return ""   # no sense double-scanning non-package pages
310 | def need_version_info(self, url): |
||
311 | self.scan_all(
|
||
312 | "Page at %s links to .py file(s) without version info; an index "
|
||
313 | "scan is required.", url
|
||
314 | ) |
||
315 | |||
316 | def scan_all(self, msg=None, *args): |
||
317 | if self.index_url not in self.fetched_urls: |
||
318 | if msg: self.warn(msg,*args) |
||
319 | self.info(
|
||
320 | "Scanning index of all packages (this may take a while)"
|
||
321 | ) |
||
322 | self.scan_url(self.index_url) |
||
323 | |||
324 | def find_packages(self, requirement): |
||
325 | self.scan_url(self.index_url + requirement.unsafe_name+'/') |
||
326 | |||
327 | if not self.package_pages.get(requirement.key): |
||
328 | # Fall back to safe version of the name
|
||
329 | self.scan_url(self.index_url + requirement.project_name+'/') |
||
330 | |||
331 | if not self.package_pages.get(requirement.key): |
||
332 | # We couldn't find the target package, so search the index page too
|
||
333 | self.not_found_in_index(requirement)
|
||
334 | |||
335 | for url in list(self.package_pages.get(requirement.key,())): |
||
336 | # scan each page that might be related to the desired package
|
||
337 | self.scan_url(url)
|
||
338 | |||
339 | def obtain(self, requirement, installer=None): |
||
340 | self.prescan(); self.find_packages(requirement) |
||
341 | for dist in self[requirement.key]: |
||
342 | if dist in requirement: |
||
343 | return dist
|
||
344 | self.debug("%s does not match %s", requirement, dist) |
||
345 | return super(PackageIndex, self).obtain(requirement,installer) |
||
346 | |||
347 | |||
348 | |||
349 | |||
350 | |||
351 | def check_md5(self, cs, info, filename, tfp): |
||
352 | if re.match('md5=[0-9a-f]{32}$', info): |
||
353 | self.debug("Validating md5 checksum for %s", filename) |
||
354 | if cs.hexdigest()<>info[4:]: |
||
355 | tfp.close() |
||
356 | os.unlink(filename) |
||
357 | raise DistutilsError(
|
||
358 | "MD5 validation failed for "+os.path.basename(filename)+
|
||
359 | "; possible download problem?"
|
||
360 | ) |
||
361 | |||
362 | def add_find_links(self, urls): |
||
363 | """Add `urls` to the list that will be prescanned for searches"""
|
||
364 | for url in urls: |
||
365 | if (
|
||
366 | self.to_scan is None # if we have already "gone online" |
||
367 | or not URL_SCHEME(url) # or it's a local file/directory |
||
368 | or url.startswith('file:') |
||
369 | or list(distros_for_url(url)) # or a direct package link |
||
370 | ): |
||
371 | # then go ahead and process it now
|
||
372 | self.scan_url(url)
|
||
373 | else:
|
||
374 | # otherwise, defer retrieval till later
|
||
375 | self.to_scan.append(url)
|
||
376 | |||
377 | def prescan(self): |
||
378 | """Scan urls scheduled for prescanning (e.g. --find-links)"""
|
||
379 | if self.to_scan: |
||
380 | map(self.scan_url, self.to_scan) |
||
381 | self.to_scan = None # from now on, go ahead and process immediately |
||
382 | |||
383 | def not_found_in_index(self, requirement): |
||
384 | if self[requirement.key]: # we've seen at least one distro |
||
385 | meth, msg = self.info, "Couldn't retrieve index page for %r" |
||
386 | else: # no distros seen for this name, might be misspelled |
||
387 | meth, msg = (self.warn,
|
||
388 | "Couldn't find index page for %r (maybe misspelled?)")
|
||
389 | meth(msg, requirement.unsafe_name) |
||
390 | self.scan_all()
|
||
391 | |||
392 | def download(self, spec, tmpdir): |
||
393 | """Locate and/or download `spec` to `tmpdir`, returning a local path
|
||
394 |
|
||
395 | `spec` may be a ``Requirement`` object, or a string containing a URL,
|
||
396 | an existing local filename, or a project/version requirement spec
|
||
397 | (i.e. the string form of a ``Requirement`` object). If it is the URL
|
||
398 | of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
|
||
399 | that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
|
||
400 | automatically created alongside the downloaded file.
|
||
401 |
|
||
402 | If `spec` is a ``Requirement`` object or a string containing a
|
||
403 | project/version requirement spec, this method returns the location of
|
||
404 | a matching distribution (possibly after downloading it to `tmpdir`).
|
||
405 | If `spec` is a locally existing file or directory name, it is simply
|
||
406 | returned unchanged. If `spec` is a URL, it is downloaded to a subpath
|
||
407 | of `tmpdir`, and the local filename is returned. Various errors may be
|
||
408 | raised if a problem occurs during downloading.
|
||
409 | """
|
||
410 | if not isinstance(spec,Requirement): |
||
411 | scheme = URL_SCHEME(spec) |
||
412 | if scheme:
|
||
413 | # It's a url, download it to tmpdir
|
||
414 | found = self._download_url(scheme.group(1), spec, tmpdir) |
||
415 | base, fragment = egg_info_for_url(spec) |
||
416 | if base.endswith('.py'): |
||
417 | found = self.gen_setup(found,fragment,tmpdir)
|
||
418 | return found
|
||
419 | elif os.path.exists(spec):
|
||
420 | # Existing file or directory, just return it
|
||
421 | return spec
|
||
422 | else:
|
||
423 | try:
|
||
424 | spec = Requirement.parse(spec) |
||
425 | except ValueError: |
||
426 | raise DistutilsError(
|
||
427 | "Not a URL, existing file, or requirement spec: %r" %
|
||
428 | (spec,) |
||
429 | ) |
||
430 | return getattr(self.fetch_distribution(spec, tmpdir),'location',None) |
||
431 | |||
432 | |||
    def fetch_distribution(self,
        requirement, tmpdir, force_scan=False, source=False, develop_ok=False,
        local_index=None
    ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """

        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}   # dists already warned about, to avoid repeat warnings
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence==DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn("Skipping development or system egg: %s",dist)
                        skipped[dist] = 1
                    continue

                if dist in req and (dist.precedence<=SOURCE_DIST or not source):
                    self.info("Best match: %s", dist)
                    # Return a clone whose location is the downloaded path.
                    return dist.clone(
                        location=self.download(dist.location, tmpdir)
                    )

        # Escalating search: optional forced scan, then the supplied local
        # index, then deferred --find-links URLs, then package-page scans.
        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if local_index is not None:
            dist = dist or find(requirement, local_index)

        if dist is None and self.to_scan is not None:
            self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        return dist
502 | def fetch(self, requirement, tmpdir, force_scan=False, source=False): |
||
503 | """Obtain a file suitable for fulfilling `requirement`
|
||
504 |
|
||
505 | DEPRECATED; use the ``fetch_distribution()`` method now instead. For
|
||
506 | backward compatibility, this routine is identical but returns the
|
||
507 | ``location`` of the downloaded distribution instead of a distribution
|
||
508 | object.
|
||
509 | """
|
||
510 | dist = self.fetch_distribution(requirement,tmpdir,force_scan,source)
|
||
511 | if dist is not None: |
||
512 | return dist.location
|
||
513 | return None |
||
514 | |||
515 | |||
516 | |||
517 | |||
518 | |||
519 | |||
520 | |||
521 | |||
522 | def gen_setup(self, filename, fragment, tmpdir): |
||
523 | match = EGG_FRAGMENT.match(fragment) |
||
524 | dists = match and [d for d in |
||
525 | interpret_distro_name(filename, match.group(1), None) if d.version |
||
526 | ] or []
|
||
527 | |||
528 | if len(dists)==1: # unambiguous ``#egg`` fragment |
||
529 | basename = os.path.basename(filename) |
||
530 | |||
531 | # Make sure the file has been downloaded to the temp dir.
|
||
532 | if os.path.dirname(filename) != tmpdir:
|
||
533 | dst = os.path.join(tmpdir, basename) |
||
534 | from setuptools.command.easy_install import samefile |
||
535 | if not samefile(filename, dst): |
||
536 | shutil.copy2(filename, dst) |
||
537 | filename=dst |
||
538 | |||
539 | file = open(os.path.join(tmpdir, 'setup.py'), 'w') |
||
540 | file.write(
|
||
541 | "from setuptools import setup\n"
|
||
542 | "setup(name=%r, version=%r, py_modules=[%r])\n"
|
||
543 | % ( |
||
544 | dists[0].project_name, dists[0].version, |
||
545 | os.path.splitext(basename)[0]
|
||
546 | ) |
||
547 | ) |
||
548 | file.close()
|
||
549 | return filename
|
||
550 | |||
551 | elif match:
|
||
552 | raise DistutilsError(
|
||
553 | "Can't unambiguously interpret project/version identifier %r; "
|
||
554 | "any dashes in the name or version should be escaped using "
|
||
555 | "underscores. %r" % (fragment,dists)
|
||
556 | ) |
||
557 | else:
|
||
558 | raise DistutilsError(
|
||
559 | "Can't process plain .py files without an '#egg=name-version'"
|
||
560 | " suffix to enable automatic setup script generation."
|
||
561 | ) |
||
562 | |||
    dl_blocksize = 8192   # bytes requested per read while downloading

    def _download_to(self, url, filename):
        """Stream `url` into `filename`, validating any '#md5=' fragment.

        Returns the response headers.  `reporthook` is called once before
        the first block and once after each block written.
        """
        self.info("Downloading %s", url)
        # Download the file
        fp, tfp, info = None, None, None
        try:
            # A fragment (e.g. 'md5=<hex>') is split off for verification.
            if '#' in url:
                url, info = url.split('#', 1)
            fp = self.open_url(url)
            if isinstance(fp, urllib2.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code,fp.msg)
                )
            cs = md5()
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                content_length = headers.get("Content-Length")
                size = int(content_length)
            self.reporthook(url, filename, blocknum, bs, size)
            tfp = open(filename,'wb')
            while True:
                block = fp.read(bs)
                if block:
                    cs.update(block)
                    tfp.write(block)
                    blocknum += 1
                    self.reporthook(url, filename, blocknum, bs, size)
                else:
                    break
            # Raises (and deletes the file) on checksum mismatch.
            if info: self.check_md5(cs, info, filename, tfp)
            return headers
        finally:
            if fp: fp.close()
            if tfp: tfp.close()
    def reporthook(self, url, filename, blocknum, blksize, size):
        # Progress callback invoked by _download_to; subclasses may override
        # to display download progress.
        pass # no-op
    def open_url(self, url, warning=None):
        """Open `url` and return a file-like response object.

        On failure: if `warning` is given it is logged (with the error text
        interpolated) and None is returned implicitly; otherwise a
        DistutilsError is raised.  Note that an HTTPError response is
        *returned*, not raised, so callers can inspect error pages.
        """
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url)
        except (ValueError, httplib.InvalidURL), v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg))
        except urllib2.HTTPError, v:
            # Hand the error response back for the caller to examine.
            return v
        except urllib2.URLError, v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                    % (url, v.reason))
        except httplib.BadStatusLine, v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError('%s returned a bad status line. '
                    'The server might be down, %s' % \
                    (url, v.line))
        except httplib.HTTPException, v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                    % (url, v))
639 | def _download_url(self, scheme, url, tmpdir): |
||
640 | # Determine download filename
|
||
641 | #
|
||
642 | name = filter(None,urlparse.urlparse(url)[2].split('/')) |
||
643 | if name:
|
||
644 | name = name[-1]
|
||
645 | while '..' in name: |
||
646 | name = name.replace('..','.').replace('\\','_') |
||
647 | else:
|
||
648 | name = "__downloaded__" # default if URL has no path contents |
||
649 | |||
650 | if name.endswith('.egg.zip'): |
||
651 | name = name[:-4] # strip the extra .zip before download |
||
652 | |||
653 | filename = os.path.join(tmpdir,name) |
||
654 | |||
655 | # Download the file
|
||
656 | #
|
||
657 | if scheme=='svn' or scheme.startswith('svn+'): |
||
658 | return self._download_svn(url, filename) |
||
659 | elif scheme=='file': |
||
660 | return urllib.url2pathname(urlparse.urlparse(url)[2]) |
||
661 | else:
|
||
662 | self.url_ok(url, True) # raises error if not allowed |
||
663 | return self._attempt_download(url, filename) |
||
664 | |||
665 | |||
666 | |||
    def scan_url(self, url):
        # Process `url` unconditionally, retrieving its page if needed.
        self.process_url(url, True)
671 | def _attempt_download(self, url, filename): |
||
672 | headers = self._download_to(url, filename)
|
||
673 | if 'html' in headers.get('content-type','').lower(): |
||
674 | return self._download_html(url, headers, filename) |
||
675 | else:
|
||
676 | return filename
|
||
677 | |||
    def _download_html(self, url, headers, filename):
        """Handle a download that unexpectedly returned HTML.

        The only acceptable HTML is a Subversion index page, which is
        replaced by an actual checkout; anything else raises DistutilsError.
        Only the first non-blank line is examined.
        """
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break   # not an index page -- stop at first non-blank line
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at "+url)
693 | def _download_svn(self, url, filename): |
||
694 | url = url.split('#',1)[0] # remove any fragment for svn's sake |
||
695 | self.info("Doing subversion checkout from %s to %s", url, filename) |
||
696 | os.system("svn checkout -q %s %s" % (url, filename))
|
||
697 | return filename
|
||
698 | |||
    def debug(self, msg, *args):
        # Route debug-level messages to distutils logging.
        log.debug(msg, *args)

    def info(self, msg, *args):
        # Route info-level messages to distutils logging.
        log.info(msg, *args)

    def warn(self, msg, *args):
        # Route warning-level messages to distutils logging.
        log.warn(msg, *args)
# This pattern matches a character entity reference (a decimal numeric
# references, a hexadecimal numeric reference, or a named reference).
# The bound .sub method is used by htmldecode() below.
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
711 | |||
def uchr(c):
    """Return the character for codepoint `c`; non-ints pass through as-is."""
    if not isinstance(c, int):
        return c
    # unichr handles codepoints beyond the byte range.
    return unichr(c) if c > 255 else chr(c)
717 | |||
def decode_entity(match):
    """Decode one matched HTML entity reference to its character."""
    ref = match.group(1)
    if ref.startswith('#x'):
        codepoint = int(ref[2:], 16)
    elif ref.startswith('#'):
        codepoint = int(ref[1:])
    else:
        from htmlentitydefs import name2codepoint
        # Unknown names fall back to the original matched text.
        codepoint = name2codepoint.get(ref, match.group(0))
    return uchr(codepoint)
728 | |||
def htmldecode(text):
    """Decode HTML entities in the given text."""
    # Delegates to the module-level entity pattern's bound .sub method.
    return entity_sub(decode_entity, text)
732 | |||
733 | |||
734 | |||
735 | |||
736 | |||
737 | |||
738 | |||
739 | |||
740 | |||
741 | |||
742 | |||
743 | |||
744 | |||
745 | |||
746 | |||
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable under a default socket
    timeout of `timeout` seconds, restoring the previous global default
    afterwards (even if the call raises)."""
    def decorator(func):
        def wrapper(*args, **kwargs):
            saved = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(saved)
        return wrapper
    return decorator
758 | |||
759 | |||
def open_with_auth(url):
    """Open a urllib2 request, handling HTTP authentication"""

    scheme, netloc, path, params, query, frag = urlparse.urlparse(url)

    # Only http/https URLs may carry user:password in the netloc.
    if scheme in ('http', 'https'):
        auth, host = urllib2.splituser(netloc)
    else:
        auth = None

    if auth:
        # Strip credentials from the URL and send them as a Basic
        # Authorization header instead.
        auth = "Basic " + urllib2.unquote(auth).encode('base64').strip()
        new_url = urlparse.urlunparse((scheme,host,path,params,query,frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = urllib2.urlopen(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse.urlparse(fp.url)
        if s2==scheme and h2==host:
            fp.url = urlparse.urlunparse((s2,netloc,path2,param2,query2,frag2))

    return fp
789 | |||
# adding a timeout to avoid freezing package_index
# (wraps every open_with_auth call in a _SOCKET_TIMEOUT-second default)
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
792 | |||
793 | |||
794 | |||
795 | |||
796 | |||
797 | |||
798 | |||
799 | |||
800 | |||
801 | |||
802 | |||
def fix_sf_url(url):
    """Return `url` unchanged; retained only for backward compatibility."""
    return url
805 | |||
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urlparse.urlparse(url)
    filename = urllib.url2pathname(path)
    if os.path.isfile(filename):
        return urllib2.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        # Directory: serve its index.html if one exists; otherwise build a
        # minimal HTML listing of its entries (for-else: the synthesized
        # page is only assembled when no index.html broke out of the loop).
        files = []
        for f in os.listdir(filename):
            if f=='index.html':
                fp = open(os.path.join(filename,f),'rb')
                body = fp.read()
                fp.close()
                break
            elif os.path.isdir(os.path.join(filename,f)):
                f+='/'
            files.append("<a href=%r>%s</a>" % (f,f))
        else:
            body = ("<html><head><title>%s</title>" % url) + \
                "</head><body>%s</body></html>" % '\n'.join(files)
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    # Even success is wrapped in HTTPError: it provides a uniform file-like
    # object carrying headers, which is what callers expect.
    return urllib2.HTTPError(url, status, message,
            {'content-type':'text/html'}, cStringIO.StringIO(body))
832 | |||
833 | |||
834 | |||
835 | |||
836 | |||
837 | |||
838 | |||
839 | |||
840 | |||
841 | |||
842 | |||
843 | |||
844 | |||
845 | # this line is a kludge to keep the trailing blank lines for pje's editor |