| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- import time
- import threading
- import httplib
- import urllib
- import urllib2
- import getpass
- import re
- import os
- import sys
- import cookielib
- import zlib
- import gzip
- import socket
- import ssl
- from cStringIO import StringIO
- import multipart
# Number of bytes readWithProgress() requests per read() call (64 KiB).
BLOCK_SIZE = 1024 * 64
- def decode (page):
- """gunzip or deflate a compressed page
- """
- encoding = page.info().get("Content-Encoding")
- if encoding in ('gzip', 'x-gzip', 'deflate'):
- # cannot seek in socket descriptors, so must get content now
- content = page.read()
- if encoding == 'deflate':
- fp = StringIO(zlib.decompress(content))
- else:
- fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
- # remove content-encoding header
- headers = httplib.HTTPMessage(StringIO(""))
- ceheader = re.compile(r"(?i)content-encoding:")
- for h in page.info().keys():
- if not ceheader.match(h):
- headers[h] = page.info()[h]
- newpage = urllib.addinfourl(fp, headers, page.geturl())
- # Propagate code, msg through
- if hasattr(page, 'code'):
- newpage.code = page.code
- if hasattr(page, 'msg'):
- newpage.msg = page.msg
- return newpage
- return page
class HttpWithGzipHandler(urllib2.HTTPHandler):
    """HTTP handler whose responses are passed through decode()."""

    def http_open(self, req):
        """Open *req* with the stock HTTP handler and decode the response."""
        response = urllib2.HTTPHandler.http_open(self, req)
        return decode(response)
class HttpsWithGzipHandler(urllib2.HTTPSHandler):
    """HTTPS handler whose responses are passed through decode()."""

    def https_open(self, req):
        """Open *req* with the stock HTTPS handler and decode the response."""
        response = urllib2.HTTPSHandler.https_open(self, req)
        return decode(response)
- class handlepasswd(urllib2.HTTPPasswordMgrWithDefaultRealm):
- def find_user_password(self, realm, authurl):
- user, password = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(self, realm, authurl)
- if user is not None:
- return user, password
- user = raw_input('Enter username for %s at %s: ' % (realm, authurl))
- password = getpass.getpass(
- "Enter password for %s in %s at %s: " % (user, realm, authurl))
- self.add_password(realm, authurl, user, password)
- return user, password
- def _printProgress(read, max, kbcurr, kbtotal):
- print '%.2f of %.2f MB [%.1f%%] downloaded [%.2f kb sec]' % (read / 1024.0 ** 2, max / 1024.0 ** 2, 100.0 * read / max, kbtotal)
def readWithProgress(orgOpenFunc):
    """Wrap an open()-like callable so the response is read with progress.

    @param orgOpenFunc: callable that is invoked with the positional
        arguments of the wrapper and returns a response object offering
        info(), read(size) and close()
    @return: the wrapping function (see its docstring for the keyword
        arguments it understands)
    """
    # keeps the rate computation finite when a read returns instantly
    epsilon = 0.00000001

    def _inner(*args, **kwargs):
        '''returns response data as string if no "file" kwargs is given else
        write response data to file

        Positional arguments are handed to the wrapped open function; the
        keyword arguments below are consumed here and not forwarded.

        @param file: filelike that is used with write()
        @type file: filelike
        @param cb: callback function with signature (currentCount, maxCount, kbsecCurrent, kbsecTotal)
        @type cb: callable
        @param cbcount: how many times callback should be triggered at most
            when the total size is known; a value <= 0 disables the
            callbacks in that case.  With an unknown size the callback is
            triggered after every block.
        @type cbcount: int
        @param blocksize: bytes requested per read(), defaults to BLOCK_SIZE
        @type blocksize: int
        @raise Exception: if fewer bytes than announced by Content-Length
            were received
        '''
        # Defaults are looked up only when needed so an explicitly passed
        # value never touches the fallback.
        block_size = kwargs['blocksize'] if 'blocksize' in kwargs else BLOCK_SIZE
        progressCallback = kwargs['cb'] if 'cb' in kwargs else _printProgress
        #make 10 callbacks default
        callbackCount = kwargs.get('cbcount', 10)
        data = kwargs['file'] if 'file' in kwargs else StringIO()

        request = orgOpenFunc(*args)
        try:
            headers = request.info()
            try:
                total = int(headers.get('Content-Length', -1))
            except (TypeError, ValueError):
                # unparsable header: treat the size as unknown
                total = -1
            nextStep = 1
            read = 0
            start = time.time()
            while True:
                t = time.time()
                s = request.read(block_size)
                if not s:
                    break
                # a read may return fewer bytes than requested
                received = len(s)
                kbsecCurrent = received / (time.time() - t + epsilon) / 1024.0
                data.write(s)
                read += received
                kbsecTotal = read / (time.time() - start + epsilon) / 1024.0
                if total <= 0:
                    progressCallback(read, total, kbsecCurrent, kbsecTotal)
                elif callbackCount > 0 and read * callbackCount >= total * nextStep:
                    # integer comparison: step k is reached once
                    # read / total >= k / callbackCount
                    progressCallback(read, total, kbsecCurrent, kbsecTotal)
                    while read * callbackCount >= total * nextStep:
                        nextStep += 1
        finally:
            request.close()

        # Content-Length counts the encoded body.  If a Content-Encoding is
        # still announced, the bytes read here may be the decoded ones, so
        # the two numbers cannot be compared.
        encoded = headers.get('Content-Encoding') not in (None, '', 'identity')
        if total >= 0 and read < total and not encoded:
            raise Exception("retrieval incomplete: got only %i out "
                            "of %i bytes" % (read, total))
        if 'file' not in kwargs:
            return data.getvalue()
    return _inner
def _setHeader(table, name, value):
    """Store header *name* in dict *table*, replacing any other-cased spelling."""
    wanted = name.lower()
    for existing in list(table):
        if existing.lower() == wanted:
            del table[existing]
    table[name] = value


def getOpener(proxies=None, authfunc=None, enableGzip=False,
              headers=None, cookieInfo=None,
              enableCookies=True, enableMultipart=False,
              ):
    """Build and return a configured urllib2 opener.

    The opener gets an extra attribute ``readWithProgress`` which is
    ``opener.open`` wrapped by readWithProgress().

    @param proxies: mapping scheme -> proxy url; 'auto' uses
        urllib.getproxies(); None disables proxies.  Entries with an
        empty value are ignored.  The mapping is not modified.
    @param authfunc: optional callable invoked once with the finished
        opener (e.g. to perform a login)
    @param enableGzip: install the gzip-decoding HTTP and HTTPS handlers
        and send a matching Accept-Encoding header
    @param headers: mapping of additional default headers; they override
        the opener's stock headers.  The mapping is not modified.
    @param cookieInfo: optional mapping describing one cookie to preset;
        requires the keys 'name', 'value' and 'domain' and may contain
        'version', 'port', 'path', 'secure' and 'expires'
    @param enableCookies: install a cookie processor with a fresh CookieJar
    @param enableMultipart: install multipart.MultipartPostHandler
    @return: the opener
    """
    pwd_manager = handlepasswd()
    handlers = [
        urllib2.UnknownHandler(),
        urllib2.HTTPBasicAuthHandler(pwd_manager),
        urllib2.ProxyBasicAuthHandler(pwd_manager),
        urllib2.HTTPDigestAuthHandler(pwd_manager),
        urllib2.ProxyDigestAuthHandler(pwd_manager),
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
    ]

    if proxies == 'auto':
        proxies = urllib.getproxies()
    if proxies is not None:
        # empty values like {'http': ''} will lead to
        # URLError: <urlopen error no host given>; filter into a new dict
        # so the caller's mapping stays untouched
        proxies = dict((scheme, target)
                       for scheme, target in proxies.items()
                       if target != '')
    else:
        # disable all proxies - if we would pass None, the ProxyHandler
        # would autodetect proxies
        proxies = {}
    handlers.append(urllib2.ProxyHandler(proxies))

    if enableMultipart:
        handlers.append(multipart.MultipartPostHandler)

    if enableCookies:
        cj = cookielib.CookieJar()
        if cookieInfo is not None:
            cj.set_cookie(cookielib.Cookie(
                version=cookieInfo.get('version'),
                name=cookieInfo['name'],
                value=cookieInfo['value'],
                port=cookieInfo.get('port'),
                port_specified=None,
                domain=cookieInfo['domain'],
                domain_specified=None,
                domain_initial_dot=None,
                path=cookieInfo.get('path', '/'),
                path_specified=None,
                secure=cookieInfo.get('secure', False),
                expires=cookieInfo.get('expires'),
                discard=False,
                comment='',
                comment_url='',
                rest={}))
        handlers.append(urllib2.HTTPCookieProcessor(cj))

    if enableGzip:
        # Accept-Encoding is sent for every request, so both transports
        # must be able to decode the answer.
        handlers.append(HttpWithGzipHandler())
        handlers.append(HttpsWithGzipHandler())

    opener = urllib2.build_opener(*handlers)

    # Start from the opener's stock headers and let the caller's headers
    # override them; header names are matched case-insensitively so that
    # e.g. 'User-Agent' replaces the stock 'User-agent'.
    merged = dict(opener.addheaders)
    if headers is not None:
        for name, value in headers.items():
            _setHeader(merged, name, value)
    if enableGzip:
        _setHeader(merged, 'Accept-Encoding',
                   'gzip;q=1.0, deflate;q=0.9, identity;q=0.5')

    #add an additional function
    opener.readWithProgress = readWithProgress(opener.open)
    opener.addheaders = merged.items()
    if authfunc is not None:
        authfunc(opener)
    return opener
def urlopen(url, proxies=None, data=None, authfunc=None, enableGzip=True, useragent=None):
    '''create a new opener, open the request and return the response

    @param url: url to open
    @param proxies: passed on to getOpener()
    @param data: request body handed to urllib2.Request (None for no body)
    @param authfunc: optional callable receiving the new opener; it is
        invoked exactly once, by getOpener()
    @param enableGzip: request compressed responses and have the opener
        built with gzip support
    @param useragent: value for the User-Agent header, if given
    @return: the object returned by opener.open() (use .read())
    '''
    headers = {}
    if enableGzip:
        headers['Accept-Encoding'] = 'gzip;q=1.0, deflate;q=0.9, identity;q=0.5'
    if useragent is not None:
        headers['User-Agent'] = useragent
    req = urllib2.Request(url, data, headers)
    # getOpener() already calls authfunc(opener); calling it again here
    # would perform the authentication twice.
    opener = getOpener(proxies=proxies, authfunc=authfunc, enableGzip=enableGzip)
    return opener.open(req)
- def authfunc(url, user, password):
- '''return a configured authfunc for a vls interface'''
- def _inner(opener):
- data = urllib.urlencode([('loginUser', user),
- ('loginPassword', password),
- ('login', ''),
- ('nohttps', ''),
- ])
- res = opener.open(url + '/auth/login', data=data).read()
- if res != 'successful':
- raise Exception('could not authentificate user "%s"\n----%s\n----' % (user, res[:100]))
- return _inner
|