import time
import threading
import httplib
import urllib
import urllib2
import getpass
import re
import os
import sys
import cookielib
import zlib
import gzip
import socket
import ssl
from cStringIO import StringIO

import multipart

# Number of bytes requested per read() call in readWithProgress.
BLOCK_SIZE = 64 * 1024

# Accept-Encoding value sent when transparent decompression is enabled.
_ACCEPT_ENCODING = 'gzip;q=1.0, deflate;q=0.9, identity;q=0.5'

# Headers that describe the compressed body and therefore must not be copied
# onto a decompressed response (compared against lower-cased header names).
_COMPRESSED_BODY_HEADERS = ('content-encoding', 'content-length')


def _inflate(content):
    """Decompress a "deflate" body, accepting zlib-wrapped and raw streams."""
    try:
        return zlib.decompress(content)
    except zlib.error:
        # Some servers send a raw deflate stream without the zlib header;
        # a negative window size tells zlib not to expect one.
        return zlib.decompress(content, -zlib.MAX_WBITS)


def _setHeader(headers, name, value):
    """Set ``name`` in the dict ``headers``, replacing any existing key that
    differs only in case.

    urllib2 capitalizes the names in ``addheaders`` before adding them to a
    request, so two keys differing only in case would compete for one header.
    """
    wanted = name.capitalize()
    for existing in list(headers):
        if existing.capitalize() == wanted:
            del headers[existing]
    headers[name] = value


def decode(page):
    """gunzip or deflate a compressed page

    @param page: response object as returned by an urllib2 handler
    @return: ``page`` itself if its Content-Encoding is not gzip, x-gzip or
        deflate; otherwise a new urllib.addinfourl serving the decompressed
        content. The new object carries the original headers except
        Content-Encoding and Content-Length (both describe the compressed
        body), the original URL and, when present, ``code`` and ``msg``.
    """
    encoding = page.info().get("Content-Encoding")
    if encoding not in ('gzip', 'x-gzip', 'deflate'):
        return page

    # cannot seek in socket descriptors, so must get content now
    try:
        content = page.read()
    finally:
        # the body has been consumed (or reading failed); release the
        # underlying connection
        page.close()

    if encoding == 'deflate':
        fp = StringIO(_inflate(content))
    else:
        fp = gzip.GzipFile('', 'rb', 9, StringIO(content))

    # Copy the headers, leaving out those that only apply to the compressed
    # body. Header keys carry no trailing colon, so compare plain names.
    original = page.info()
    headers = httplib.HTTPMessage(StringIO(""))
    for name in original.keys():
        if name.lower() not in _COMPRESSED_BODY_HEADERS:
            headers[name] = original[name]

    newpage = urllib.addinfourl(fp, headers, page.geturl())
    # Propagate code, msg through
    if hasattr(page, 'code'):
        newpage.code = page.code
    if hasattr(page, 'msg'):
        newpage.msg = page.msg
    return newpage


class HttpWithGzipHandler(urllib2.HTTPHandler):
    """http with gzip encoding: responses are passed through decode()"""

    def http_open(self, req):
        return decode(urllib2.HTTPHandler.http_open(self, req))


class HttpsWithGzipHandler(urllib2.HTTPSHandler):
    """https with gzip encoding: responses are passed through decode()"""

    def https_open(self, req):
        return decode(urllib2.HTTPSHandler.https_open(self, req))


class handlepasswd(urllib2.HTTPPasswordMgrWithDefaultRealm):
    """Password manager that asks on the console for unknown credentials."""

    def find_user_password(self, realm, authurl):
        """Return (user, password) for ``realm`` at ``authurl``.

        Stored credentials are used when available; otherwise the user is
        prompted and the answer is stored for later lookups.
        """
        user, password = \
            urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
                self, realm, authurl)
        if user is not None:
            return user, password
        user = raw_input('Enter username for %s at %s: ' % (realm, authurl))
        password = getpass.getpass(
            "Enter password for %s in %s at %s: " % (user, realm, authurl))
        self.add_password(realm, authurl, user, password)
        return user, password


def _printProgress(read, max, kbcurr, kbtotal):
    """Default progress callback: print one status line to stdout.

    @param read: number of bytes received so far
    @param max: expected total number of bytes; values <= 0 mean "unknown"
    @param kbcurr: rate of the last block in kb/sec (not printed)
    @param kbtotal: average rate since the start in kb/sec
    """
    megabyte = 1024.0 ** 2
    if max > 0:
        print('%.2f of %.2f MB [%.1f%%] downloaded [%.2f kb sec]' % (
            read / megabyte, max / megabyte, 100.0 * read / max, kbtotal))
    else:
        # total size unknown: a percentage cannot be computed
        print('%.2f MB downloaded [%.2f kb sec]' % (read / megabyte, kbtotal))


def readWithProgress(orgOpenFunc):
    """Wrap an open function so that the response body is read block-wise
    while a progress callback is triggered.

    @param orgOpenFunc: callable returning a response object that offers
        info(), read() and close() (e.g. OpenerDirector.open)
    @return: the wrapping function, see its docstring for the keyword
        arguments it understands
    """
    epsilon = 0.00000001  # avoids division by zero for very fast reads

    def _inner(*args, **kwargs):
        '''returns response data as string if no "file" kwargs is given
        else write response data to file

        Positional arguments are handed to the wrapped open function.

        @param file: filelike that is used with write()
        @type file: filelike
        @param cb: callback function with signature
            (currentCount, maxCount, kbsecCurrent, kbsecTotal);
            maxCount is -1 if the response has no Content-Length
        @type cb: callable
        @param cbcount: how many times callback should be triggered
            (default 10); a value <= 0 disables the callback. If the total
            size is unknown the callback is triggered after every block.
        @type cbcount: int
        @raise Exception: if fewer bytes than announced by Content-Length
            were received
        '''
        block_size = BLOCK_SIZE
        progressCallback = kwargs.get('cb', _printProgress)
        # make 10 callbacks default
        callbackCount = kwargs.get('cbcount', 10)
        if 'file' in kwargs:
            data = kwargs['file']
        else:
            data = StringIO()

        request = orgOpenFunc(*args)
        try:
            total = int(request.info().get('Content-Length', -1))
            stepWidth = 0
            if total > 0 and callbackCount > 0:
                stepWidth = float(total) / callbackCount
            nextStep = stepWidth

            read = 0
            start = time.time()
            while True:
                blockStart = time.time()
                s = request.read(block_size)
                blockSeconds = time.time() - blockStart
                if not s:
                    break
                data.write(s)
                # count what was really received; the last block is usually
                # shorter than block_size
                received = len(s)
                read += received
                if callbackCount <= 0:
                    continue
                kbsecCurrent = received / (blockSeconds + epsilon) / 1024.0
                kbsecTotal = read / (time.time() - start + epsilon) / 1024.0
                if stepWidth <= 0:
                    # unknown size: report every block
                    progressCallback(read, total, kbsecCurrent, kbsecTotal)
                elif read >= nextStep or read >= total:
                    progressCallback(read, total, kbsecCurrent, kbsecTotal)
                    # a single block may cross several steps
                    while nextStep <= read:
                        nextStep += stepWidth
        finally:
            request.close()

        if total >= 0 and read < total:
            raise Exception("retrieval incomplete: got only %i out "
                            "of %i bytes" % (read, total))
        if 'file' not in kwargs:
            return data.getvalue()

    return _inner


def getOpener(proxies=None,
              authfunc=None,
              enableGzip=False,
              headers=None,
              cookieInfo=None,
              enableCookies=True,
              enableMultipart=False,
              ):
    """Build and return a configured urllib2 opener.

    @param proxies: None disables all proxies, 'auto' uses
        urllib.getproxies(), otherwise a dict as expected by
        urllib2.ProxyHandler; entries with an empty value are ignored.
        The passed dict is not modified.
    @param authfunc: optional callable invoked with the finished opener
    @param enableGzip: install the gzip/deflate decoding handlers for http
        and https and send a matching Accept-Encoding header
    @param headers: dict of default headers; merged over the opener's
        built-in default headers. The passed dict is not modified.
    @param cookieInfo: optional dict describing one cookie to preload;
        requires 'name', 'value' and 'domain', optional 'version', 'port',
        'path' (default '/'), 'secure' and 'expires'
    @param enableCookies: install a cookie processor with a fresh CookieJar
    @param enableMultipart: install multipart.MultipartPostHandler
    @return: the opener; it additionally offers readWithProgress(), a
        wrapper around its open() method (see readWithProgress)
    """
    pwd_manager = handlepasswd()
    handlers = [
        urllib2.UnknownHandler(),
        urllib2.HTTPBasicAuthHandler(pwd_manager),
        urllib2.ProxyBasicAuthHandler(pwd_manager),
        urllib2.HTTPDigestAuthHandler(pwd_manager),
        urllib2.ProxyDigestAuthHandler(pwd_manager),
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPRedirectHandler(),
    ]

    if proxies == 'auto':
        proxies = urllib.getproxies()
    if proxies is not None:
        # empty values like {'http': ''} will lead to URLError; filter into
        # a new dict so that the caller's dict stays untouched
        proxies = dict((scheme, proxy) for scheme, proxy in proxies.items()
                       if proxy != '')
        proxyHandler = urllib2.ProxyHandler(proxies)
    else:
        # disable all proxies - if we would pass None, the ProxyHandler
        # would autodetect proxies
        proxyHandler = urllib2.ProxyHandler({})
    handlers.append(proxyHandler)

    if enableMultipart:
        handlers.append(multipart.MultipartPostHandler)

    if enableCookies:
        cj = cookielib.CookieJar()
        if cookieInfo is not None:
            cj.set_cookie(cookielib.Cookie(
                version=cookieInfo.get('version', None),
                name=cookieInfo['name'],
                value=cookieInfo['value'],
                port=cookieInfo.get('port', None),
                port_specified=None,
                domain=cookieInfo['domain'],
                domain_specified=None,
                domain_initial_dot=None,
                path=cookieInfo.get('path', '/'),
                path_specified=None,
                secure=cookieInfo.get('secure', False),
                expires=cookieInfo.get('expires', None),
                discard=False,
                comment='',
                comment_url='',
                rest={}))
        handlers.append(urllib2.HTTPCookieProcessor(cj))

    if enableGzip:
        handlers.append(HttpWithGzipHandler())
        # https responses need the same treatment as http ones
        handlers.append(HttpsWithGzipHandler())

    opener = urllib2.build_opener(*handlers)

    # Start from the opener's default headers and let the caller's headers
    # override them, working on a copy.
    merged = dict(opener.addheaders)
    if headers is not None:
        for name, value in headers.items():
            _setHeader(merged, name, value)
    if enableGzip:
        _setHeader(merged, 'Accept-Encoding', _ACCEPT_ENCODING)

    # add an additional function
    opener.readWithProgress = readWithProgress(opener.open)
    opener.addheaders = merged.items()

    if authfunc is not None:
        authfunc(opener)
    return opener


def urlopen(url, proxies=None, data=None, authfunc=None, enableGzip=True,
            useragent=None):
    '''
    create a new opener, open request object
    and return an addinfourl object with an open filepointer (use .read())

    @param url: URL to open
    @param proxies: see getOpener
    @param data: optional request body, passed to urllib2.Request
    @param authfunc: optional callable, invoked once with the new opener
        before the request is sent
    @param enableGzip: ask for and transparently decode compressed responses
    @param useragent: optional value for the User-Agent header
    '''
    headers = {}
    if enableGzip:
        headers['Accept-Encoding'] = _ACCEPT_ENCODING
    if useragent is not None:
        headers['User-Agent'] = useragent
    req = urllib2.Request(url, data, headers)
    # getOpener already calls authfunc(opener); calling it here again would
    # perform the login twice
    opener = getOpener(proxies=proxies, authfunc=authfunc,
                       enableGzip=enableGzip)
    return opener.open(req)


def authfunc(url, user, password):
    '''return a configured authfunc for a vls interface

    The returned callable takes an opener, posts the credentials to
    ``url + '/auth/login'`` and raises an Exception unless the response
    body is exactly 'successful'.
    '''
    def _inner(opener):
        data = urllib.urlencode([
            ('loginUser', user),
            ('loginPassword', password),
            ('login', ''),
            ('nohttps', ''),
        ])
        res = opener.open(url + '/auth/login', data=data).read()
        if res != 'successful':
            raise Exception(
                'could not authentificate user "%s"\n----%s\n----'
                % (user, res[:100]))
    return _inner