Wgetter
Publicado por Fernando (última atualização em 02/05/2014)
[ Hits: 5.815 ]
Homepage: https://github.com/phoemur/
Esta é a minha implementação do Wget escrita em Python.
Se gostar você pode instalar o módulo em:
https://pypi.python.org/pypi/wgetter/
Ou contribuir no GitHub em:
https://github.com/phoemur/wgetter
#!/usr/bin/env python
"""
Wgetter is another command line download utility written completely in python.
It is based on python-wget (https://bitbucket.org/techtonik/python-wget/src)
with some improvements.

It works on python >= 2.6 or python >=3.0
Runs on Windows or Linux or Mac

API Usage:

>>> import wgetter
>>> filename = wgetter.download('https://sites.google.com/site/doctormike/pacman-1.2.tar.gz', outdir='/home/user')
100 % [====================================================>] 19.9KiB / 19.9KiB 100.0KiB/s
>>> filename
'/home/user/pacman-1.2.tar.gz'
"""

import base64
import binascii
import hashlib
import os
import shutil
import sys
import tempfile
from time import time

PY3K = sys.version_info >= (3, 0)
if PY3K:
    import urllib.request as ulib
    import urllib.parse as urlparse
else:
    import urllib2 as ulib
    import urlparse


SUFFIXES = {1000: ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
            1024: ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']}


def approximate_size(size, a_kilobyte_is_1024_bytes=True):
    """Convert a file size to human-readable form.

    Humansize.py from Dive into Python3
    Mark Pilgrim - http://www.diveintopython3.net/
    Copyright (c) 2009, Mark Pilgrim, All rights reserved.

    Keyword arguments:
    size -- file size in bytes
    a_kilobyte_is_1024_bytes -- if True (default), use multiples of 1024
                                if False, use multiples of 1000

    Returns: string
    Raises: ValueError if size is negative or too large for the suffix table
    """
    size = float(size)
    if size < 0:
        raise ValueError('number must be non-negative')

    multiple = 1024 if a_kilobyte_is_1024_bytes else 1000
    for suffix in SUFFIXES[multiple]:
        size /= multiple
        if size < multiple:
            return '{0:.1f}{1}'.format(size, suffix)

    raise ValueError('number too large')


def get_console_width():
    """Return width of available window area.

    Autodetection works for Windows and POSIX platforms. Returns 80 for
    others. Returns 0 when the platform is known but the width cannot be
    queried (for example when stdout is not a terminal).

    Code from http://bitbucket.org/techtonik/python-pager
    """
    if os.name == 'nt':
        STD_OUTPUT_HANDLE = -11

        # get console handle
        from ctypes import windll, Structure, byref
        try:
            from ctypes.wintypes import SHORT, WORD, DWORD
        except ImportError:
            # workaround for missing types in Python 2.5
            from ctypes import (
                c_short as SHORT, c_ushort as WORD, c_ulong as DWORD)
        console_handle = windll.kernel32.GetStdHandle(STD_OUTPUT_HANDLE)

        # CONSOLE_SCREEN_BUFFER_INFO Structure
        class COORD(Structure):
            _fields_ = [("X", SHORT), ("Y", SHORT)]

        class SMALL_RECT(Structure):
            _fields_ = [("Left", SHORT), ("Top", SHORT),
                        ("Right", SHORT), ("Bottom", SHORT)]

        class CONSOLE_SCREEN_BUFFER_INFO(Structure):
            _fields_ = [("dwSize", COORD),
                        ("dwCursorPosition", COORD),
                        ("wAttributes", WORD),
                        ("srWindow", SMALL_RECT),
                        ("dwMaximumWindowSize", DWORD)]

        sbi = CONSOLE_SCREEN_BUFFER_INFO()
        ret = windll.kernel32.GetConsoleScreenBufferInfo(
            console_handle, byref(sbi))
        if ret == 0:
            return 0
        return sbi.srWindow.Right + 1

    elif os.name == 'posix':
        from fcntl import ioctl
        from termios import TIOCGWINSZ
        from array import array

        # TIOCGWINSZ fills (rows, cols, xpixel, ypixel)
        winsize = array("H", [0] * 4)
        try:
            ioctl(sys.stdout.fileno(), TIOCGWINSZ, winsize)
        except (IOError, OSError, ValueError, AttributeError):
            # stdout is not a terminal, or was replaced by an object without
            # a usable fileno(); leave the width at 0
            pass
        return winsize[1]

    return 80


CONSOLE_WIDTH = get_console_width()

# Need 2 spaces more to avoid linefeed on Windows
AVAIL_WIDTH = CONSOLE_WIDTH - 44 if os.name == 'nt' else CONSOLE_WIDTH - 42


def filename_from_url(url):
    """:return: detected filename or None"""
    fname = os.path.basename(urlparse.urlparse(url).path)
    if len(fname.strip(" \n\t.")) == 0:
        return None
    return fname


def filename_from_headers(headers):
    """Detect filename from Content-Disposition headers if present.

    http://greenbytes.de/tech/tc2231/

    :param: headers as dict, list or string
    :return: filename from content-disposition header or None
    """
    if isinstance(headers, str):
        headers = headers.splitlines()
    if isinstance(headers, list):
        # skip blank/malformed lines that carry no "name: value" pair
        headers = dict(x.split(':', 1) for x in headers if ':' in x)

    cdisp = headers.get("Content-Disposition")
    if not cdisp:
        return None

    cdtype = cdisp.split(';')
    if len(cdtype) == 1:
        return None
    if cdtype[0].strip().lower() not in ('inline', 'attachment'):
        return None

    # several filename params is illegal, and none at all gives us nothing
    # to work with; both cases yield None
    fnames = [x for x in cdtype[1:] if x.strip().startswith('filename=')]
    if len(fnames) != 1:
        return None

    name = fnames[0].split('=', 1)[1].strip(' \t"')
    name = os.path.basename(name)
    if not name:
        return None
    return name


def filename_fix_existing(filename, dirname):
    """Expand the name portion of filename with a numeric '(x)' suffix so
    that the returned filename does not exist already in dirname.

    Existing files named either 'name(x).ext' or 'name (x).ext' are taken
    into account when choosing the next index. Filenames without an
    extension are supported.
    """
    if '.' in filename:
        name, ext = filename.rsplit('.', 1)
        ext = '.' + ext
    else:
        name, ext = filename, ''

    digits_only = set('0123456789')
    indexes = []
    for entry in os.listdir(dirname):
        if not entry.startswith(name) or not entry.endswith(ext):
            continue
        # what is left between the base name and the extension
        middle = entry[len(name):len(entry) - len(ext)].strip()
        if middle.startswith('(') and middle.endswith(')'):
            number = middle[1:-1]
            if number and set(number) <= digits_only:
                indexes.append(int(number))

    idx = max(indexes) + 1 if indexes else 1
    return '{0}({1}){2}'.format(name, idx, ext)


def report_unknown(bytes_so_far, chunk_size, total_size, speed):
    """
    This callback for the download function is used
    when the total size is unknown
    """
    sys.stdout.write("Downloading: {0} / Unknown - {1}/s\r".format(
        approximate_size(bytes_so_far), approximate_size(speed)))
    sys.stdout.flush()


def report_bar(bytes_so_far, chunk_size, total_size, speed):
    """
    This callback for the download function is used to print the download bar
    """
    if total_size <= 0:
        # a bogus Content-Length would divide by zero below
        return report_unknown(bytes_so_far, chunk_size, total_size, speed)

    percent = int(bytes_so_far * 100 / total_size)
    current = approximate_size(bytes_so_far).center(9)
    total = approximate_size(total_size).center(9)
    shaded = int(float(bytes_so_far) / total_size * AVAIL_WIDTH)
    # keep the bar at a constant width: always room for the '>' head and
    # never past the right edge
    shaded = max(1, min(shaded, AVAIL_WIDTH))
    sys.stdout.write(
        " {0}% [{1}{2}{3}]".format(str(percent).center(4),
                                   '=' * (shaded - 1),
                                   '>',
                                   ' ' * (AVAIL_WIDTH - shaded)) +
        "{0}/{1} {2}".format(current, total,
                             (approximate_size(speed) + '/s').center(12)))
    sys.stdout.write("\r")
    sys.stdout.flush()

    if bytes_so_far >= total_size:
        sys.stdout.write('\n')


def report_onlysize(bytes_so_far, chunk_size, total_size, speed):
    """
    This callback for the download function is used when console width
    is not enough to print the bar. It prints only the sizes
    """
    if total_size <= 0:
        # a bogus Content-Length would divide by zero below
        return report_unknown(bytes_so_far, chunk_size, total_size, speed)

    percent = int(bytes_so_far * 100 / total_size)
    current = approximate_size(bytes_so_far).center(10)
    total = approximate_size(total_size).center(10)
    sys.stdout.write('D: {0}% -{1}/{2}\r'.format(percent, current, total))
    sys.stdout.flush()


def md5sum(filename, blocksize=8192):
    """
    Returns the MD5 checksum of a file as a hexadecimal string
    """
    with open(filename, 'rb') as fh:
        m = hashlib.md5()
        while True:
            data = fh.read(blocksize)
            if not data:
                break
            m.update(data)
    return m.hexdigest()


def _md5_matches(md5_header, hexdigest):
    """Return True if a Content-MD5 header value matches a hex MD5 digest.

    The header normally carries the base64 encoding of the binary digest;
    a plain hexadecimal value is accepted as well.
    """
    expected = md5_header.strip()
    if expected.lower() == hexdigest.lower():
        return True
    encoded = base64.b64encode(binascii.unhexlify(hexdigest)).decode('ascii')
    return expected == encoded


def download(link, outdir='.', chunk_size=4096):
    """
    This is the Main function, which downloads a given link
    and saves on outdir (default = current directory)

    link -- URL to fetch (anything urlopen accepts)
    outdir -- existing directory where the file is stored
    chunk_size -- number of bytes requested per read

    Returns the path of the saved file. Exits the interpreter with status 1
    if the user interrupts the transfer with Ctrl + C.
    """
    url = None
    fh = None
    bytes_so_far = 0
    filename = filename_from_url(link) or "."

    # get filename for temp file in the output directory
    (fd_tmp, tmpfile) = tempfile.mkstemp(
        ".tmp", prefix=filename + ".", dir=outdir)
    os.close(fd_tmp)
    os.unlink(tmpfile)

    try:
        url = ulib.urlopen(link)
        fh = open(tmpfile, mode='wb')

        headers = url.info()
        try:
            total_size = int(headers['Content-Length'])
        except (ValueError, KeyError, TypeError):
            total_size = 'unknown'

        try:
            md5_header = headers['Content-MD5']
        except (ValueError, KeyError, TypeError):
            md5_header = None

        # Define which callback we're gonna use
        if total_size == 'unknown' or total_size <= 0:
            reporthook = report_unknown
        elif CONSOLE_WIDTH >= 45:
            reporthook = report_bar
        else:
            reporthook = report_onlysize

        # Below are the registers to calculate network transfer rate
        time_register = time()
        speed = 0.0
        bytes_register = 0.0

        # Loop that reads in chunks, calculates speed and does the callback to
        # print the progress
        while True:
            chunk = url.read(chunk_size)

            # Update Download Speed every 1 second
            now = time()
            elapsed = now - time_register
            if elapsed > 1:
                speed = (bytes_so_far - bytes_register) / elapsed
                # Set registers properly for future use
                time_register = now
                bytes_register = bytes_so_far

            if not chunk:
                break

            bytes_so_far += len(chunk)
            fh.write(chunk)
            reporthook(bytes_so_far, chunk_size, total_size, speed)

    except KeyboardInterrupt:
        print('\n\nCtrl + C: Download aborted by user')
        print('Partial downloaded file:\n{0}'.format(os.path.abspath(tmpfile)))
        sys.exit(1)

    finally:
        if url:
            url.close()
        if fh:
            fh.close()

    filenamealt = filename_from_headers(headers)
    if filenamealt:
        filename = filenamealt

    # add numeric '(x)' suffix if filename already exists
    if os.path.exists(os.path.join(outdir, filename)):
        filename = filename_fix_existing(filename, outdir)
    filename = os.path.join(outdir, filename)

    shutil.move(tmpfile, filename)

    # Check if sizes matches
    if total_size != 'unknown' and total_size != bytes_so_far:
        print(
            '\n\nWARNING!! Downloaded file size mismatches... Probably corrupted...')

    # Check md5 if it was in html header
    if md5_header:
        print('\nValidating MD5 checksum...')
        if _md5_matches(md5_header, md5sum(filename)):
            print('MD5 checksum passed!')
        else:
            print('MD5 checksum do NOT passed!!!')

    return filename


def main(argv=None):
    """Command line entry point: download every URL given as an argument.

    argv -- argument list including the program name (defaults to sys.argv)

    Returns the process exit status.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) == 1 or argv[1] in ('-h', '--help'):
        print('Usage: {0} <URL>'.format(argv[0]))
        # nothing to download; do not treat the help flag as a URL
        return 0

    for link in [str(elem) for elem in argv[1:]]:
        print('Downloading ' + link)
        filename = download(link)
        print('\nSaved under {0}'.format(filename))
    return 0


if __name__ == '__main__':
    sys.exit(main())
Cotação atual do dólar - versão Python
hdns - Enumerador de diretórios
Script em Python 3.6 para fazer scrape de uma URL exportando métricas no formato Prometheus
Compartilhando a tela do Computador no Celular via Deskreen
Como Configurar um Túnel SSH Reverso para Acessar Sua Máquina Local a Partir de uma Máquina Remota
Configuração para desligamento automatizado de Computadores em um Ambiente Comercial
Como renomear arquivos de letras maiúsculas para minúsculas
Imprimindo no formato livreto no Linux
Vim - incrementando números em substituição
Efeito "livro" em arquivos PDF
Como resolver o erro no CUPS: Unable to get list of printer drivers
Mensagem quando tento fazer o apt update && apt upgrade no kal... (2)
Melhores Práticas de Nomenclatura: Pastas, Arquivos e Código (0)
[Python] Automação de scan de vulnerabilidades
[Python] Script para analise de superficie de ataque
[Shell Script] Novo script para redimensionar, rotacionar, converter e espelhar arquivos de imagem
[Shell Script] Iniciador de DOOM (DSDA-DOOM, Doom Retro ou Woof!)
[Shell Script] Script para adicionar bordas às imagens de uma pasta