比这篇新的文章: 我的vimrc
比这篇旧的文章: MimeMail

Python实现多线程下载

语言: Python, 标签: Python http download multi-Thread 2008/05/31发布 3年前更新
作者: volans, 点击3061次, 评论(0), 收藏者(2), , 打分:

背景
主题: 字体:
001 #!/usr/bin/python
002 # -*- coding: utf-8 -*-
003 # filename: paxel.py
004
005 '''It is a multi-thread downloading tool
006
007     It was developed following the design of axel.
008         Author: volans
009         E-mail: volansw [at] gmail.com
010 '''
011
012 import sys
013 import os
014 import time
015 import urllib
016 from threading import Thread
017
# Proxy map used by paxel() when the caller does not pass `proxies`.
# It maps a URL scheme to the proxy URL used for that scheme.
# NOTE(review): the address is hard-coded -- presumably specific to the
# original author's network; confirm before relying on this default.
local_proxies = {'http': 'http://131.139.58.200:8080'}
019
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a part file.

    The thread appends the received bytes to ``filename``.  If that file
    already exists, its current size is taken as the number of bytes
    already fetched and the request starts right after them, which makes
    an interrupted download resumable.

    Attributes set while running:
        downloaded  -- bytes of this part present on disk so far.
        startpoint  -- absolute offset of the first byte requested.
        oneTimeSize -- number of bytes requested per read() call.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Prepare a worker; nothing is downloaded until start() is called.

        threadname -- name given to the thread and used in status output.
        url        -- address of the resource to download.
        filename   -- part file the received bytes are appended to.
        ranges     -- (first_byte, last_byte) pair, both inclusive.  The
                      default of 0 is only a placeholder: run() indexes
                      this value, so a real pair must be supplied.
        proxies    -- proxy map passed on to urllib.FancyURLopener.
        '''
        Thread.__init__(self, name=threadname)
        # Hand the opener its own copy so instances never share (and can
        # never alter) the default dict or the caller's dict.  None is
        # passed through untouched so urllib keeps its own handling of it.
        if proxies is not None:
            proxies = dict(proxies)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Download the missing tail of this part (Thread entry point).'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded so far.
            self.downloaded = 0

        first_byte, last_byte = self.ranges[0], self.ranges[1]

        # Resume right after the bytes that are already on disk.
        self.startpoint = first_byte + self.downloaded

        # last_byte is inclusive, so the part is complete only once the
        # start point has moved *past* it.  Comparing with >= would skip
        # the final byte of the range (and all of a one-byte range).
        if self.startpoint > last_byte:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KiB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, last_byte))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, last_byte))

        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once instead of once per chunk.
            with open(self.filename, 'ab') as part_file:
                while True:
                    data = self.urlhandle.read(self.oneTimeSize)
                    if not data:
                        break
                    part_file.write(data)
                    # Flush every chunk so the size on disk tracks the
                    # counter; a later run resumes from the file size.
                    part_file.flush()
                    self.downloaded += len(data)
        finally:
            # Release the connection even when reading or writing fails.
            self.urlhandle.close()
068        
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for ``url``.

    url     -- address of the resource to inspect.
    proxies -- proxy map passed to urllib.urlopen().  The default dict is
               only read, never modified.

    Returns the integer value of the Content-Length response header, or 0
    when the server sends no such header.  Raises ValueError if the header
    value is not a number.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # Ask for the exact header.  Searching every header line for the
        # substring 'Length' would also match unrelated headers.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed; do not leave the connection open.
        urlHandler.close()

    if length is None:
        return 0
    return int(length.strip())
078
def SpliteBlocks(totalsize, blocknumber):
    '''Split ``totalsize`` bytes into ``blocknumber`` contiguous byte ranges.

    totalsize   -- total number of bytes to cover.
    blocknumber -- number of ranges to produce.

    Returns a list of ``(first_byte, last_byte)`` tuples with inclusive
    integer bounds.  Every range but the last holds
    ``totalsize // blocknumber`` bytes; the last one also takes the
    remainder.  When ``totalsize`` is smaller than ``blocknumber`` the
    leading ranges are empty, i.e. ``(0, -1)``.

    Raises ZeroDivisionError if ``blocknumber`` is 0.
    '''
    # Floor division keeps every offset an integer even where "/" means
    # true division (Python 3, or "from __future__ import division").
    blocksize = totalsize // blocknumber
    ranges = [(index * blocksize, (index + 1) * blocksize - 1)
              for index in range(blocknumber - 1)]
    # The final range absorbs whatever the even split left over.
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    tasks -- iterable of threading.Thread objects.

    Returns False for an empty iterable.
    '''
    # is_alive() is the supported spelling (available since Python 2.6);
    # the old isAlive() alias was removed in Python 3.9.
    return any(task.is_alive() for task in tasks)
092
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` into ``output`` using ``blocks`` parallel threads.

    url     -- address of the resource to download.
    output  -- path of the file that receives the assembled download.
    blocks  -- number of byte ranges, each fetched by its own thread.
    proxies -- proxy map used both for the size query and by every
               worker thread.

    Each range is stored in a part file named ``tmpfile_<n>`` in the
    current directory.  Once all threads have finished, the parts are
    concatenated into ``output`` and then removed.

    Raises IOError when the size of the resource cannot be determined, or
    when the parts do not add up to that size once the threads have
    stopped.  In the latter case the part files are left on disk and
    ``output`` is not written.
    '''
    size = GetUrlFileSize(url, proxies)
    if size <= 0:
        # Without a known size there is nothing to split into ranges.
        raise IOError('Cannot determine the size of %s' % url)
    ranges = SpliteBlocks(size, blocks)

    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Forward the proxy settings; otherwise the workers would not use
        # the proxies that were used for the size query above.
        task = AxelPython("thread_%d" % i, url, filename[i], ranges[i],
                          proxies)
        task.setDaemon(True)
        task.start()
        tasks.append(task)

    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # A worker that stopped early (or part files that hold too much data)
    # would otherwise be merged silently into a corrupt output file.
    downloaded = sum(task.downloaded for task in tasks)
    if downloaded != size:
        raise IOError('Download incomplete: got %d of %d bytes; '
                      'part files were kept' % (downloaded, size))

    with open(output, 'wb') as merged:
        for part in filename:
            # An empty range never creates its part file; the size check
            # above guarantees that a missing file holds no data.
            if not os.path.exists(part):
                continue
            with open(part, 'rb') as part_file:
                # Copy in chunks so a large part is never held in memory
                # as a whole.
                while True:
                    chunk = part_file.read(65536)
                    if not chunk:
                        break
                    merged.write(chunk)

    # Remove the parts only after the output was written and closed.
    for part in filename:
        try:
            os.remove(part)
        except OSError:
            # Best effort: a leftover part file does not harm the result.
            pass
130
if __name__ == '__main__':
    # Demo run: fetch the PyGTK tutorial with four threads and no proxy.
    paxel(
        url="http://www.pygtk.org/dist/pygtk2-tut.pdf",
        output='pygtk2.pdf',
        blocks=4,
        proxies={},
    )


所有评论,共0条:( 我也来说两句)


发表评论

注册登录后再发表评论