比这篇新的文章:
我的vimrc
比这篇旧的文章: MimeMail
作者: volans, 点击3061次, 评论(0), 收藏者(2), , 打分:
所有评论,共0条:( 我也来说两句)
比这篇旧的文章: MimeMail
Python实现多线程下载
语言: Python, 标签: Python http download multi-Thread 2008/05/31发布 3年前更新作者: volans, 点击3061次, 评论(0), 收藏者(2), , 打分:
Python语言: Python实现多线程下载
001 #!/usr/bin/python
002 # -*- coding: utf-8 -*-
003 # filename: paxel.py
004
005 '''It is a multi-thread downloading tool
006
007 It was developed following the design of axel.
008 Author: volans
009 E-mail: volansw [at] gmail.com
010 '''
011
012 import sys
013 import os
014 import time
015 import urllib
016 from threading import Thread
017
# Default proxy mapping (URL scheme -> proxy URL) used by paxel() when the
# caller does not supply `proxies`.
# NOTE(review): this is a hard-coded address, presumably specific to the
# original author's network -- confirm before relying on the default.
local_proxies = {'http': 'http://131.139.58.200:8080'}
019
class AxelPython(Thread, urllib.FancyURLopener):
    '''Multi-thread downloading class.

    Each instance is a thread that fetches one byte range of `url` and
    appends it to its own part file `filename`.  run() is the Thread entry
    point that start() invokes.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Store the download parameters; no I/O happens here.

        threadname -- name given to the thread (also stored as self.name)
        url        -- URL to download from
        filename   -- part file this thread appends to
        ranges     -- (first_byte, last_byte) pair; run() indexes it, so the
                      default of 0 is only a placeholder and must be replaced
                      by a real pair before the thread is started
        proxies    -- proxy mapping handed to urllib.FancyURLopener
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        # Bytes of this part already on disk; read by the progress display.
        self.downloaded = 0

    def run(self):
        '''Download the not-yet-fetched tail of this thread's byte range.

        Whatever is already in the part file counts as downloaded, so a
        rerun resumes where the previous one stopped.
        '''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0

        # Resume right after the bytes that are already on disk.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is sent below as the end of an HTTP byte range, which is
        # inclusive.  The part is therefore complete only once the start
        # point has moved *past* it; comparing with >= would skip the last
        # byte of the range.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KiB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

        # NOTE(review): the response status is not checked.  If a server
        # ignores the Range header and sends the whole body, the part file
        # would receive all of it -- confirm whether that needs handling.
        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once instead of once per chunk.
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Flush per chunk so the on-disk size keeps matching
                    # self.downloaded, which the resume logic relies on.
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            self.urlhandle.close()
068
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for `url`.

    Opens `url` with urllib.urlopen (passing `proxies` through) and reads
    the Content-Length response header.

    Returns 0 when the response has no Content-Length header.
    Raises ValueError when the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        headers = urlHandler.info().headers
    finally:
        # Only the headers are needed, so release the handle right away.
        urlHandler.close()

    for header in headers:
        # Each entry is a raw "Name: value" line.  Compare the header name
        # exactly and case-insensitively; a substring search for 'Length'
        # would also hit unrelated headers and miss a lower-case name.
        name, sep, value = header.partition(':')
        if sep and name.strip().lower() == 'content-length':
            return int(value.strip())
    return 0
078
def SpliteBlocks(totalsize, blocknumber):
    '''Split `totalsize` bytes into `blocknumber` consecutive byte ranges.

    Returns a list of (first_byte, last_byte) tuples with inclusive ends.
    Every block except the last has totalsize // blocknumber bytes; the last
    block additionally takes the remainder and always ends at totalsize - 1.

    `blocknumber` must be at least 1 (0 raises ZeroDivisionError).
    '''
    # Floor division keeps the offsets integral even when `/` means true
    # division.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in `tasks` is still running.

    Returns False for an empty sequence.
    '''
    for task in tasks:
        # Prefer is_alive(); fall back to the older isAlive() spelling for
        # objects that only provide that one.
        alive = getattr(task, 'is_alive', None) or task.isAlive
        if alive():
            return True
    return False
092
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download `url` into the file `output` using `blocks` worker threads.

    url     -- URL to download
    output  -- path of the final file (overwritten if it exists)
    blocks  -- number of byte ranges / threads to split the download into
    proxies -- proxy mapping used for the size probe and for every worker

    Each worker writes to "tmpfile_<i>" in the current directory; once all
    workers have finished, the parts are concatenated into `output` in
    order and removed.
    NOTE(review): the part file names are fixed, so two downloads running
    in the same directory would share them -- confirm this is acceptable.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Hand the proxies to the workers too, so they use the same route
        # as the size probe above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.setDaemon(True)
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before the first progress line.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # A reported size of 0 must not crash the progress display.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Concatenate the parts in block order, then delete each one.
    with open(output, 'wb+') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                filehandle.write(f.read())
            try:
                os.remove(part)
            except OSError:
                # Best effort: a leftover part file does not affect `output`.
                pass
130
if __name__ == '__main__':
    # Example run: fetch the PyGTK tutorial in 4 parts, passing an empty
    # proxy mapping instead of the module default.
    url = "http://www.pygtk.org/dist/pygtk2-tut.pdf"
    output = 'pygtk2.pdf'
    paxel(url, output, blocks=4, proxies={})
002 # -*- coding: utf-8 -*-
003 # filename: paxel.py
004
005 '''It is a multi-thread downloading tool
006
007 It was developed following the design of axel.
008 Author: volans
009 E-mail: volansw [at] gmail.com
010 '''
011
012 import sys
013 import os
014 import time
015 import urllib
016 from threading import Thread
017
# Default proxy mapping (URL scheme -> proxy URL) used by paxel() when the
# caller does not supply `proxies`.
# NOTE(review): this is a hard-coded address, presumably specific to the
# original author's network -- confirm before relying on the default.
local_proxies = {'http': 'http://131.139.58.200:8080'}
019
class AxelPython(Thread, urllib.FancyURLopener):
    '''Multi-thread downloading class.

    Each instance is a thread that fetches one byte range of `url` and
    appends it to its own part file `filename`.  run() is the Thread entry
    point that start() invokes.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Store the download parameters; no I/O happens here.

        threadname -- name given to the thread (also stored as self.name)
        url        -- URL to download from
        filename   -- part file this thread appends to
        ranges     -- (first_byte, last_byte) pair; run() indexes it, so the
                      default of 0 is only a placeholder and must be replaced
                      by a real pair before the thread is started
        proxies    -- proxy mapping handed to urllib.FancyURLopener
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        # Bytes of this part already on disk; read by the progress display.
        self.downloaded = 0

    def run(self):
        '''Download the not-yet-fetched tail of this thread's byte range.

        Whatever is already in the part file counts as downloaded, so a
        rerun resumes where the previous one stopped.
        '''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0

        # Resume right after the bytes that are already on disk.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is sent below as the end of an HTTP byte range, which is
        # inclusive.  The part is therefore complete only once the start
        # point has moved *past* it; comparing with >= would skip the last
        # byte of the range.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16 KiB per read
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

        # NOTE(review): the response status is not checked.  If a server
        # ignores the Range header and sends the whole body, the part file
        # would receive all of it -- confirm whether that needs handling.
        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once instead of once per chunk.
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    # Flush per chunk so the on-disk size keeps matching
                    # self.downloaded, which the resume logic relies on.
                    filehandle.flush()
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            self.urlhandle.close()
068
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes that the server reports for `url`.

    Opens `url` with urllib.urlopen (passing `proxies` through) and reads
    the Content-Length response header.

    Returns 0 when the response has no Content-Length header.
    Raises ValueError when the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        headers = urlHandler.info().headers
    finally:
        # Only the headers are needed, so release the handle right away.
        urlHandler.close()

    for header in headers:
        # Each entry is a raw "Name: value" line.  Compare the header name
        # exactly and case-insensitively; a substring search for 'Length'
        # would also hit unrelated headers and miss a lower-case name.
        name, sep, value = header.partition(':')
        if sep and name.strip().lower() == 'content-length':
            return int(value.strip())
    return 0
078
def SpliteBlocks(totalsize, blocknumber):
    '''Split `totalsize` bytes into `blocknumber` consecutive byte ranges.

    Returns a list of (first_byte, last_byte) tuples with inclusive ends.
    Every block except the last has totalsize // blocknumber bytes; the last
    block additionally takes the remainder and always ends at totalsize - 1.

    `blocknumber` must be at least 1 (0 raises ZeroDivisionError).
    '''
    # Floor division keeps the offsets integral even when `/` means true
    # division.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in `tasks` is still running.

    Returns False for an empty sequence.
    '''
    for task in tasks:
        # Prefer is_alive(); fall back to the older isAlive() spelling for
        # objects that only provide that one.
        alive = getattr(task, 'is_alive', None) or task.isAlive
        if alive():
            return True
    return False
092
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download `url` into the file `output` using `blocks` worker threads.

    url     -- URL to download
    output  -- path of the final file (overwritten if it exists)
    blocks  -- number of byte ranges / threads to split the download into
    proxies -- proxy mapping used for the size probe and for every worker

    Each worker writes to "tmpfile_<i>" in the current directory; once all
    workers have finished, the parts are concatenated into `output` in
    order and removed.
    NOTE(review): the part file names are fixed, so two downloads running
    in the same directory would share them -- confirm this is acceptable.
    '''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Hand the proxies to the workers too, so they use the same route
        # as the size probe above.
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.setDaemon(True)
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before the first progress line.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        # A reported size of 0 must not crash the progress display.
        process = downloaded / float(size) * 100 if size else 0.0
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Concatenate the parts in block order, then delete each one.
    with open(output, 'wb+') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                filehandle.write(f.read())
            try:
                os.remove(part)
            except OSError:
                # Best effort: a leftover part file does not affect `output`.
                pass
130
if __name__ == '__main__':
    # Example run: fetch the PyGTK tutorial in 4 parts, passing an empty
    # proxy mapping instead of the module default.
    url = "http://www.pygtk.org/dist/pygtk2-tut.pdf"
    output = 'pygtk2.pdf'
    paxel(url, output, blocks=4, proxies={})
所有评论,共0条:( 我也来说两句)
代码
