比这篇新的文章: Codee#25449
比这篇旧的文章: 串口中断发送和接收

picasa 下载

语言: Python, 标签: picasa 下载 相册 全部 2012/02/08发布 2天前更新 更新记录
作者: weizhe86, 点击150次, 评论(0), 收藏者(0), , 打分:

背景
主题: 字体:
Python语言: picasa 下载
001 #!/usr/bin/env python
002 # -*- coding: utf-8 -*-
003
004 import urllib,urllib2,socket,os,sys,time
005 from HTMLParser import HTMLParser
006 import socks
007 import socket
# Set the default proxy to a SOCKS5 server at 127.0.0.1:7070 and replace
# socket.socket with the proxy-aware socksocket class, so sockets created
# through the socket module from here on use that proxy.
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 7070)
socket.socket = socks.socksocket

# Running picture counter used to build the pic<num>.jpg file names.
# main() resets it and downpic() increments it.  A ``global`` statement at
# module level has no effect, so the name is simply assigned here.
num = 0
class getalbumlist(HTMLParser):
    """Pull the album-list script text out of a Picasa Web profile page.

    The parser waits for an ``<iframe id="lhid_secrethistory">`` start tag
    and then stores the first chunk of text that is delivered while inside
    a ``<script>`` element.  The captured text is available through
    ``returnalbumlist()``; it is the empty string when nothing matched.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # 1 once the marker iframe has been seen and nothing captured yet.
        self.flag0 = 0
        # 1 while the parser is between <script> and </script>.  It must
        # exist before handle_data() runs, otherwise text that follows the
        # marker iframe but precedes the first <script> raises
        # AttributeError.
        self.flag1 = 0
        # Captured script text; initialised so returnalbumlist() never
        # fails with AttributeError when the page has no matching script.
        self.albumlist = ''

    def handle_starttag(self, tag, attrs):
        """Track the marker iframe and the start of script elements."""
        if tag == 'iframe' and dict(attrs).get('id') == 'lhid_secrethistory':
            self.flag0 = 1
        if tag == 'script':
            self.flag1 = 1

    def handle_data(self, data):
        """Store the first script text chunk seen after the marker iframe."""
        if self.flag0 == 1 and self.flag1 == 1:
            self.albumlist = data
            # Clear the marker so later scripts do not overwrite the result.
            self.flag0 = 0

    def handle_endtag(self, tag):
        """Note that the current script element has ended."""
        if tag == 'script':
            self.flag1 = 0

    def returnalbumlist(self):
        """Return the captured script text ('' if none was captured)."""
        return self.albumlist
class getimglist(HTMLParser):
    """Collect large-size image URLs from the thumbnails on an album page.

    Every ``<img>`` whose ``src`` contains ``/s128/`` and ends with ``.jpg``
    is recorded with ``/s128/`` replaced by ``/s1600/``.  The collected URLs
    are available, in document order, through ``returnvalue()``.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.flag0 = 0  # not read by this class; kept so the attribute still exists
        self.value = []

    def handle_starttag(self, tag, attrs):
        """Record the rewritten URL of a matching thumbnail image."""
        if tag != 'img':
            return
        imgurl = dict(attrs).get('src')
        # A valueless attribute (<img src>) is reported with value None;
        # skip it instead of failing on the substring test below.
        if not imgurl:
            return
        if '/s128/' in imgurl and imgurl.endswith('.jpg'):
            self.value.append(imgurl.replace('/s128/', '/s1600/'))

    def returnvalue(self):
        """Return the list of collected image URLs."""
        return self.value
043
def getbasepage(uid):
    """Download the "show all" page of a Picasa Web user.

    uid -- user id substituted into the picasaweb.google.com URL.

    Returns the response body exactly as read() delivers it.
    """
    picasaweb_url = "https://picasaweb.google.com/%s?showall=true"
    picurl = urllib.urlopen(picasaweb_url % uid)
    # try/finally so the connection is released even when read() fails.
    try:
        return picurl.read()
    finally:
        picurl.close()
050
def downalbum(url):
    """Fetch an album page and return the image URLs found on it.

    url -- address of the album page.

    Returns the list produced by getimglist for the downloaded page.
    """
    picurl = urllib.urlopen(url)
    # try/finally so the connection is released even when read() fails.
    try:
        basepage = picurl.read()
    finally:
        picurl.close()
    parser = getimglist()
    parser.feed(basepage)
    return parser.returnvalue()
059
def downpic(url, userid):
    """Download one picture into ./Pictures/picasa/<userid>/pic<num>.jpg.

    url    -- address of the image to fetch.
    userid -- name of the sub-directory the file is written to.

    Increments the module-level counter ``num`` before downloading and uses
    the new value in the file name.
    """
    global num
    num += 1
    print('load url %s' % url)
    response = urllib.urlopen(url)
    # try/finally so the connection is released even when read() fails.
    try:
        data = response.read()
    finally:
        response.close()
    # Image data is binary: write with 'wb' so no newline translation can
    # corrupt the file, and let ``with`` close the handle on any error.
    with open('./Pictures/picasa/%s/pic%d.jpg' % (userid, num), 'wb') as fi:
        fi.write(data)
070            
071
def main(userid=''):
    """Download every picture from every album listed for ``userid``.

    userid -- Picasa Web user id; nothing happens when it is empty.

    Pictures are written below ./Pictures/picasa/<userid>/.  The
    module-level counter ``num`` is reset to 0, or to the number of entries
    already present in that directory when it could not be created.
    """
    global num
    if not userid:
        return
    num = 0
    target_dir = './Pictures/picasa/' + userid
    try:
        os.makedirs(target_dir)
    except OSError:
        # makedirs failed (typically because the directory already exists):
        # continue numbering after the entries that are already there.
        # Only OSError is caught so that e.g. KeyboardInterrupt propagates.
        num = len(os.listdir(target_dir))

    basepage = getbasepage(userid)
    parser = getalbumlist()
    parser.feed(basepage)
    albumlist = parser.returnalbumlist()
    # Turn "\xNN" escapes into "%NN" so unquote() can decode them.
    albumlist = urllib.unquote(albumlist.replace('\\x', '%'))

    # Keep the text from the first '[' up to (not including) the last ']'.
    albumscript = albumlist[albumlist.index('['):albumlist.rindex(']')]
    # Each album is announced by a line starting with ",url:"; the slice
    # drops the first six characters and the last one (presumably the
    # prefix plus the quotes around the URL -- confirm against a live page).
    picalbumlist = [
        line[6:-1]
        for line in albumscript.split('\n')
        if line.startswith(',url:')
    ]
    print(picalbumlist)

    for picalbum in picalbumlist:
        print('load album %s' % picalbum)
        for picurl in downalbum(picalbum):
            downpic(picurl, userid)
096
if __name__ == '__main__':
    # User ids whose albums are downloaded when the file is run as a script.
    uidlist = ['107778867772322280285']
    for uid in uidlist:
        main(uid)


所有评论,共0条:( 我也来说两句)


发表评论

注册登录后再发表评论