最近在学 Python，这是一个练手作品。
[python]
#!/bin/env python
import urllib
import re
import os
from sgmllib import SGMLParser
# Accumulator filled by the script below: one entry per anchor URL, each entry
# being the list of PDF paths matched inside that URL.
found = []
class URLLister(SGMLParser):
    """Collect the ``href`` value of every ``<a>`` start tag seen by the parser.

    Feed HTML with ``feed()``; the link targets are then available, in the
    order they were encountered, in ``self.urls``.
    """

    def reset(self):
        """Reset the underlying parser and start with an empty ``urls`` list."""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Record the ``href`` attribute value(s) of an opening ``<a>`` tag.

        ``attrs`` is an iterable of ``(name, value)`` pairs. Pairs whose name
        is not ``'href'`` are ignored, so an anchor without an href adds
        nothing to ``self.urls``.
        """
        self.urls.extend(value for name, value in attrs if name == 'href')
# Fetch the Velocity China slides/videos index page, pick out every link that
# points at a PDF under ppts/, and download each one into ./ppts (relative to
# the current working directory).
# NOTE: urllib.urlopen / urllib.urlretrieve are the Python 2 urllib API.

# Downloads are written under ./ppts, so the directory has to exist first.
ppt_dir = os.path.join(os.getcwd(), 'ppts')
if not os.path.isdir(ppt_dir):
    os.mkdir(ppt_dir)

usock = urllib.urlopen("http://velocity.oreilly.com.cn/index.php?func=slidesvideos")
try:
    page = usock.read()
finally:
    # Release the connection even if reading the page fails.
    usock.close()

parser = URLLister()
parser.feed(page)
parser.close()

# Raw string with an escaped dot: only a literal ".pdf" extension matches.
# (Unescaped, the "." accepted any character, e.g. "ppts/fooXpdf".)
pdf_link = re.compile(r'ppts/\w*\.pdf')
for url in parser.urls:
    # One (possibly empty) list of matches per anchor URL.
    found.append(pdf_link.findall(url))

# A deck may be linked more than once on the page; download each file once.
fetched = set()
for pdfurl in found:
    for path in pdfurl:
        if path in fetched:
            continue
        fetched.add(path)
        urllib.urlretrieve("http://velocity.oreilly.com.cn/" + path,
                           os.path.join(os.getcwd(), path))
[/python]