需求
最近看到一篇比较实用的关于生产力的文章:《[Quora][翻譯] 有哪些可以應用到每日生活的省時妙招呢?》,内容翻译自 Quora 上的一个回答。我结合自己的实践,特别认同其中提到的一个观点:睡好、吃好、多运动。其他观点还需要好好实践一下,才能体会出作者的用意。
文章中还提到了一个幻灯片:Productivity porn。
内容很不错,但是作者不让下载。
解决方案
但是这难不倒程序员:图片都有了,自己抓下来生成 PDF 就行了。搜了一下,果然有人实践过,代码如下(在 Mac / Linux 下使用时,需要注释掉生成 PPT 的相关代码):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
slideshare-dl.py
~~~~~~~~~~~~~~~~
slideshare-dl is a small command-line program
for downloading slides from SlideShare.net
"""
import os
import re
import subprocess
import sys
import urllib2
from xml.etree import ElementTree as ET

import win32com.client #Only Windows for generating ppt
from BeautifulSoup import BeautifulSoup
from PIL import Image
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
class SlideShare(object):
    """Download a SlideShare deck and rebuild it as a PDF and a PowerPoint file.

    Typical use is ``SlideShare().get(url)``, which runs the whole pipeline:
    locate the deck's XML manifest, download every file the manifest lists,
    render each downloaded file to PNG with the external ``swfrender``
    command, and assemble the PNGs into ``<slide name>.pdf`` and, when
    PowerPoint automation is importable, ``<slide name>.pptx``.
    """

    def __init__(self, url=None):
        """Remember the deck page URL; it may also be supplied later via get()."""
        self.url = url
        self.__xml_file = ''
        self.__slide_name = ''
        self.__files = []
        self.__images = []

    @staticmethod
    def _extract_slide_name(page_json):
        """Return the "doc" value embedded in *page_json*, or None if absent."""
        match = re.search('"doc":"(.*?)"', page_json, re.IGNORECASE)
        if match is None:
            return None
        return str(match.group(1))

    @staticmethod
    def _progress_text(received, total):
        """Format the download progress; omit the percentage if *total* is unknown."""
        if total:
            return r"%10d [%3.2f%%]" % (received, received * 100. / total)
        return "%10d" % received

    def set_xml_file(self):
        """Fetch the deck page and derive the slide name and manifest URL.

        The slide name is the "doc" value found in the page's
        ``<script id="page-json">`` element.

        Raises:
            ValueError: if the page carries no "doc" value.
        """
        response = urllib2.urlopen(self.url)
        try:
            source = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(source)
        html = soup.find("script", {"id": "page-json"})
        slide_name = self._extract_slide_name(str(html))
        if slide_name is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("No slide name found in page: %s" % self.url)
        self.__slide_name = slide_name
        self.__xml_file = "http://s3.amazonaws.com/slideshare/" + self.__slide_name + ".xml"

    def create_directory(self, dir_name):
        """Create *dir_name* if needed and make it the current working directory."""
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        os.chdir(dir_name)

    def files_from(self, xml_file):
        """Return the ``Src`` attribute of every child of the manifest root.

        Args:
            xml_file: URL of the XML manifest.

        Returns:
            A list of strings. The list is empty when the manifest cannot be
            fetched or parsed, so callers can always iterate over the result.
        """
        try:
            response = urllib2.urlopen(xml_file)
            try:
                root = ET.parse(response).getroot()
            finally:
                response.close()
        except Exception as err:
            # Best-effort: report the cause and hand back nothing to download.
            print("Unexpected error opening xml file: %s" % err)
            return []
        files = []
        for child in root:
            src = child.get('Src')
            # Children without a Src attribute carry nothing to download.
            if src is not None:
                files.append(str(src))
        return files

    def download_file(self, url):
        """Download *url* into the current directory, printing progress.

        The local file is named after the last path segment of *url* and is
        recorded for the later conversion step once fully written.
        """
        file_name = url.split('/')[-1]
        response = urllib2.urlopen(url)
        try:
            lengths = response.info().getheaders("Content-Length")
            # The server may omit Content-Length; then the size is unknown.
            file_size = int(lengths[0]) if lengths else None
            size_text = file_size if file_size is not None else "unknown"
            print("Downloading: %s Bytes: %s" % (file_name, size_text))
            received = 0
            with open(file_name, 'wb') as out:
                while True:
                    chunk = response.read(8192)
                    if not chunk:
                        break
                    received += len(chunk)
                    out.write(chunk)
                    status = self._progress_text(received, file_size)
                    # Backspaces return the cursor so the next status overwrites this one.
                    sys.stdout.write(status + chr(8) * len(status))
                    sys.stdout.flush()
        finally:
            response.close()
        self.__files.append(file_name)

    def download(self):
        """Download every file listed in the manifest found by set_xml_file()."""
        for url in self.files_from(self.__xml_file):
            self.download_file(url)

    def convert_to_images(self):
        """Render each downloaded file to ``<basename>.png`` using ``swfrender``.

        Files whose conversion exits with a non-zero status are reported and
        left out of the image list.

        Raises:
            OSError: if the ``swfrender`` executable cannot be started.
        """
        for filename in self.__files:
            image_name = os.path.splitext(filename)[0] + '.png'
            # An argument list bypasses the shell, so file names taken from the
            # remote manifest cannot inject commands or break on spaces.
            status = subprocess.call(
                ['swfrender', os.path.join(os.getcwd(), filename), '-o', image_name])
            if status != 0:
                print("swfrender failed for %s (exit status %s)" % (filename, status))
                continue
            self.__images.append(image_name)

    def generate_pdf(self):
        """Write ``<slide name>.pdf`` with one landscape A4 page per image."""
        pdf_name = self.__slide_name + ".pdf"
        print("Generating PDF...")
        pdf = canvas.Canvas(pdf_name, pagesize=A4)
        page_width, page_height = A4
        pdf.setPageSize((page_height, page_width))  # swapped dimensions: landscape
        for filename in self.__images:
            pdf.drawImage(os.path.join(os.getcwd(), filename), 60, 10)
            pdf.showPage()
        pdf.save()
        print("Done.")

    def generate_ppt(self):
        """Write ``<slide name>.pptx`` with one blank-layout slide per image.

        Uses PowerPoint through ``win32com``; when that module cannot be
        imported the step is skipped with a message.
        """
        try:
            import win32com.client
        except ImportError:
            print("win32com is not available; skipping PPT generation.")
            return
        print("Generatin PPT...")
        layout_blank = 12  # PowerPoint slide layout constant used for every slide
        application = win32com.client.Dispatch("PowerPoint.Application")
        try:
            application.Visible = True
            presentation = application.Presentations.Add()
            # Each slide is inserted at position 1, so walking the images in
            # reverse leaves them in their original order.
            for filename in reversed(self.__images):
                picture = os.getcwd() + '/' + filename
                width, height = Image.open(picture).size
                slide = presentation.Slides.Add(1, layout_blank)
                slide.Shapes.AddPicture(FileName=picture, LinkToFile=False,
                                        SaveWithDocument=True, Left=0, Top=0,
                                        Width=width, Height=height)
            presentation.SaveAs(os.getcwd() + '/' + self.__slide_name + ".pptx")
            print("Done.")
        finally:
            # Always close PowerPoint, even when building the deck failed.
            application.Quit()

    def get(self, url):
        """Run the full pipeline for the deck at *url*.

        Output is written to a directory named after the slide. The original
        working directory is restored afterwards and the per-deck file lists
        are reset first, so get() can be called repeatedly on one instance.
        """
        self.url = url
        self.__files = []
        self.__images = []
        start_dir = os.getcwd()
        try:
            self.set_xml_file()
            self.create_directory(self.__slide_name)
            self.download()
            self.convert_to_images()
            self.generate_pdf()
            self.generate_ppt()
        finally:
            os.chdir(start_dir)
def main(url=None):
    """Download one SlideShare deck and convert it.

    Args:
        url: Address of the deck page. When omitted, the first command-line
            argument is used; if there is none either, a sample deck is
            fetched.
    """
    if url is None:
        if len(sys.argv) > 1:
            url = sys.argv[1]
        else:
            url = "http://www.slideshare.net/david.motta/modelo-del-negocio-con-rup-y-uml-parte-3-1534304"
    slide = SlideShare()
    slide.get(url)
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
代码出处:slideshare-dl is a small command-line program for downloading slides from SlideShare.net
PS: 据 Slideshare 评论里作者的回复,46 - 51 页码里的小本子是作者自己用 InDesign 做的,足见他是有多喜欢探索提高效率的方法。
- EOF -