次のhtmlをmysqlデータベースに保存したい。
<table border="0" cellpadding="2" cellspacing="7" style="vertical-align:top;"><tr><td width="80" align="center" valign="top"><font style="font-size:85%;font-family:arial,sans-serif"><a href="http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNFJZ2wrps7WAV59VWbRP_g0aQ4oew&url=http://www.usatoday.com/money/companies/story/2012-06-09/cost-lunch-warren-buffett/55476718/1"><img src="//nt3.ggpht.com/news/tbn/n9IJTSGY5UhSzM/6.jpg" alt="" border="1" width="80" height="80" /><br /><font size="-2">USA TODAY</font></a></font></td><td valign="top" class="j"><font style="font-size:85%;font-family:arial,sans-serif"><br /><div style="padding-top:0.8em;"><img alt="" height="1" width="1" /></div><div class="lh"><a href="http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNGyE1zK-V0YHEoCAnXZhUxElVsH_Q&url=http://www.washingtonpost.com/national/private-lunch-with-investor-warren-buffett-sells-for-35-million-in-charity-auction/2012/06/08/gJQAEp07OV_story.html"><b>Private lunch with investor Warren Buffett sells for $3.5 million in charity <b>...</b></b></a><br /><font size="-1"><b><font color="#6f6f6f">Washington Post</font></b></font><br /><font size="-1">OMAHA, Neb. — The cost to dine with investor Warren Buffett has apparently spiked in value, with one deep-pocketed bidder forking over nearly $3.5 million during a charity auction Friday night. The annual auction for a private lunch with the Nebraska <b>...</b></font><br /><font size="-1"><a href="http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNHIe3njmIFMpS9E6-co2zswxEsBZQ&url=http://www.google.com/hostednews/ap/article/ALeqM5jJKkBW1l-UDJC2cTIbtdsnf3HVPA?docId%3D83206c782d534f83ac832145ba2d9c1d">Cost to lunch with Warren Buffett: $3.5 million</a><link rel="syndication-source" href="www.ap.org/83206c782d534f83ac832145ba2d9c1d" /><font size="-1" color="#6f6f6f"><nobr>The Associated Press</nobr></font></font><br /><font size="-1"><a href="http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNEiOqIjoDlaluVXLlwIYI-_3wCABw&url=http://www.sfgate.com/cgi-bin/article.cgi?f%3D/g/a/2012/06/08/bloomberg_articlesM4UMRV0D9L3701-M5BYZ.DTL">Buffett Lunch Auction Raises Record $3.46 Million for Charity</a><font size="-1" color="#6f6f6f"><nobr>San Francisco Chronicle</nobr></font></font><br /><font size="-1"><a href="http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNE07-nsDiD0R0w_g7Juy6bU_oPv7w&url=http://money.cnn.com/2012/06/08/investing/buffett-auction-lunch/">Bidding for lunch with Buffett hits $400000</a><font size="-1" color="#6f6f6f"><nobr>CNN</nobr></font></font><br /><font size="-1" class="p"></font><br /><font class="p" size="-1"><a class="p" href="http://news.google.com/news/more?ned=us&topic=b&ncl=divwXhAe6uG0iLMj4RF1YfpKIhlsM"><nobr><b>all 446 news articles »</b></nobr></a></font></div></font></td></tr></table>
エラーレポートはありませんが、DB にデータがありません。
次のコードを使用してhtmlをエスケープしています。私はPython 2.6.6を使用しています
def get_feeds():
import time,datetime
import MySQLdb
import cgi
import xml.etree.ElementTree as etree
from urllib import urlopen
db=MySQLdb.connect("localhost","test","test","headline")
cursor = db.cursor()
URL = "http://news.google.com/news?cf=all&ned=us&hl=en&topic=b&output=rss"
tree = etree.parse(urlopen(URL))
items = tree.findall('.//item')
for item in items:
link = item.find('link')
title = item.find('title')
title_text = title.text
#print(title_text)
desc = item.find('description')
desc_text = cgi.escape(desc.text)
#print(desc_text)
timestamp = item.find('pubDate')
timestamp2 = time.mktime(time.strptime(timestamp.text, '%a, %d %b %Y %H:%M:%S GMT'))
guid = item.find('guid')
guid_text = guid.text
link = item.find('link')
link_text = link.text
#save item to DB
sql = 'insert into headline(guid,title,link,description,timestamp) values ("%s","%s","%s","%s","%d")' % \
(guid_text,title_text,link_text,desc_text,timestamp2)
#try:
cursor.execute(sql)
db.commit()
#except:
# db.rollback()
db.close()
if __name__ == '__main__':
get_feeds()