require 'open-uri'

# Fetches a web page and reads social-sharing metadata out of its <meta> tags.
#
# Nokogiri is referenced here but not required by this file; the host
# application is expected to have loaded it already.
module MetaParser
  # Browser User-Agent strings; parse_doc sends one, picked at random, with
  # every request.
  COMMON_USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
  ].freeze

  # Downloads +url+ and parses the response body with Nokogiri::HTML.parse.
  #
  # The address is converted with Kernel#URI and fetched through the URI
  # object's own #open (added by open-uri) rather than through Kernel#open:
  # Kernel#open no longer handles URLs since Ruby 3.0, and for strings that
  # are not URLs it opens local files or "|command" pipes instead.
  #
  # @param url [String, URI] address of the page to fetch
  # @return [Object] whatever Nokogiri::HTML.parse returns for the page body
  # @raise [ArgumentError] if +url+ is not something open-uri can fetch
  #   (for example a bare host name without a scheme, or a file path)
  # @raise [URI::InvalidURIError] if +url+ cannot be parsed as a URI at all
  def self.parse_doc(url)
    uri = URI(url)
    # open-uri only adds a public #open to the URI classes it can fetch.
    unless uri.respond_to?(:open)
      raise ArgumentError, "not a fetchable URL (missing http:// or https://?): #{url.inspect}"
    end

    # Block form so the downloaded stream is closed as soon as it is parsed.
    uri.open('User-Agent' => COMMON_USER_AGENTS.sample) do |page|
      Nokogiri::HTML.parse(page)
    end
  end

  # Shared implementation behind Facebook.get_attributes and
  # Twitter.get_attributes.
  #
  # @param url [String, URI] address of the page to fetch
  # @param selectors [Hash{Symbol => String}] result key => CSS selector of
  #   the <meta> tag whose "content" attribute supplies the value
  # @return [Hash{Symbol => Object}] +:url+ followed by one entry per
  #   selector, in the order given; an entry is nil when the page has no
  #   matching tag
  def self.extract_attributes(url, selectors)
    doc = parse_doc(url)
    selectors.each_with_object({ url: url }) do |(key, selector), attributes|
      node = doc.at(selector)
      # A missing tag yields nil instead of aborting the whole extraction.
      attributes[key] = node && node['content']
    end
  end

  # Reads the og:* <meta property="..."> tags of a page.
  module Facebook
    # Result key => CSS selector of the tag that supplies it.
    SELECTORS = {
      site_name: 'meta[property="og:site_name"]',
      title: 'meta[property="og:title"]',
      description: 'meta[property="og:description"]',
      image: 'meta[property="og:image"]'
    }.freeze

    # @param url [String, URI] address of the page to inspect
    # @return [Hash] keys :url, :site_name, :title, :description, :image;
    #   a value is nil when the page does not carry the corresponding tag
    # @raise [ArgumentError, URI::InvalidURIError] see MetaParser.parse_doc
    def self.get_attributes(url)
      MetaParser.extract_attributes(url, SELECTORS)
    end
  end # Facebook

  # Reads the twitter:* <meta name="..."> tags of a page.
  module Twitter
    # Result key => CSS selector of the tag that supplies it.
    SELECTORS = {
      site_name: 'meta[name="twitter:site"]',
      title: 'meta[name="twitter:title"]',
      description: 'meta[name="twitter:description"]',
      image: 'meta[name="twitter:image"]'
    }.freeze

    # @param url [String, URI] address of the page to inspect
    # @return [Hash] keys :url, :site_name, :title, :description, :image;
    #   a value is nil when the page does not carry the corresponding tag
    # @raise [ArgumentError, URI::InvalidURIError] see MetaParser.parse_doc
    def self.get_attributes(url)
      MetaParser.extract_attributes(url, SELECTORS)
    end
  end # Twitter
end
# Usage:
#   MetaParser::Facebook.get_attributes("https://google.com")
#   MetaParser::Twitter.get_attributes("https://google.com")