#!/usr/bin/env ruby -w
# Tested with Ruby 2.0.0.
# Uses Mechanize, which is a really awesome gem.
# The gem must be installed first, e.g.: gem install mechanize
# This short script fetches a nytimes.com editorial and links each word to the
# Merriam-Webster dictionary.
# Written for my own personal use.
require 'mechanize'
# Fetches a New York Times editorial and prints it to stdout as a small HTML
# page in which every word links to its Merriam-Webster dictionary entry.

# Matches one editorial paragraph. The match is non-greedy and multiline so
# that several paragraphs on one source line stay separate and a paragraph
# that wraps across lines is still captured.
PARAGRAPH_PATTERN = %r{<p class="story-body-text story-content".*?</p>}m

# Matches any HTML tag, including single-letter ones such as <i> or <b>.
TAG_PATTERN = /<[^>]+>/

# Matches either an HTML character reference (e.g. &amp; or &#8217;) or a run
# of word characters. References are matched first so they can be kept intact.
TOKEN_PATTERN = /&#?\w+;|\w+/

# Prefix of every dictionary link; the looked-up word is appended to it.
DICTIONARY_URL = 'http://www.merriam-webster.com/dictionary/'

# Derives a readable title from the first hyphenated slug found in the URL.
#
# @param url [String] the article URL
# @return [String] the slug with hyphens turned into spaces and the first
#   letter capitalized, or "" when the URL contains no hyphenated slug
def article_title(url)
  url[/\w+-[\w+-]+/].to_s.tr('-', ' ').capitalize
end

# Collects the raw HTML of every editorial paragraph in the page.
#
# @param html [String] the full page body
# @return [Array<String>] one entry per matched <p>...</p> element
def extract_paragraphs(html)
  html.scan(PARAGRAPH_PATTERN)
end

# Removes every HTML tag from the given fragment, leaving only its text.
#
# @param html [String] an HTML fragment
# @return [String] the fragment without tags
def strip_tags(html)
  html.gsub(TAG_PATTERN, '')
end

# Wraps each word in a link to its dictionary entry.
#
# @param text [String] tag-free paragraph text
# @return [String] the text with every run of word characters linked
def link_words(text)
  text.gsub(TOKEN_PATTERN) do |token|
    if token.start_with?('&')
      # Character reference: linking its name would corrupt the entity.
      token
    else
      "<a href=#{DICTIONARY_URL}#{token}>#{token}</a>"
    end
  end
end

agent = Mechanize.new
url = 'http://www.nytimes.com/2015/01/28/opinion/a-new-chapter-for-america-and-india.html?ref=opinion&_r=0'
page = agent.get(url)
title = article_title(url)

header = %q{<html><body><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><style type="text/css">
a { text-decoration:none; color : #000; }
</style> }
footer = %q{</body></html>}

puts header
puts "<h1><center>#{title}</center></h1>"
extract_paragraphs(page.body).each do |paragraph|
  print " #{link_words(strip_tags(paragraph))}"
  # A bare <p> after each paragraph acts as the separator in the output page.
  puts '<p>'
end
# Footer
puts footer
No comments:
Post a Comment