#!/usr/bin/env ruby -w
#ruby 2.0.0 tested.
#Uses mechanize, which is a very, very awesome gem.
#The gem must be installed first, e.g.: gem install mechanize
#This short script fetches a nytimes.com editorial and links each word to the Merriam-Webster dictionary.
#My personal purpose.
require 'mechanize'

# Fetches one nytimes.com editorial and prints it to stdout as an HTML page in
# which every word is a link to its Merriam-Webster dictionary entry.
agent = Mechanize.new
url = 'http://www.nytimes.com/2015/01/28/opinion/a-new-chapter-for-america-and-india.html?ref=opinion&_r=0'
page = agent.get(url)

# The title is recovered from the URL: the first hyphenated run of word
# characters, with the hyphens turned into spaces.
title = url.match(/\w+-[\w+-]+/).to_s.gsub("-", " ").capitalize

# Collect the article paragraphs. The lazy `.*?` stops at the first closing
# </p>, so two paragraphs that share a line are not merged into one match.
# `.` does not cross newlines, so a paragraph has to sit on a single line.
paragraphs = page.body.scan(/<p class="story-body-text story-content".*?<\/p>/)

header=%q{<html><body><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><style type="text/css">
a { text-decoration:none; color : #000; }
</style> }
footer=%q{</body></html>}

puts header
puts "<h1><center>#{title}</center></h1>"
paragraphs.each do |paragraph|
  # Strip the markup. `<[^>]+>` also removes one-letter tags such as <i> or
  # <b>, which the earlier `<.[^>]+>` pattern could never match.
  text = paragraph.gsub(/<[^>]+>/, '')
  # Wrap every word in a dictionary link; the href value is quoted so the
  # attribute is well-formed.
  linked = text.gsub(/\w+/) do |word|
    "<a href=\"http://www.merriam-webster.com/dictionary/#{word}\">#{word}</a>"
  end
  print " #{linked}"
  puts "<p>"
end
#Footer
puts footer
Friday, January 30, 2015
Wednesday, January 28, 2015
Benchmark scan and match
2.0.0-p598 :001 > require 'benchmark'
=> true
2.0.0-p598 :002 > url='http://www.nytimes.com/2015/01/28/opinion/a-new-chapter-for-america-and-india.html?ref=opinion&_r=0'
=> "http://www.nytimes.com/2015/01/28/opinion/a-new-chapter-for-america-and-india.html?ref=opinion&_r=0"
2.0.0-p598 :003 > Benchmark.bmbm(100) do |b|
2.0.0-p598 :004 > b.report("scan function") do
2.0.0-p598 :005 > title = url.scan(/\w+-[\w+-]+/).join.gsub("-"," ").capitalize
2.0.0-p598 :006?> end
2.0.0-p598 :007?> b.report("match function") do
2.0.0-p598 :008 > title = url.match(/\w+-[\w+-]+/).to_s.gsub("-"," ").capitalize
2.0.0-p598 :009?> end
2.0.0-p598 :010?> end
Rehearsal ----------------------------------------------------------------------------------------------------------------------------------------
scan function 0.000000 0.000000 0.000000 ( 0.000113)
match function 0.000000 0.000000 0.000000 ( 0.000092)
------------------------------------------------------------------------------------------------------------------------------- total: 0.000000sec
user system total real
scan function 0.000000 0.000000 0.000000 ( 0.000052)
match function 0.000000 0.000000 0.000000 ( 0.000033)
=> [#<Benchmark::Tms:0x00000002083930 @label="scan function", @real=5.2013e-05, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=0.0, @total=0.0>, #<Benchmark::Tms:0x00000002083778 @label="match function", @real=3.2601e-05, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=0.0, @total=0.0>]
=> true
2.0.0-p598 :002 > url='http://www.nytimes.com/2015/01/28/opinion/a-new-chapter-for-america-and-india.html?ref=opinion&_r=0'
=> "http://www.nytimes.com/2015/01/28/opinion/a-new-chapter-for-america-and-india.html?ref=opinion&_r=0"
2.0.0-p598 :003 > Benchmark.bmbm(100) do |b|
2.0.0-p598 :004 > b.report("scan function") do
2.0.0-p598 :005 > title = url.scan(/\w+-[\w+-]+/).join.gsub("-"," ").capitalize
2.0.0-p598 :006?> end
2.0.0-p598 :007?> b.report("match function") do
2.0.0-p598 :008 > title = url.match(/\w+-[\w+-]+/).to_s.gsub("-"," ").capitalize
2.0.0-p598 :009?> end
2.0.0-p598 :010?> end
Rehearsal ----------------------------------------------------------------------------------------------------------------------------------------
scan function 0.000000 0.000000 0.000000 ( 0.000113)
match function 0.000000 0.000000 0.000000 ( 0.000092)
------------------------------------------------------------------------------------------------------------------------------- total: 0.000000sec
user system total real
scan function 0.000000 0.000000 0.000000 ( 0.000052)
match function 0.000000 0.000000 0.000000 ( 0.000033)
=> [#<Benchmark::Tms:0x00000002083930 @label="scan function", @real=5.2013e-05, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=0.0, @total=0.0>, #<Benchmark::Tms:0x00000002083778 @label="match function", @real=3.2601e-05, @cstime=0.0, @cutime=0.0, @stime=0.0, @utime=0.0, @total=0.0>]
Sunday, January 25, 2015
inject example (range sum and range product; note that `fib` below is a plain sum, not Fibonacci)
### Sum of the integers from min to max ###
2.0.0-p598 :018 > def fib(min,max)
2.0.0-p598 :019?> a=min.upto(max).inject(:+)
2.0.0-p598 :020?> a
2.0.0-p598 :021?> end
=> nil
2.0.0-p598 :022 > fib(1,4)
=> 10
2.0.0-p598 :023 > fib(-1,4)
=> 9
### Product of the integers from min to max ###
2.0.0-p598 :028 > def fact(min,max)
2.0.0-p598 :029?> a = min.upto(max).inject(:*)
2.0.0-p598 :030?> a
2.0.0-p598 :031?> end
=> nil
2.0.0-p598 :032 > fib(1,4)
=> 10
2.0.0-p598 :033 > fact(1,4)
=> 24
Very simple....
2.0.0-p598 :018 > def fib(min,max)
2.0.0-p598 :019?> a=min.upto(max).inject(:+)
2.0.0-p598 :020?> a
2.0.0-p598 :021?> end
=> nil
2.0.0-p598 :022 > fib(1,4)
=> 10
2.0.0-p598 :023 > fib(-1,4)
=> 9
### Product of the integers from min to max ###
2.0.0-p598 :028 > def fact(min,max)
2.0.0-p598 :029?> a = min.upto(max).inject(:*)
2.0.0-p598 :030?> a
2.0.0-p598 :031?> end
=> nil
2.0.0-p598 :032 > fib(1,4)
=> 10
2.0.0-p598 :033 > fact(1,4)
=> 24
Very simple....
Saturday, January 24, 2015
Ruby: negative factorial and the sum of its digits.
#!/usr/bin/env ruby
#ruby 2.0.0 version test
# negative factorial and plus each result number
n=ARGV[0].to_i
# "Negative factorial": multiplies n by every following integer up to -1.
# Prints a notice and returns nil when n is greater than -1.
def n_fact(n)
  return puts("argument must be less then 0") if n > -1

  # -1 ends the recursion; anything smaller multiplies into the next step.
  n == -1 ? n : n * n_fact(n + 1)
end
result = n_fact(n)
puts "negative fact value : #{result}"
# Prints the digit sum of r (converted with to_s).
# When the text starts with "-", the first digit is negated, each following
# digit is added to it, and every intermediate step is echoed before the
# final total. Otherwise the plain sum of all characters' to_i values is printed.
def nf_sum(r)
  text = r.to_s
  unless text[0] == "-"
    # Non-negative value: add up every digit in one pass.
    puts text.each_char.inject(0) { |total, ch| total + ch.to_i }
    return
  end

  running = -(text[1].to_i)
  (2...text.size).each do |idx|
    before = running
    running += text[idx].to_i
    puts "#{before}+#{text[idx]}=#{running}"
  end
  # putting result
  puts "So last sum is = #{running}"
end
nf_sum(result)
#Testing ^^;
oyj@oyjmint ~/rp $ ruby negative_fact.rb -7
negative fact value : -5040
-5+0=-5
-5+4=-1
-1+0=-1
So last sum is = -1
oyj@oyjmint ~/rp $ ruby negative_fact.rb -8
negative fact value : 40320
9
#ruby 2.0.0 version test
# negative factorial and plus each result number
n=ARGV[0].to_i
# "Negative factorial": multiplies n by every following integer up to -1.
# Prints a notice and returns nil when n is greater than -1.
def n_fact(n)
  return puts("argument must be less then 0") if n > -1

  # -1 ends the recursion; anything smaller multiplies into the next step.
  n == -1 ? n : n * n_fact(n + 1)
end
result = n_fact(n)
puts "negative fact value : #{result}"
# Prints the digit sum of r (converted with to_s).
# When the text starts with "-", the first digit is negated, each following
# digit is added to it, and every intermediate step is echoed before the
# final total. Otherwise the plain sum of all characters' to_i values is printed.
def nf_sum(r)
  text = r.to_s
  unless text[0] == "-"
    # Non-negative value: add up every digit in one pass.
    puts text.each_char.inject(0) { |total, ch| total + ch.to_i }
    return
  end

  running = -(text[1].to_i)
  (2...text.size).each do |idx|
    before = running
    running += text[idx].to_i
    puts "#{before}+#{text[idx]}=#{running}"
  end
  # putting result
  puts "So last sum is = #{running}"
end
nf_sum(result)
#Testing ^^;
oyj@oyjmint ~/rp $ ruby negative_fact.rb -7
negative fact value : -5040
-5+0=-5
-5+4=-1
-1+0=-1
So last sum is = -1
oyj@oyjmint ~/rp $ ruby negative_fact.rb -8
negative fact value : 40320
9
Friday, January 23, 2015
small ruby scripts,useful or practical?
#!/usr/bin/env ruby
# Moves every PDF found below d whose path contains "php" (any letter case)
# into de.
d="/home/whatsup/Desktop/book2/"
de="/home/whatsup/Desktop/book2/php/"
# find is started without a shell and its NUL-separated output is read
# directly, so no scratch file in /tmp is needed and file names containing
# quotes, spaces, "$" or newlines arrive intact. Binary mode keeps names that
# are not valid UTF-8 from raising during the regex filter.
pdf_paths = IO.popen(["find", d, "-name", "*.pdf", "-print0"], "rb") { |io| io.read.split("\0") }
# Same filter the earlier `grep -i "php"` applied to each path.
pdf_paths.grep(/php/i).each do |path|
  # The multi-argument form of system bypasses the shell, so the path is
  # handed to mv as one argument exactly as found.
  system("mv", "-vf", path, de)
end
~
#!/usr/bin/env ruby
# Sums the uid column (third ':'-separated field) of every /etc/passwd entry
# and prints the total.
P="/etc/passwd"
sum_uid=0
File.foreach(P) do |entry|
  # Field index 2 holds the uid as text; to_i converts it to an Integer
  # (a missing field is nil, and nil.to_i is 0).
  sum_uid += entry.split(/:/)[2].to_i
end
puts "sum of uid = #{sum_uid}"
#!/usr/bin/env ruby
# Sums the uid column (third ':'-separated field) of /etc/passwd.
# inject folds the lines into the total directly, instead of using collect
# only for its side effect and throwing away the array it builds.
sum = File.foreach("/etc/passwd").inject(0) { |total, line| total + line.split(/:/)[2].to_i }
puts "The sum of uid is #{sum}"
#Using bash shell(gawk study)
#!/usr/bin/env bash
#Getting sum of uid on /etc/passwd file using awk(gawk)
#GNU Awk 4.0.1
# A private file from mktemp replaces the fixed, guessable /tmp/uid path.
TEMP_FILE=$(mktemp) || exit 1
# Remove the temp file on every exit path, including failures.
trap 'rm -f "$TEMP_FILE"' EXIT
AWK=$(command -v awk)
PASSFILE="/etc/passwd"
#Getting uid via awk ':' FS
"$AWK" -F: '{print $3}' "$PASSFILE" > "$TEMP_FILE"
#Using awk(gawk)
# RS="" with FS="\n": the uid list is read as one record, one uid per field.
"$AWK" 'BEGIN{FS="\n"; RS=""} {
    total = 0
    i = 1
    while (i <= NF)
    {
        total += $i
        i++
        print "Current total is=" total
        # Announce the next uid only while one remains; a field past NF is empty.
        if (i <= NF)
            print "It will increase by " $i
    }
    print "Total_Sum_of_uid_of_passwd file:", total
    print "NF="NF
}' "$TEMP_FILE"
#!/usr/bin/env ruby
# Sorting by uid.
# puts File.readlines("/etc/passwd").compact.sort_by {|line| line.split(/:/)[2].to_i}
# Just above line is simpler and I think it is better maybe.
#
# Prints every /etc/passwd entry prefixed with its uid ("uid:entry"), ordered
# numerically by that uid.
passwd_path = "/etc/passwd"
sort_by_uid = File.readlines(passwd_path).map do |line|
  fields = line.split(/:/)
  # Re-join the first seven fields. first(7).join tolerates a line with fewer
  # fields, where concatenating a nil field with "+" raised TypeError.
  "#{fields[2]}:#{fields.first(7).join(':')}"
end
# String#to_i reads the leading uid digits of each "uid:..." string, which
# gives a numeric sort key.
sort_by_uid = sort_by_uid.sort_by(&:to_i)
puts "Soting passwd file by uid number"
puts sort_by_uid
# Moves every PDF found below d whose path contains "php" (any letter case)
# into de.
d="/home/whatsup/Desktop/book2/"
de="/home/whatsup/Desktop/book2/php/"
# find is started without a shell and its NUL-separated output is read
# directly, so no scratch file in /tmp is needed and file names containing
# quotes, spaces, "$" or newlines arrive intact. Binary mode keeps names that
# are not valid UTF-8 from raising during the regex filter.
pdf_paths = IO.popen(["find", d, "-name", "*.pdf", "-print0"], "rb") { |io| io.read.split("\0") }
# Same filter the earlier `grep -i "php"` applied to each path.
pdf_paths.grep(/php/i).each do |path|
  # The multi-argument form of system bypasses the shell, so the path is
  # handed to mv as one argument exactly as found.
  system("mv", "-vf", path, de)
end
~
#!/usr/bin/env ruby
# Sums the uid column (third ':'-separated field) of every /etc/passwd entry
# and prints the total.
P="/etc/passwd"
sum_uid=0
File.foreach(P) do |entry|
  # Field index 2 holds the uid as text; to_i converts it to an Integer
  # (a missing field is nil, and nil.to_i is 0).
  sum_uid += entry.split(/:/)[2].to_i
end
puts "sum of uid = #{sum_uid}"
#!/usr/bin/env ruby
# Sums the uid column (third ':'-separated field) of /etc/passwd.
# inject folds the lines into the total directly, instead of using collect
# only for its side effect and throwing away the array it builds.
sum = File.foreach("/etc/passwd").inject(0) { |total, line| total + line.split(/:/)[2].to_i }
puts "The sum of uid is #{sum}"
#Using bash shell(gawk study)
#!/usr/bin/env bash
#Getting sum of uid on /etc/passwd file using awk(gawk)
#GNU Awk 4.0.1
# A private file from mktemp replaces the fixed, guessable /tmp/uid path.
TEMP_FILE=$(mktemp) || exit 1
# Remove the temp file on every exit path, including failures.
trap 'rm -f "$TEMP_FILE"' EXIT
AWK=$(command -v awk)
PASSFILE="/etc/passwd"
#Getting uid via awk ':' FS
"$AWK" -F: '{print $3}' "$PASSFILE" > "$TEMP_FILE"
#Using awk(gawk)
# RS="" with FS="\n": the uid list is read as one record, one uid per field.
"$AWK" 'BEGIN{FS="\n"; RS=""} {
    total = 0
    i = 1
    while (i <= NF)
    {
        total += $i
        i++
        print "Current total is=" total
        # Announce the next uid only while one remains; a field past NF is empty.
        if (i <= NF)
            print "It will increase by " $i
    }
    print "Total_Sum_of_uid_of_passwd file:", total
    print "NF="NF
}' "$TEMP_FILE"
#!/usr/bin/env ruby
# Sorting by uid.
# puts File.readlines("/etc/passwd").compact.sort_by {|line| line.split(/:/)[2].to_i}
# Just above line is simpler and I think it is better maybe.
#
# Prints every /etc/passwd entry prefixed with its uid ("uid:entry"), ordered
# numerically by that uid.
passwd_path = "/etc/passwd"
sort_by_uid = File.readlines(passwd_path).map do |line|
  fields = line.split(/:/)
  # Re-join the first seven fields. first(7).join tolerates a line with fewer
  # fields, where concatenating a nil field with "+" raised TypeError.
  "#{fields[2]}:#{fields.first(7).join(':')}"
end
# String#to_i reads the leading uid digits of each "uid:..." string, which
# gives a numeric sort key.
sort_by_uid = sort_by_uid.sort_by(&:to_i)
puts "Soting passwd file by uid number"
puts sort_by_uid
Wednesday, January 7, 2015
My practical bash scripts.
# sort passwd by 3rd field separated by ':'
oyj@oyjmint ~ $ cat /etc/passwd | sort -t : -k 3n
# To find info detail on sort command.
oyj@oyjmint ~/rp $ info coreutils 'sort invocation' > sortm
#!/usr/bin/env bash
#
# Copies every regular file of at least 100000 bytes that sits directly inside
# a sub-directory of the current directory into DES_DIR.
#
DES_DIR="/data/des/"
# find replaces the nested `for ... in $(ls)` loops: it needs no cd, does not
# parse ls output, and passes each name to cp as a single argument, so names
# with spaces survive.
#   -mindepth 2 -maxdepth 2 : only entries directly inside a first-level directory
#   -type f                 : regular files only
#   ! -path '*/.*'          : skip hidden files and hidden directories, as plain ls did
#   -size +99999c           : more than 99999 bytes, i.e. >= 100000 bytes
find . -mindepth 2 -maxdepth 2 -type f ! -path '*/.*' -size +99999c \
    -exec cp -fv -- {} "$DES_DIR" \;
###############################################
#!/usr/bin/env bash
# awk if example: print the passwd entries whose uid (3rd field) is >= 1000,
# ordered numerically by uid.
# -k3,3n limits the sort key to field 3 and compares it as a number; a bare
# -k3 compared text running from field 3 to the end of the line.
awk -F: '{if (($3 >= 1000)) print $0 }' /etc/passwd | sort -t: -k3,3n
###############################################
################################################
# I want to delete(rm *.png) all png file.
# There are 217941 files.
# Argument list too long ####
#########################
oyjmint images # ls -f *.png | wc -l
bash: /bin/ls: Argument list too long
0
oyjmint images # rm -f *.png
bash: /bin/rm: Argument list too long
# The -f option does the trick.
oyjmint images # ls -f | wc -l
217941
# So below command did the trick.
oyjmint images # for i in $(ls -f)
> do
> if [[ ! -d $i ]]
> then
> rm -fv $i
> fi
> done
^^;
oyj@oyjmint ~ $ cat /etc/passwd | sort -t : -k 3n
# To find info detail on sort command.
oyj@oyjmint ~/rp $ info coreutils 'sort invocation' > sortm
#!/usr/bin/env bash
#
# Copies every regular file of at least 100000 bytes that sits directly inside
# a sub-directory of the current directory into DES_DIR.
#
DES_DIR="/data/des/"
# find replaces the nested `for ... in $(ls)` loops: it needs no cd, does not
# parse ls output, and passes each name to cp as a single argument, so names
# with spaces survive.
#   -mindepth 2 -maxdepth 2 : only entries directly inside a first-level directory
#   -type f                 : regular files only
#   ! -path '*/.*'          : skip hidden files and hidden directories, as plain ls did
#   -size +99999c           : more than 99999 bytes, i.e. >= 100000 bytes
find . -mindepth 2 -maxdepth 2 -type f ! -path '*/.*' -size +99999c \
    -exec cp -fv -- {} "$DES_DIR" \;
###############################################
#!/usr/bin/env bash
# awk if example: print the passwd entries whose uid (3rd field) is >= 1000,
# ordered numerically by uid.
# -k3,3n limits the sort key to field 3 and compares it as a number; a bare
# -k3 compared text running from field 3 to the end of the line.
awk -F: '{if (($3 >= 1000)) print $0 }' /etc/passwd | sort -t: -k3,3n
###############################################
################################################
# I want to delete(rm *.png) all png file.
# There are 217941 files.
# Argument list too long ####
#########################
oyjmint images # ls -f *.png | wc -l
bash: /bin/ls: Argument list too long
0
oyjmint images # rm -f *.png
bash: /bin/rm: Argument list too long
# The -f option does the trick.
oyjmint images # ls -f | wc -l
217941
# So below command did the trick.
oyjmint images # for i in $(ls -f)
> do
> if [[ ! -d $i ]]
> then
> rm -fv $i
> fi
> done
^^;
Subscribe to:
Posts (Atom)