Scott:
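Get wget and Perl, then download the prices page (the page URL is missing from this message; put it between the quotes):
wget -O cmeprices.html "<URL of the prices page>"
Save the script below as html2text and run it to remove the HTML tags; it writes the plain text to cmeprices.txt:
perl html2text cmeprices.html
Then parse cmeprices.txt by matching on one of the month strings (Jan/Feb/Mar/Apr)...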
HTH
#!/usr/bin/perl
######################################################################
# HTML to text converter Version 1.01 #
# Copyright 1999 Frederic TYNDIUK (FTLS) All Rights Reserved. #
# E-Mail: tyndiuk@... Script License: GPL #
# Created 06/30/99 Last Modified 06/30/99 #
# Scripts Archive at: #
######################################################################
# Function : #
# Suppress All HTML TAGs in a file. #
######################################################################
##################### license & copyright header #####################
# #
# Copyright (c) 1999 TYNDIUK Frederic #
# #
# This program is free software; you can redistribute it and/or #
# modify it under the terms of the GNU General Public License as #
# published by the Free Software Foundation; either version 2 of #
# the License, or (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program in the file 'COPYING'; if not, write to #
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, #
# Boston, MA 02111-1307, USA, or contact the author: #
# #
# TYNDIUK Frederic <tyndiuk@...> #
# <> #
# #
################### end license & copyright header ###################
######################################################################
# Script-wide settings.
#   $Version        - version string shown in the banner
#   $Copyright      - banner printed by -v/--version and at the top of the usage text
#   $NameResultFile - output base name given with -r/--result ("" = derive from input)
#   $No             - count of files converted so far (used to number -r outputs)
#   $BgColor, $UsePre - initialised here; not referenced in the visible part of the script
$Version = "1.01";
# Each banner line is a single line ending in "\n" (the copy this was taken
# from had hard line breaks and a garbled ";n" inside the strings).
$Copyright = "HTML to Text converter v$Version (C) 1999 Frederic TYNDIUK (alias FTLS)\n";
# NOTE(review): the URL that followed "News and Updates:" is missing from
# this copy of the script -- restore it if known.
$Copyright .= "Report bugs to tyndiuk\@ftls.org, News and Updates:\n";
$NameResultFile = $BgColor = "";
$UsePre = $No = 0;
# Parse the leading command-line options (every argument starting with "-").
#   -r File, --result File : base name for the output file(s); a trailing
#                            ".txt" is removed because it is appended later
#   -v, --version          : print the banner and exit
#   -h, --help             : print the usage text and exit
# Anything else starting with "-" is reported and skipped.
while (@ARGV && $ARGV[0] =~ /^-/) {
    my $Option = shift(@ARGV);
    if (($Option eq "-r") || ($Option eq "--result")) {
        # The option's value is the next argument, if there is one.
        if (@ARGV && defined($ARGV[0]) && $ARGV[0] ne "") {
            $NameResultFile = shift(@ARGV);
            # Anchored: only strip a trailing ".txt", never one in the
            # middle of the name.
            $NameResultFile =~ s/\.txt$//;
        }
    } elsif (($Option eq "-v") || ($Option eq "--version")) {
        print $Copyright;
        exit;
    } elsif (($Option eq "-h") || ($Option eq "--help")) {
        Usage();
    } else {
        print STDERR "$0: unknown option '$Option' ignored\n";
    }
}
# At least one HTML file is required.
if (@ARGV < 1) {
    Usage();
}
# Convert every remaining command-line argument, stopping at the first
# empty one.  $No counts the files handled so far.
while (@ARGV && $ARGV[0] ne "") {
    $SourceFile = $ARGV[0];
    $ResultFile = ResultFileName($SourceFile, $NameResultFile, $No);
    $No++;
    html2txt($SourceFile, $ResultFile);
    shift(@ARGV);
}

# Build the name of the text file to write for one input file.
#
# Arguments:
#   $SourceFile     - path of the HTML input file
#   $NameResultFile - base name given with -r/--result, or "" if none
#   $No             - zero-based index of this input file
# Returns the output file name, always ending in ".txt":
#   - without -r: the source path with its final extension removed
#   - with -r:    the given base name, plus "-<No>" for every file after
#                 the first so that several inputs do not overwrite each other
sub ResultFileName {
    my($SourceFile, $NameResultFile, $No) = @_;
    my $Base;
    if ($NameResultFile eq "") {
        $Base = $SourceFile;
        # Remove only the last extension of the file name itself.  The
        # look-behind keeps "./", "../" and dot-files intact, and the "$"
        # anchor keeps dots in directory names or earlier in the name.
        $Base =~ s/(?<=[^\/\\])\.\w*$//;
    } else {
        $Base = $NameResultFile;
        $Base .= "-".$No if ($No > 0);
    }
    return $Base.".txt";
}
# Convert one HTML file to plain text.
#
# Arguments:
#   $SourceFile - path of the HTML file to read
#   $ResultFile - path of the text file to write (created or overwritten)
#
# Steps: drop everything up to the first </HEAD>, collapse whitespace, turn
# block-level tags into newlines, strip all remaining tags, then replace the
# named entities listed by HTMLSymb() with their characters.
#
# Dies if the source cannot be opened or the result cannot be written.
sub html2txt {
    my($SourceFile, $ResultFile) = @_;
    my($HTML, $Head, $Body, %CharFor);

    open(my $In, "<", $SourceFile)
        || die("Cannot open HTML source file : $SourceFile, Error $!\n");
    {
        local $/;               # slurp the whole file in one read
        $HTML = <$In>;
    }
    close($In);
    $HTML = "" unless defined($HTML);   # empty file

    # Discard the document head.  The limit of 2 keeps everything after the
    # first </HEAD>; a document without </HEAD> is kept whole instead of
    # being thrown away.
    ($Head, $Body) = split(/<\/HEAD>/i, $HTML, 2);
    $HTML = $Body if defined($Body);

    $HTML =~ s/&nbsp;/ /g;
    $HTML =~ s/\s+/ /g;
    $HTML =~ s/<p[^>]*>/\n\n/gi;        # <p> -> blank line
    # <br> (any form), <H*>, </H*>, <li>, <dt>, <dd>, </tr> -> newline
    $HTML =~ s/<br[^>]*>|<\/*h[1-6][^>]*>|<li[^>]*>|<dt[^>]*>|<dd[^>]*>|<\/tr[^>]*>/\n/gi;
    $HTML =~ s/<[^>]*>//g;              # strip every remaining tag
    $HTML =~ s/\n\s*\n\s*/\n\n/g;       # at most one blank line in a row
    $HTML =~ s/\n */\n/g;               # no leading spaces on a line

    # Each HTMLSymb() entry is "<character> <entity>".  Entries without an
    # entity part are skipped; for a repeated entity the first entry wins.
    foreach my $SymbLine (HTMLSymb()) {
        my($Char, $Entity) = split(/[ \t]+/, $SymbLine, 2);
        next unless defined($Entity) && $Entity ne "";
        $CharFor{$Entity} = $Char unless exists($CharFor{$Entity});
    }
    # Replace all entities in a single pass so that the result of one
    # replacement is never decoded again ("&amp;lt;" must become "&lt;",
    # not "<").
    if (%CharFor) {
        my $EntityRe = join("|", map { quotemeta($_) }
                                 sort { length($b) <=> length($a) }
                                 keys(%CharFor));
        $HTML =~ s/($EntityRe)/$CharFor{$1}/g;
    }

    # Write the result file.
    open(my $Out, ">", $ResultFile)
        || die("Cannot write file $ResultFile, Error $!");
    print $Out $HTML;
    # Buffered write errors only show up when the handle is closed.
    close($Out) || die("Cannot write file $ResultFile, Error $!");
}
# Print the banner and the command-line help to STDERR, then terminate the
# script with exit status 1.  Never returns.
sub Usage {
    print STDERR <<"EOF";
$Copyright
Usage: $0 [Options] HTMLFile...
Options:
  -r File, --result File   result file Name (without .txt)
  -v, --version            output version information and exit
  -h, --help               display this help and exit
EOF
    exit 1;
}
# HTML Codes
#
# Returns the table of named HTML entities as a list of strings, each of the
# form "<character> <entity>" (one space between the two), for example
# "< &lt;".  Characters above 0x7F are single ISO-8859-1 (Latin-1) code
# points.
#
# The table holds &quot;, &lt;, &gt;, every Latin-1 entity from &nbsp; (0xA0)
# to &yuml; (0xFF), and finally &amp;.  "&amp;" is deliberately the LAST
# entry: a caller that applies the entries one after another must decode it
# last, otherwise text such as "&amp;lt;" would be decoded twice.
sub HTMLSymb {
    # Entity names for code points 0xA0..0xFF in code-point order, eight
    # per row: the name at index $i stands for chr(0xA0 + $i).
    my @Latin1Names = qw(
        nbsp   iexcl  cent   pound  curren yen    brvbar sect
        uml    copy   ordf   laquo  not    shy    reg    macr
        deg    plusmn sup2   sup3   acute  micro  para   middot
        cedil  sup1   ordm   raquo  frac14 frac12 frac34 iquest
        Agrave Aacute Acirc  Atilde Auml   Aring  AElig  Ccedil
        Egrave Eacute Ecirc  Euml   Igrave Iacute Icirc  Iuml
        ETH    Ntilde Ograve Oacute Ocirc  Otilde Ouml   times
        Oslash Ugrave Uacute Ucirc  Uuml   Yacute THORN  szlig
        agrave aacute acirc  atilde auml   aring  aelig  ccedil
        egrave eacute ecirc  euml   igrave iacute icirc  iuml
        eth    ntilde ograve oacute ocirc  otilde ouml   divide
        oslash ugrave uacute ucirc  uuml   yacute thorn  yuml
    );

    my @Table = (
        "\" &quot;",
        "< &lt;",
        "> &gt;",
    );
    for my $i (0 .. $#Latin1Names) {
        push(@Table, chr(0xA0 + $i) . " &" . $Latin1Names[$i] . ";");
    }
    push(@Table, "& &amp;");
    return @Table;
}
__________________________________________________
Do You Yahoo!?
Yahoo! - Official partner of 2002 FIFA World Cup