少し前にAsk Ubuntuでこの質問をしたところ、おそらく最適化の問題であるため、ここにコードを提供するように指示されました。スクリプト全体を含めました。
全体的な目的は、ディレクトリ内にある約7000個のHTMLファイルを調べ、そこから特定の情報を解析し、その結果を1行としてテキストファイルに書き出すことです。
#!/usr/bin/perl
use Switch;
use strict;
use HTML::Query 'Query';
# Directory that holds the HTML files to be parsed.
my $dir = '/home/mark/Documents/Perl/garchivesfiles/completeresults';

# Collect the full path of every plain file in $dir.  Dot-entries are
# skipped, and so is anything that is not a regular file (a subdirectory
# would otherwise be handed to the HTML parser).  The names are sorted so
# that the order of the appended output is the same on every run; readdir
# makes no ordering promise.
opendir(my $dh, $dir) or die "Can't open $dir: $!";
my @entry_names = grep { $_ !~ /^\./ && -f "$dir/$_" } readdir $dh;
closedir $dh;
my @files = map {"$dir/$_"} sort @entry_names;

# NOTE(review): $total, %xlateNum2Text and $inc are not referenced anywhere
# in the visible script; they are kept only so that any code outside this
# view keeps compiling under "use strict".
my $total;

# Zero-based month index => English month name.
my %xlateNum2Text = qw (0 January
1 February
2 March
3 April
4 May
5 June
6 July
7 August
8 September
9 October
10 November
11 December
);
my $inc = 0;
# Main pass: for every file in @files, pull the score, team names, date and
# goal lists out of the HTML, build one row per goal, sort the rows by
# minute and append them to goalcsv.txt (every row is echoed to STDOUT too).
#
# The output file is opened once for the whole run instead of once per
# match, and is closed (with an error check) when the run is finished.
my $goal_csv_path = 'goalcsv.txt';
open(my $goal_csv, '>>', $goal_csv_path)
    or die "Can't open $goal_csv_path for appending: $!";

foreach my $file (@files) {
    # Slurp the whole document.  An unreadable file is reported and skipped
    # so that one bad file does not abort the remaining ones.
    my $html_fh;
    if (!open($html_fh, '<', $file)) {
        warn "Skipping $file: can't open: $!\n";
        next;
    }
    my $html = do { local $/; <$html_fh> };
    close($html_fh);

    my $q = Query(text => $html);

    # Only the first match of each selector is used.
    my ($home_score_el) = $q->query("span.homeScore");
    my ($away_score_el) = $q->query("span.awayScore");
    my ($home_team_el)  = $q->query("table.teaminfo td.home span");
    my ($away_team_el)  = $q->query("table.teaminfo td.away span");
    if (grep { !defined $_ } ($home_score_el, $away_score_el, $home_team_el, $away_team_el)) {
        warn "Skipping $file: score or team markup not found\n";
        next;
    }

    my $home_score = $home_score_el->as_text();
    # The away score must come from the away element; reading the home
    # element twice would drop every match in which only the away side scored.
    my $away_score = $away_score_el->as_text();
    my $home_team  = rightTeamName($home_team_el->as_text());
    my $away_team  = rightTeamName($away_team_el->as_text());

    # A goalless match produces no rows at all.
    next if $home_score eq "0" && $away_score eq "0";

    my ($date_el)    = $q->query("p.fixtureinfo span");
    my ($fixture_el) = $q->query("p.fixtureinfo");
    if (!defined $date_el || !defined $fixture_el) {
        warn "Skipping $file: fixture info markup not found\n";
        next;
    }

    # The date text is split on whitespace and fields 1..3 are used as
    # day, month name and year.
    # NOTE(review): presumably field 0 is the weekday - confirm against a
    # sample page.
    my @date_parts = split(" ", $date_el->as_text());
    my $day        = $date_parts[1];
    my $month_name = $date_parts[2];
    my $year       = $date_parts[3];

    # Called in scalar context so the row always gets exactly one column
    # for the month number, whatever monthConvert returns.
    my $month_number = monthConvert($month_name);
    my $mysql_date   = join("-", $year, _two_digit($month_number), _two_digit($day));

    # Second " | "-separated field of the fixture paragraph.
    my @fixture_parts = split(/ \| /, $fixture_el->as_text());
    my $fixture_info  = $fixture_parts[1];

    # Columns that are identical for every goal of this match.
    my @shared_columns = (
        $day, $month_name, $year, $fixture_info,
        "Barclays Premier League", $month_number, $mysql_date,
    );

    my @home_goal_els = $q->query("div.home ul li");
    my @away_goal_els = $q->query("div.away ul li");
    my @goal_rows = (
        _goal_rows_for_side(\@home_goal_els, $home_team, $away_team, \@shared_columns),
        _goal_rows_for_side(\@away_goal_els, $away_team, $home_team, \@shared_columns),
    );

    # Order by minute (column 1), then by added time (column 11, the last one).
    @goal_rows = sort { $a->[1] <=> $b->[1] || $a->[11] <=> $b->[11] } @goal_rows;

    # Emit the rows.  The "for:<team>" column is followed by the running
    # score from the scoring side's point of view and a home/away flag.
    my $home_goal_count = 0;
    my $away_goal_count = 0;
    foreach my $row (@goal_rows) {
        my $line = "";
        foreach my $val (@{$row}) {
            if ($val eq "for:".$home_team) {
                $home_goal_count++;
                $line .= "$val,".$home_goal_count.",".$away_goal_count.",true,";
            } elsif ($val eq "for:".$away_team) {
                $away_goal_count++;
                $line .= "$val,".$away_goal_count.",".$home_goal_count.",false,";
            } else {
                $line .= "$val,";
            }
        }
        $line .= "\n";
        print $goal_csv $line;
        print $line;
    }
}

# Buffered write errors only surface when the handle is closed.
close($goal_csv) or die "Can't close $goal_csv_path: $!";

# Left-pad a one-character value with "0"; longer values are returned
# unchanged and an undefined value becomes the empty string.
sub _two_digit {
    my ($value) = @_;
    return "" if !defined $value;
    return "0".$value if length($value) == 1;
    return $value;
}

# Build the goal rows for one side of a match.
#
#   $goal_elements  - array ref of elements whose text looks like
#                     "Scorer (12, 45 Pen, 90 +2)"
#   $scoring_team   - team credited with these goals
#   $opponent       - the other team
#   $shared_columns - array ref of the per-match columns put in every row
#
# Returns a list of array refs laid out as
#   scorer, minute, "for:<scoring team>", opponent, <shared columns>, added time
# Entries containing "OG" are left out, so they are not counted in the
# running score written by the caller.
sub _goal_rows_for_side {
    my ($goal_elements, $scoring_team, $opponent, $shared_columns) = @_;
    my @rows;
    foreach my $goal_element (@{$goal_elements}) {
        my $text = $goal_element->as_text();
        $text =~ s/\)//g;

        # "Scorer (times" => scorer name and the list of times.
        my @parts      = split(/ \(/, $text);
        my $scorer     = $parts[0];
        my $times_text = $parts[1];
        if (!defined $times_text) {
            $times_text = "";
        }

        # One token per goal; a scorer with several goals has them comma-separated.
        my @time_tokens = ($times_text);
        if (index($times_text, ",") != -1) {
            @time_tokens = split(/,/, $times_text);
        }

        foreach my $token (@time_tokens) {
            $token =~ s/Pen//g;
            $token =~ s/ //g;
            next if index($token, "OG") != -1;

            # "90+2" => minute 90 with 2 minutes of added time; otherwise
            # the added time is 0.
            my $minute     = $token;
            my $added_time = 0;
            if (index($token, "+") != -1) {
                my @pieces = split(/\+/, $token);
                $minute     = $pieces[0];
                $added_time = $pieces[1];
            }

            push @rows, [
                $scorer, $minute, "for:".$scoring_team, $opponent,
                @{$shared_columns}, $added_time,
            ];
        }
    }
    return @rows;
}
# Translate the abbreviated team name used on the scraped pages into the
# full club name written to the output.
#
#   $_[0] - team name as it appears in the HTML
#
# Returns the mapped full name, or the argument unchanged when there is no
# mapping for it.
sub rightTeamName {
    my ($teamname) = @_;

    # The table lives inside the sub on purpose: this sub is called from
    # top-level code that runs before execution reaches this point of the
    # file, so a file-scope "my %table = (...)" placed here would still be
    # empty at call time.
    my %full_name_for = (
        "Nott'm Forest" => "Nottingham Forest",
        "QPR"           => "Queens Park Rangers",
        "Southampton"   => "Southampton FC",
        "Norwich"       => "Norwich City",
        "Tottenham"     => "Tottenham Hotspur",
        "Leeds"         => "Leeds United",
        "Middlesbrough" => "Middlesbrough FC",
        "Chelsea"       => "Chelsea FC",
        "Arsenal"       => "Arsenal FC",
        "Oldham"        => "Oldham Athletic",
        "Ipswich"       => "Ipswich Town",
        "Man Utd"       => "Manchester United",
        "Man City"      => "Manchester City",
        "Sheffield Wed" => "Sheffield Wednesday",
        "Blackburn"     => "Blackburn Rovers",
        "Wimbledon"     => "AFC Wimbledon",
        "Liverpool"     => "Liverpool FC",
        "Coventry"      => "Coventry City",
    );

    if (defined $teamname && exists $full_name_for{$teamname}) {
        return $full_name_for{$teamname};
    }
    return $teamname;
}
# Convert an English month name ("January" .. "December") to its number
# (1 .. 12).
#
#   $_[0] - month name, matched exactly (capitalised, not abbreviated)
#
# Returns the month number, or undef for anything else.  The result is
# always a single scalar, also in list context, so using the call inside a
# list never changes the number of elements in that list.
sub monthConvert {
    my ($month_name) = @_;
    if (!defined $month_name) {
        $month_name = "";
    }

    # Kept inside the sub because the callers run before execution reaches
    # this point of the file; a file-scope table would not be filled yet.
    my %month_number_for = (
        "January"   => 1,
        "February"  => 2,
        "March"     => 3,
        "April"     => 4,
        "May"       => 5,
        "June"      => 6,
        "July"      => 7,
        "August"    => 8,
        "September" => 9,
        "October"   => 10,
        "November"  => 11,
        "December"  => 12,
    );

    return $month_number_for{$month_name};
}