HTML::TreeBuilder::XPathを使用してこれを行う適切な方法です。
これは、relなどの他の属性を無視し、タグ内の属性の順序に依存しません。
#!/usr/bin/perl
use strict;
use warnings;
use HTML::TreeBuilder::XPath;
use Test::More tests => 1;
# Slurp the whole HTML fixture from the __DATA__ section. `local $/;` is the
# documented slurp idiom: it undefines the input record separator for the
# duration of the do-block only and restores it afterwards.
my $html= do { local $/; <DATA> };
my $root= HTML::TreeBuilder::XPath->new_from_content( $html);
# this is the important part: select the img elements whose src starts with
# "/file?id=" and that carry both a class and an alt attribute; any other
# attribute and the order of attributes within the tag are irrelevant
my @imgs= $root->findnodes( '//img[starts-with( @src,"/file?id=") and @class and @alt]');
# checks the results: one "H:<id>" token per matched image, in document order
my $hits= join ' ', map { "H:" . src_id( $_->attr( 'src')) } @imgs;
is( $hits, 'H:13166 H:13167', "one test");
# shows how to access the attributes (through the public attr() accessor
# rather than by reaching into the element's underlying hash)
foreach my $img (@imgs)
  { my( $src, $class, $alt)= map { $img->attr( $_) } qw( src class alt);
    warn "hit: src= $src - class=$class - alt: $alt - id= ", src_id( $src), "\n";
  }
exit;
sub src_id
{ # Extracts the id from a "/file?id=<id>" URL.
  #   $src - the value of an img src attribute
  # Returns everything following "/file?id=" up to the end of the string,
  # or the literal string 'no id' when the URL does not match.
  my $src= shift;
  if( $src=~ m{/file\?id=(.+)$})
    { return $1; }
  return 'no id';
}
__DATA__
<html>
<head><title>Test HTML</title></head>
<body>
<img rel="lightbox[45451]" src="/file?id=13166" class="bbc_img" alt="myimagess1.jpg">
<img class="bbc_img" src="/file?id=13167" alt="myimagess2.jpg">
<img src="/file?id=13168" class="bbc_img" >
<img src="/file?id=13169" alt="myimagess3.jpg">
<img src="/foo" class="bbc_img" alt="myimagess.jpg4">
</body>
</html>