これは perl の専門家への質問です。ここにスクリプトがあります: http://www.developertoolshed.com/how-to/141/
このスクリプトは、電子メール メッセージを取得し、添付ファイルを削除するために使用します。1 つの問題を除いて、すべてが非常にうまく機能します。すべての Unicode データを取得して ascii に変換しますが、理由がわかりません。
私のperlの知識は貧弱ですが、この問題に対する答えを見つける場所と思われるcpanのドキュメントを読もうとしましたが、何も機能しません。
私の作業のほとんどは、STDIN から入力を受け取り、電子メール メッセージを出力する必要がある deatch_message 関数で行われます。
次の行のようです:
$entity->print();
プロパティでエンコードされたメッセージから文字化けしたテキストに切り替える方法をいくつか作成します。
sub detach_message {
my $self = shift;
my $parser = new MIME::Parser;
$parser->output_under("/mnt/must/mustinbox/big_files/tmp");
$parser->extract_uuencode(1);
my $envelope = <STDIN>;
my $entity = $parser->parse(\*STDIN);
#$entity->dump_skeleton($fh); # for debugging
$self->detach_all($entity);
### if we're in aggressive mode, we need to
### add the blurb to all text/* parts
$self->append_blurbs($entity) if $self->{aggressive};
print $envelope;
$entity->print();
system("/bin/rm", "-rf", $parser->output_dir());
if (@{$self->{detached}}) {
$self->print_index($entity->head()->get('From'),
$entity->head()->get('Subject'));
}
}
ここに私の完全なperlスクリプトがあります:
#!/usr/bin/perl -w
###########################################################################
### Strips attachments out of email messages (of certain types)
### and replaces them with HTML links
###
### For documentation and latest versions see:
### http://detach.optimism.cc/
###
### This program by and copyright Ryan Hamilton <ryan@optimism.cc>,
### and Jason Fesler <jfesler@gigo.com>
### all rights reserved
###
### Edited by Jack Zielke <detach@linuxcoffee.com> and
### Bobby Burden <bobby@codebutcher.com>
### http://linuxcoffee.com/detach
###########################################################################
###
### THIS SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND.
###
###########################################################################
### $Id: detach.pl 213 2011-09-14 15:14:41Z jzielke $
###########################################################################
package Detach;
use strict;
use Log::Log4perl qw(:easy);
use Data::Dumper;
use File::Basename;
use MIME::Parser;
use POSIX qw(strftime);
use Number::Bytes::Human qw(format_bytes);
use Digest::MD5 qw(md5_base64);
sub new {
my $class = shift;
my $self = {};
my $args = shift;
my %ARGS = map { $_ => 1 } qw(aggressive web_root dir_root hash shorten msize);
DEBUG(Dumper(\%ARGS));
DEBUG(Dumper($args));
for my $key (keys %$args) {
if (!$ARGS{$key}) {
die "ERROR: invalid option '$key'\n";
}
$self->{$key} = $args->{$key};
}
for my $required (qw(web_root dir_root) ) {
if (! $self->{$required} ) {
die "ERROR: required option '$required' not specified\n";
}
}
bless $self, $class;
my $stamp;
if (! $self->{hash}) {
$stamp = strftime("%Y/%m/%d/%H:%M:%S-$$",localtime);
} else {
# use jfesler's hash based stamp
my $hash = '';
do {
$hash .= md5_base64(join("",time,$$,$<,$>,$self));
$hash =~ s/[^a-zA-Z0-9]//g; # Really, I want base 62
$hash =~ tr/vVO0Il12Z5S/vV/sd; # And, avoid pain with visual cut/paste,
# now base 53
} while (length($hash) < 10); # Just in case
$stamp= substr($hash,0,2) . # 168287943181908783 combos 53^10 - 2(53^9)
"/" . substr($hash,2,8);
}
$self->{dir_root} .= "/$stamp/";
$self->{web_root} .= "/$stamp/";
$self->{dir_root} =~ s|//|/|g;
$self->{web_root} =~ s|([^:])//|$1/|g;
$self->{detached} = [];
$self->{urls} = [];
$self->{firsts} = {};
$self->{cids} = {};
DEBUG(Dumper($self));
return $self;
}
sub detach_message {
my $self = shift;
my $parser = new MIME::Parser;
$parser->output_under("/tmp");
$parser->extract_uuencode(1);
$parser->decode_bodies(1);
my $envelope = <STDIN>;
my $entity = $parser->parse(\*STDIN);
#$entity->dump_skeleton(\*STDERR); # for debugging
$self->detach_all($entity);
### if we're in aggressive mode, we need to
### add the blurb to all text/* parts
$self->append_blurbs($entity) if $self->{aggressive};
print $envelope;
$entity->print();
system("/bin/rm", "-rf", $parser->output_dir());
if (@{$self->{detached}}) {
$self->print_index($entity->head()->get('From'),
$entity->head()->get('Subject'));
}
}
### If we're in aggressive mode, we need to append
### the detachment blurb to the first "part"
### for each text/* mime type
sub append_blurbs {
my $self = shift;
my($entity) = @_;
DEBUG("appending blurbs ".Dumper([ keys %{$self->{firsts}}]));
if (@{$self->{urls}}) {
DEBUG("got urls");
if (!scalar keys %{$self->{firsts}}) {
DEBUG("There are detachments and nothing to append the blurb to. Creating empty text/plain.");
my $part = build MIME::Entity (
Data => '',
Encoding => 'quoted-printable',
);
$entity->add_part($part, 0);
$self->{firsts}{'text/plain'} = $part;
}
foreach my $m (keys %{$self->{firsts}}) {
DEBUG($m);
my $e = $self->{firsts}{$m};
my $body = $e->bodyhandle;
my @lines = $body->as_lines;
next unless ($body);
DEBUG(" ready to append $m");
my $lines = $self->append_blurb($m,\@lines);
if (!$lines) {
print STDERR "got no lines when appending blurb $m\n";
}
if ($lines) {
my $b = new MIME::Body::InCore $lines;
if ($b) {
$e->bodyhandle($b) ;
} else {
DEBUG("Failed to update body part with index while allocating new MIME::Body::InCore");
}
}
}
}
}
sub append_blurb {
my $self = shift;
my($type,$lines) = @_;
DEBUG("appending blurb of type $type");
my $header = " - The message contains attachments.<br />webservices delivers your attachments as links for saving disk space and backup purposes.";
my $footer = "Only click these links if you trust the sender, as well as this message.";
my $footer2 = "";
if ($type =~ m#text/plain#) {
return $self->append_blurb_plain($header, $footer, $footer2, $lines);
} elsif ($type =~ m#text/html#) {
return $self->append_blurb_html($header, $footer, $footer2, $lines);
} elsif ($type =~ "text/(rich|enriched)#") {
return $self->append_blurb_rtf($header, $footer, $footer2, $lines);
}
}
sub append_blurb_plain {
my $self = shift;
my($header, $footer, $footer2, $lines) = @_;
my @blurb = ("\n\n\n",
" --- 8< --- detachments --- 8< ---\n",
" $header\n",
map(" $_\n", (@{$self->{urls}})),
" $footer\n",
" $footer2\n",
" --- 8< --- detachments --- 8< ---\n",
"\n");
push(@$lines, @blurb);
return $lines;
}
sub append_blurb_html {
my $self = shift;
my($header, $footer, $footer2, $lines) = @_;
my @blurb = ("<hr /><p><b>Attachments</b> $header\n",
"<ul>",);
foreach(@{$self->{urls}}) {
my $pretty_url = $_;
$pretty_url =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;
push (@blurb, "<li><a href=\"$_\">$pretty_url</a>\n");
my $cid = $self->{cids}{$_};
if ($cid) {
$cid =~ /<(.+)>/;
$cid = $1;
my $url = $_;
DEBUG("Replacing cid:$cid with $url");
foreach (@$lines) {
$_ =~ s#cid:$cid#$url#g;
}
}
}
push(@blurb, ("</ul>$footer<br />\n",
"$footer2\n",
"<p>",));
DEBUG("Adding html blurb: ". join("\n", @blurb));
my $found=0;
foreach my $line (@$lines) {
if ($line =~ m#</body>#i) {
# Sneak it in before </body>
my $blurb = join("",@blurb);
$line =~ s#</body>#$blurb </body>#;
$found++; last;
}
}
unless ($found) {
push(@$lines,@blurb);
}
return $lines;
}
sub append_blurb_rtf {
my $self = shift;
my($header, $footer, $footer2, $lines) = @_;
my @blurb = (" \\par\n--- 8< --- detachments --- 8< ---\\par\n",
"$header\\par\n",
map(" $_\\par\n", (@{$self->{urls}})),
"$footer\\par\n",
"$footer2\\par\n",
"\\par\n");
$lines->[@$lines-1] =~ #}$##; Remove trailing container bracket
push(@$lines, @blurb);
$lines->[@$lines-1] .= "}"; # Replace trailing container bracket
return $lines;
}
sub detach_all {
my $self = shift;
my($entity) = @_;
for my $part ($entity->parts()) {
if ($part->head()->recommended_filename() || $part->head()->get('Content-ID',0)) {
my($h,$b) = $self->detach_part($part);
} elsif ($part->parts()) {
$self->detach_all($part);
} else {
# keep track of the first part for each mime type
# so that later we can come back and
# add a blurb to each of these parts
# (when using opt_aggressive)
my $m = $part->head->mime_type;
$self->{firsts}{$m} ||= $part;
}
}
if ($self->{aggressive}) {
my @keep = grep (! $_->{detached} , $entity->parts);
$entity->parts(\@keep);
}
}
sub detach_part {
my $self = shift;
my($entity) = @_;
my $src = $entity->bodyhandle()->path();
my $base = basename($src);
if ($self->{msize}) {
my $filesize = -s $src;
DEBUG("File name: ".$src);
DEBUG("File size: ".$filesize);
if ($self->{msize} > $filesize) {
return;
}
}
system("mkdir", "-p", $self->{dir_root}) == 0
or die "ERROR: unable to create $self->{dir_root} : $!\n";
chmod(0777, $self->{dir_root});
my $name = $self->uniq_name($self->{dir_root},$base);
my $dest = $self->{dir_root} . $name;
$name =~ s/([^A-Za-z0-9.])/sprintf("%%%02X", ord($1))/seg;
my $url = $self->{web_root} . $name;
my $cid = $entity->head()->get('Content-ID',0);
if ($cid) {
$self->{cids}{$url} = $cid;
}
system("/bin/mv",$src,$dest);
push(@{$self->{detached}},$dest);
push(@{$self->{urls}},$url);
DEBUG("Detach path: ".$self->{dir_root});
DEBUG("Detach url: ".$self->{web_root});
DEBUG("Detach url: ".$url);
$entity->{detached}=1;
my $h = MIME::Head->new();
#$h->replace('Content-type','text/plain; charset=US-ASCII');
$h->replace('Content-type','text/plain; charset=8BIT');
my $b = new MIME::Body::InCore ["\n", $self->{web_root}."\n", $url."\n"];
$entity->head($h);
$entity->bodyhandle($b);
}
sub uniq_name {
my $self = shift;
my($dir,$name) = @_;
$name =~ s#^\.#/_#sg; # No leading dot
if ($self->{shorten}) {
$name =~ s/^[^a-zA-Z0-9_.-]/_/g;
$name =~ s#^([^/]{21,})\.([^/.]+)$#substr($1,0,20) . ".$2" #ge;
}
if (-f "$dir$name") {
DEBUG("$name exists");
my($base,$ext);
if ($name =~ /^(.+)\.(.+)$/) {
$base = $1;
$ext = $2;
} else {
$base = "";
$ext = $name;
}
my $i=1;
$i++ while (-f "$dir$base.$i.$ext");
$name = "$base.$i.$ext";
DEBUG("USING $name");
}
return $name;
}
sub print_index {
my $self = shift;
my($from,$subj) = @_;
chomp($from);
chomp($subj);
my $f = "$self->{dir_root}/index.html";
open(F,">$f") or die "ERROR: unable to open $f : $!\n";
print F "<HTML><BODY><PRE>From: $from\nSubj: $subj\n</PRE><UL>\n";
for (@{$self->{detached}}) {
my $u = substr($_,length($self->{dir_root}));
$u =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
my $b = basename($_);
my $size = format_bytes((stat($_))[7]);
print F "<LI><A href='$self->{web_root}$u'>$b</A>";
print F " - $size</LI>\n";
}
print F "</UL>\n";
close(F);
}
###########################################################################
package main;
use strict;
use Getopt::Long;
use Log::Log4perl qw(:easy);
$|=1;
umask(0000);
my($opt_web, $opt_dir, $opt_verbose, $opt_help, $opt_aggressive, $opt_hash, $opt_shorten, $opt_size);
if (!GetOptions("d|dir-root=s" => \$opt_dir,
"w|web-root=s" => \$opt_web,
"a|aggressive" => \$opt_aggressive,
"s|shorten" => \$opt_shorten,
"hash" => \$opt_hash,
"size=s" => \$opt_size,
"v|verbose" => \$opt_verbose,
"h|help" => \$opt_help) || $opt_help) {
print STDERR <<EOF;
Usage: detach [options] [file]
Options:
-d, --dir-root root of directory tree for detachemnts
-w, --web-root URL to dir-root
-s, --shorten shorten attachment file names
--hash use hash instead of date in dir names
--size minimum file size to detach (in bytes)
-a, --aggressive remove detached attachments, and embed
the blurb in text parts instead
-v, --verbose debugging output
EOF
exit(1);
}
if (!$opt_web || !$opt_dir) {
my @info = getpwuid( $< );
my $user = $info[0];
my $home = $info[7];
$opt_web = "http://localhost/~$user/detach" if !$opt_web;
$opt_dir = "$home/html/detach" if !$opt_dir;
}
Log::Log4perl->easy_init($opt_verbose ? $DEBUG : $INFO);
DEBUG("Web root : $opt_web");
DEBUG("Dir root : $opt_dir");
my $detach = new Detach( { dir_root => $opt_dir,
web_root => $opt_web,
aggressive => $opt_aggressive,
shorten => $opt_shorten,
msize => $opt_size,
hash => $opt_hash });
$detach->detach_message();
どんな助けでも大歓迎です