4

ダイナミック プログラミング (セミ グローバル アラインメント) を使用して、サイズ 5500 塩基の参照配列とサイズ 3600 のクエリ配列を比較しています。 「メモリ不足」。より小さなシーケンスで正常に動作することを知っているので、私の質問は次のとおりです。前もって感謝します。

sub semiGlobal {
    my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = @_;
    # initialization: first row to 0 ;
    my @matrix;
    $matrix[0][0]{score}   = 0;
    $matrix[0][0]{pointer} = "none";
    for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
        $matrix[0][$j]{score}   = 0;
        $matrix[0][$j]{pointer} = "none";
    }

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
        $matrix[$i][0]{score}   = $GAP * $i;
        $matrix[$i][0]{pointer} = "up";
    }

    # fill
    my $max_i     = 0;
    my $max_j     = 0;
    my $max_score = 0;

    print "seq2: ".length($seq2);
    print "seq1: ".length($seq1);

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
        for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
            my ( $diagonal_score, $left_score, $up_score );
            # calculate match score
            my $letter1 = substr( $seq1, $j - 1, 1 );
            my $letter2 = substr( $seq2, $i - 1, 1 );
            if ( $letter1 eq $letter2 ) {
                $diagonal_score = $matrix[ $i - 1 ][ $j - 1 ]{score} +  $MATCH;
            }
            else {
                $diagonal_score = $matrix[ $i - 1 ][ $j - 1 ]{score} +  $MISMATCH;
            }

            # calculate gap scores
            $up_score   = $matrix[ $i - 1 ][$j]{score} +  $GAP;
            $left_score = $matrix[$i][ $j - 1 ]{score} +  $GAP;

            # choose best score
            if ( $diagonal_score >= $up_score ) {
                if ( $diagonal_score >= $left_score ) {
                    $matrix[$i][$j]{score}   = $diagonal_score;
                    $matrix[$i][$j]{pointer} = "diagonal";
                }
                else {
                    $matrix[$i][$j]{score}   = $left_score;
                    $matrix[$i][$j]{pointer} = "left";
                }
            }
            else {
                if ( $up_score >= $left_score ) {
                    $matrix[$i][$j]{score}   = $up_score;
                    $matrix[$i][$j]{pointer} = "up";
                }
                else {
                    $matrix[$i][$j]{score}   = $left_score;
                    $matrix[$i][$j]{pointer} = "left";
                }
            }

            # set maximum score
            if ( $matrix[$i][$j]{score} > $max_score ) {
                $max_i     = $i;
                $max_j     = $j;
                $max_score = $matrix[$i][$j]{score};
            }
        }
    }

    my $align1 = "";
    my $align2 = "";
    my $j = $max_j;
    my $i = $max_i;

    while (1) {
        if ( $matrix[$i][$j]{pointer} eq "none" ) {
            $stseq1 = $j;
            last;
        }

        if ( $matrix[$i][$j]{pointer} eq "diagonal" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
            $j--;
        }
        elsif ( $matrix[$i][$j]{pointer} eq "left" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= "-";
            $j--;
        }
        elsif ( $matrix[$i][$j]{pointer} eq "up" ) {
            $align1 .= "-";
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
        }
    }

    $align1 = reverse $align1;
    $align2 = reverse $align2;
    return ( $align1, $align2, $stseq1 ,$max_j);
}
4

2 に答える 2

2

この問題を解決する 1 つの方法は@matrix、 をファイルに関連付けることです。ただし、これによりプログラムの速度が大幅に低下します。このことを考慮:

sub semiGlobal {

    use Tie::Array::CSV; 

    tie my @matrix, 'Tie::Array::CSV', 'temp.txt'; # Don't forget to add your own error handler.


    my ( $seq1, $seq2,$MATCH,$MISMATCH,$GAP ) = @_;

    # initialization: first row to 0 ;

    $matrix[0][0] = '0 n';
    for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
        $matrix[0][$j] = '0 n';
    }

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {

        my $score = $GAP * $i;
        $matrix[$i][0] = join ' ',$score,'u';
    }

    #print Dumper(\@matrix);

    # fill
    my $max_i     = 0;
    my $max_j     = 0;
    my $max_score = 0;

    print "seq2: ".length($seq2)."\n";
    print "seq1: ".length($seq1)."\n";

    for ( my $i = 1 ; $i <= length($seq2) ; $i++ ) {
        for ( my $j = 1 ; $j <= length($seq1) ; $j++ ) {
            my ( $diagonal_score, $left_score, $up_score );

            # calculate match score
            my $letter1 = substr( $seq1, $j - 1, 1 );
            my $letter2 = substr( $seq2, $i - 1, 1 );
            my $score = (split / /, $matrix[ $i - 1 ][ $j - 1 ])[0];
            if ( $letter1 eq $letter2 ) {
                $diagonal_score = $score +  $MATCH;
            }
            else {
                $diagonal_score = $score +  $MISMATCH;
            }

            # calculate gap scores
            $up_score   = (split / /,$matrix[ $i - 1 ][$j])[0] +  $GAP;
            $left_score = (split / /,$matrix[$i][ $j - 1 ])[0] +  $GAP;

            # choose best score
            if ( $diagonal_score >= $up_score ) {
                if ( $diagonal_score >= $left_score ) {
                    $matrix[$i][$j] = join ' ',$diagonal_score,'d';
                }
                else {
                    $matrix[$i][$j] = join ' ', $left_score, 'l';
                }
            }
            else {
                if ( $up_score >= $left_score ) {
                    $matrix[$i][$j] = join ' ', $up_score, 'u';
                }
                else {
                    $matrix[$i][$j] = join ' ', $left_score, 'l';
                }
            }

            # set maximum score
            if ( (split / /, $matrix[$i][$j])[0] > $max_score ) {
                $max_i     = $i;
                $max_j     = $j;
                $max_score = (split / /, $matrix[$i][$j])[0];

            }
        }
    }


    my $align1 = "";
    my $align2 = "";
    my $stseq1;

    my $j = $max_j;
    my $i = $max_i;

    while (1) {
        my $pointer = (split / /, $matrix[$i][$j])[1];
        if ( $pointer eq "n" ) {
            $stseq1 = $j;
            last;
        }

        if ( $pointer eq "d" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
            $j--;
        }
        elsif ( $pointer eq "l" ) {
            $align1 .= substr( $seq1, $j - 1, 1 );
            $align2 .= "-";
            $j--;
        }
        elsif ( $pointer eq "u" ) {
            $align1 .= "-";
            $align2 .= substr( $seq2, $i - 1, 1 );
            $i--;
        }
    }

    $align1 = reverse $align1;
    $align2 = reverse $align2;

    untie @matrix; # Don't forget to add your own error handler.

    unlink 'temp.txt'; # Don't forget to add your own error handler.

    return ( $align1, $align2, $stseq1 ,$max_j);
} 

短いシーケンスには元のサブを引き続き使用し、長いシーケンスにはこのサブに切り替えることができます。

于 2013-10-25T12:55:17.217 に答える