11

Maia パッケージの MATLAB コードの一部を Octave で動作するように変換しようとしています。ファイルの 1 つに containers.Map の呼び出しがいくつかあり、これは Octave ではまだ実装されていないようなので、現在行き詰まっています。Octave 側で多くの余分な作業をせずに、同様の機能を簡単に実現する方法はありますか? お時間をいただきありがとうございます。

function [adj_direct contig_direct overlap names longest_path_direct...
          weigth_direct deltafiles deltafiles_ref ReferenceAlignment ...
          contig_ref overlap_ref name_hash_ref] = ...
          assembly_driver(assemblies,ref_genome,target_chromosome, ... 
                          deltafiles_ref,contig_ref, overlap_ref, ...
                          name_hash_ref, varargin)

% ASSEMBLY_DRIVER Combines contig sets into one assembled chromosome
%
% INPUT
%   assemblies         cell array with one row per assembly; column 1 is
%                      used as the assembly name, column 2 as its location
%                      and column 3 as its quality value
%   ref_genome         reference genome, passed on to align_contigs_sets
%   target_chromosome  chromosome identifier; reference alignments whose
%                      R field differs from it are discarded
%   deltafiles_ref, contig_ref, overlap_ref, name_hash_ref
%                      results of an earlier alignment against the
%                      reference. They are only used as given when the
%                      'ReferenceAlignment' option is supplied; otherwise
%                      they are recomputed by align_contigs_sets.
%
%   OPTIONAL (name/value pairs)   DEFAULT
%       'Startnode_name'          '' (passed to get_reference_nodes)
%       'Endnode_name'            '' (passed to get_reference_nodes)
%       'z_weigths'               [.25 .25 .25 .25]   (option name is
%                                 spelled this way; kept for callers)
%       'clipping_thrs'           10
%       'ref_distance'            -10
%       'ref_quality'             1E-5
%       'max_chromosome_dist'     100
%       'quit_treshold'           15
%       'tabu_time'               3
%       'minimum_improvement'     -inf
%       'ref_node_assemblies'     all assemblies (slow); the value is a
%                                 row index into assemblies
%       'extend_ends'/'endextend' true
%       'AllowReverse'            true
%       'ReferenceAlignment'      'NotYetDoneByMaia'
%       'containment_edge'        true
%       'ref_first'               true
%       'mapfilter'               '-rq'
%       'alignlen'                75
%       'ident'                   85
%
% OUTPUT
%   adj_direct, contig_direct          directed graph from direct_graph
%   overlap, names                     as returned by get_reference_nodes
%   longest_path_direct, weigth_direct result of longest_path_tabu on the
%                                      directed graph
%   deltafiles                         merged pairwise alignment data
%   deltafiles_ref, ReferenceAlignment, contig_ref, overlap_ref,
%   name_hash_ref                      reference alignment data (recomputed
%                                      unless 'ReferenceAlignment' was given)
%

    % SET DEFAULTS
    % General parameters
    z_weights           = [.25 .25 .25 .25];
    clipping_thrs       = 10;
    mapfilter           = '-rq';
    alignlen            = 75;
    ident               = 85;

    % Reference node parameters
    ref_distance        = -10;
    ref_quality         = 1E-5;
    max_chromosome_dist = 100;
    % TABU parameters
    quit_treshold       = 15;
    tabu_time           = 3;
    minimum_improvement = -inf;
    ref_node_assemblies = assemblies;
    % Extending the assembly outwards from the start and end node
    % NOTE(review): endextend is parsed but not used anywhere in this
    % function -- confirm whether a downstream call should receive it.
    endextend           = true;
    AllowReverse        = true;
    % If no start and end node are given, they will be determined from tiling
    Startnode_name      = '';
    Endnode_name        = '';
    containment_edge    = true;
    ref_first           = true;

    % If contigs have already been aligned to the reference, give the
    % deltafile
    ReferenceAlignment = 'NotYetDoneByMaia';

    % Get VARARGIN user input (name/value pairs)
    if mod(numel(varargin), 2) ~= 0
        % A trailing name without a value would otherwise surface as an
        % opaque index-out-of-bounds error.
        error('Optional inputs must be given as name/value pairs');
    end
    for vi = 1:2:numel(varargin)
        opt_name  = varargin{vi};
        opt_value = varargin{vi+1};
        if ~ischar(opt_name)
            error('Option names must be strings');
        end
        switch opt_name
            case 'Startnode_name'
                Startnode_name = opt_value;
            case 'Endnode_name'
                Endnode_name = opt_value;
            case 'z_weigths'
                z_weights = opt_value;
            case 'clipping_thrs'
                clipping_thrs = opt_value;
            case 'ref_distance'
                ref_distance = opt_value;
            case 'ref_quality'
                ref_quality = opt_value;
            case 'max_chromosome_dist'
                max_chromosome_dist = opt_value;
            case 'quit_treshold'
                quit_treshold = opt_value;
            case 'tabu_time'
                tabu_time = opt_value;
            case 'minimum_improvement'
                minimum_improvement = opt_value;
            case 'ref_node_assemblies'
                ref_node_assemblies = assemblies(opt_value,:);
            case {'extend_ends', 'endextend'}
                % The flag itself is stored; the previous code assigned
                % rows of assemblies here (copy-paste from the case above).
                endextend = opt_value;
            case 'AllowReverse'
                AllowReverse = opt_value;
            case 'ReferenceAlignment'
                ReferenceAlignment = opt_value;
            case 'containment_edge'
                containment_edge = opt_value;
            case 'ref_first'
                ref_first = opt_value;
            case 'mapfilter'
                mapfilter = opt_value;
            case 'alignlen'
                alignlen = opt_value;
            case 'ident'
                ident = opt_value;
            otherwise
                % Report the offending option NAME, not its value.
                error(['Input ' opt_name ' is not known']);
        end
    end

    % Read input assemblies
    assembly_names   = assemblies(:,1);
    assembly_locs    = assemblies(:,2);
    assembly_quality = containers.Map(assemblies(:,1),assemblies(:,3));
    assembly_quality('reference') = ref_quality;

    % Read input assemblies for creation of reference nodes
    ref_node_assembly_names   = ref_node_assemblies(:,1);
    ref_node_assembly_locs    = ref_node_assemblies(:,2);
    ref_node_assembly_quality = containers.Map(ref_node_assemblies(:,1),ref_node_assemblies(:,3));
    ref_node_assembly_quality('reference') = ref_quality;


    % If there is only one assembly there is nothing to align
    if size(assemblies,1) >= 2

        % Align every unordered pair of assemblies against each other
        assembly_pairs = {};
        coordsfiles = {};
        deltafiles = {};
        for i = 1:length(assembly_locs)-1
            for j = i+1:length(assembly_locs)
                [coordsfile,deltafile] = align_assemblies({assembly_locs{i},assembly_locs{j}},{assembly_names{i}, assembly_names{j}}, ...
                                                           mapfilter, alignlen, ident);
                % Collected in cells so entries of different length cannot
                % cause a concatenation dimension mismatch.
                coordsfiles = [coordsfiles; {coordsfile}];
                deltafiles = [deltafiles; {deltafile}];
                assembly_pairs = [assembly_pairs;[assembly_names(i) assembly_names(j)]];
            end
        end

        % Put the nucmer alignments in an adjacency matrix
        [adj, names, name_hash, contig, overlap] = get_adj_matrix(deltafiles, assembly_pairs, assembly_quality, z_weights, 'clipping_thrs', clipping_thrs, 'dove_tail', 'double','edge_weight','z-scores', 'containment_edge', containment_edge);

        % Merge deltafiles: concatenate the per-pair entries horizontally
        deltafiles = [deltafiles{:}];

    else
        assembly_pairs = {};
        coordsfiles = [];
        deltafiles = [];
        adj = [];
        names = {};
        name_hash = containers.Map;
        contig  = struct('name',{},'size',[],'chromosome',[],'number',[], 'assembly', [], 'assembly_quality', []);
        overlap = struct('Q',{},'R',[],'S1',[],'E1', [], 'S2', [], 'E2', [], 'LEN1', [], 'LEN2', [], 'IDY', [], 'COVR', [], 'COVQ', [],'LENR',[], 'LENQ',[]);
    end


    % Add the pseudo nodes to the graph. If the contigs have already been
    % aligned to the reference genome, just select the alignments that
    % correspond to the target chromosome
    if isequal(ReferenceAlignment,'NotYetDoneByMaia')
        % Align all contigs in 'contig_sets_fasta' to the reference chromosome
        [contig_ref, overlap_ref, name_hash_ref, deltafiles_ref] = align_contigs_sets(...
            ref_genome, ref_node_assembly_locs, ref_node_assembly_names, ...
            ref_node_assembly_quality, clipping_thrs, z_weights, ...
            ref_distance,max_chromosome_dist);

        ReferenceAlignment = 'out2.delta';
    end
    % Select only the entries in the deltafile for the current target chromosome
    [contig_target_ref, overlap_target_ref, name_hash_target_ref, delta_target_ref] = ...
              GetVariablesForTargetChromosome(...
              contig_ref, overlap_ref, deltafiles_ref);

    % Add reference nodes to the adjacency matrix
    [adj, names, name_hash, contig, overlap, delta_target_ref, Startnode_name, Endnode_name] = get_reference_nodes( ...
                     adj, names, name_hash, contig, overlap, target_chromosome, ...
                     contig_target_ref, overlap_target_ref, name_hash_target_ref, delta_target_ref, ...
                     max_chromosome_dist, ref_distance, clipping_thrs, ref_first,...
                     Startnode_name, Endnode_name, AllowReverse);

    % Specify a start and an end node for the assembly
    Startnode = name_hash(Startnode_name);
    Endnode = name_hash(Endnode_name);


    % Find the best scoring path
    fprintf('Directing the final graph\n');
    % Calculate path on undirected graph to get an idea on how to direct the graph
    [longest_path weigth] = longest_path_tabu(adj, Startnode, Endnode, quit_treshold, tabu_time, minimum_improvement);
    % Make the graph directed (greedy)
    [adj_direct contig_direct] = direct_graph(adj,overlap, contig, names, name_hash,clipping_thrs, Startnode, longest_path, true, ref_first);
    % Calculate final layout-path
    fprintf('Find highest scoring path\n');
    [longest_path_direct weigth_direct] = longest_path_tabu(adj_direct, Startnode, Endnode, quit_treshold, tabu_time, minimum_improvement);


    function [contig_target_ref, overlap_target_ref, name_hash_target_ref, delta_target_ref] = ...
              GetVariablesForTargetChromosome(...
              contig_ref, overlap_ref, deltafiles_ref)
        % Keep only the reference-alignment entries that belong to
        % target_chromosome (read from the enclosing function's scope).
        % Each array is walked backwards so deleting an element does not
        % shift the indices still to be visited. numel is used so that row
        % and column struct arrays are both traversed completely.

        % Delta entries whose reference (R) is not the target chromosome
        delta_target_ref = deltafiles_ref;
        for di = numel(delta_target_ref):-1:1
            if ~isequal(delta_target_ref(di).R,target_chromosome)
                delta_target_ref(di) = [];
            end
        end
        % Overlaps whose reference (R) is not the target chromosome
        overlap_target_ref = overlap_ref;
        for oi = numel(overlap_target_ref):-1:1
            if ~isequal(overlap_target_ref(oi).R,target_chromosome)
                overlap_target_ref(oi) = [];
            end
        end
        % Reference contigs of other chromosomes; non-reference contigs stay
        contig_target_ref = contig_ref;
        for ci = numel(contig_target_ref):-1:1
            if isequal(contig_target_ref(ci).assembly, 'reference') && ~isequal(contig_target_ref(ci).name,target_chromosome)
                contig_target_ref(ci) = [];
            end
        end
        % Rebuild the name lookup for the filtered contig list
        name_hash_target_ref = make_hash({contig_target_ref.name}');
    end


end
4

1 に答える 1

13

私の知る限り、Octave には containers.Map に正確に相当するものはありません...

1 つの選択肢は、java パッケージを使用して java.util.Hashtable のインスタンスを作成することです。これを使用すると、次のようになります:

% Load the Octave "java" package so Java objects can be created.
pkg load java
% Create a java.util.Hashtable to act as the key/value map.
d = javaObject("java.util.Hashtable");
% Store three key/value pairs.
d.put('a',1)
d.put('b',2)
d.put('c',3)
% Look up the value stored under key 'b'.
d.get('b')

少し書き直したい場合は、ビルトインstructを、文字列 (有効な変数名) をキーとして持つ初歩的なハッシュ テーブルとして使用し、ほとんどすべての値を格納することができます。

たとえば、次のようになります。

% Example keys and their corresponding values, listed in the same order.
keys = {'Mon','Tue','Wed'}
values = {10, 20, 30}

これを置き換えることができます:

% Build a containers.Map from the key and value cell arrays.
map = containers.Map(keys,values);
% Look up the value stored under key 'Mon'.
map('Mon')

に:

% Start from a struct with no fields; each key becomes a field name.
s = struct();
for i=1:numel(keys)
    % Dynamic field reference: store values{i} under the field named keys{i}.
    s.(keys{i}) = values{i};
end
% Look up the value stored under field 'Mon'.
s.('Mon')

有効なキーを生成するために genvarname を使用するか、有効なキー文字列を生成する適切なハッシュ関数を使用する必要がある場合があります。

getfield、setfield、isfield、fieldnames、rmfield などの構造体関連の関数も調べてください。

2012-07-24T02:04:25.417 に回答