javascript - node js azure SDK getBlobToStream は大量のメモリを使用します

Question

特定の Azure アカウントのすべての BLOB コンテナー内のすべての BLOB を単純にダウンロードするバックアップスクリプトを作成しています。

このスクリプトは async.js を使用して同時に実行できるスレッドの数を制限し、サーバーが過負荷にならないようにしています。スクリプト自体は問題なく動作しますが、大きなファイルに当たるとメモリ不足になります。おそらく、ディスクへの書き込みよりもダウンロードのほうが速く進むためにメモリ内バッファが膨れ上がり、最終的にメモリを使い果たしてしまうのだと思いますが、正確な原因のデバッグは今のところできていません。

多くのメモリを使用しているように見える特定の関数は、次のように呼び出されます。

// Download the blob straight into a file write stream. `callback` finishes the
// current queue item and is called exactly once on every path.
blobService.getBlobToStream(
  containerName,
  blob.name,
  fs.createWriteStream(fullPath),
  function(error) {
    if(error){ //Something went wrong, write it to the console but finish the queue item and continue.
      console.log("Failed writing " + blob.name + " (" + error + ")");
      callback();
      return;
    }
    //Write the last modified date, then finish the queue item.
    //The date is stringified explicitly: current Node.js releases reject non-string/non-buffer data in fs.writeFile.
    fs.writeFile(fullPath + ".date", String(blobLastModified), function(err) {
      if(err) console.log("Couldn't write .date file: " + err);
      callback(); //Only report completion once the .date marker has been handled.
    });
  });

700MB のファイルを 1 つダウンロードするだけでも、こちらの環境では 1GB のメモリが簡単にいっぱいになります。

これを回避する方法はありますか？Azure SDK が何もかもをメモリにバッファリングしてしまうのを防ぐパラメーターを、私が見落としているのでしょうか？

完全なコード:

#!/usr/bin/env node

//Requires
var azure = require('azure');
var fs    = require('fs');
var mkdirp = require('mkdirp');
var path  = require('path');
var async = require('async');

// Upper bound on how many blobs of one container are downloaded at the same
// time; passed to async.queue as the concurrency of the per-container blob queue.
var maxconcurrency = 1;

// Blob service client. No arguments are passed here; presumably the credentials
// come from the environment - confirm against the SDK documentation.
var blobService = azure.createBlobService();

// Local directory the backup is written to. Always end with a '/'!!
// Declared with `var` so it is not an implicit global (which fails in strict mode).
var backupPrefix = '/backups/azurebackup/';

//Main flow of the script is near the bottom of the file.
/**
 * Queue of container names to back up; containers are processed one at a time.
 *
 * For each container the worker lists its blobs and downloads them through a
 * nested queue limited to `maxconcurrency` simultaneous downloads. A blob is
 * skipped when the "<file>.date" marker next to it already holds the blob's
 * last-modified date. The container task is reported as finished only after
 * every blob has been handled (or the listing failed), so that the next
 * container in the queue can start.
 */
var containerProcessingQueue = async.queue(
  function getBlobsAndSaveThem(containerName, containerDone) {
    console.log(containerName); //DEBUG
    blobService.listBlobs(containerName, function(error, blobs) {
      if (error) {
        console.log("An error occurred listing the blobs: " + error);
        containerDone(); //Still finish the task, otherwise the remaining containers are never processed.
        return;
      }

      //NOTE(review): assumes listBlobs hands over an array of blobs - confirm against the SDK version in use.
      var blobList = blobs || [];
      var remainingBlobs = blobList.length;
      if (remainingBlobs === 0) { //Empty container: nothing to download.
        containerDone();
        return;
      }

      var blobProcessingQueue = async.queue(function(blob, callback) {
        console.log(blob); //DEBUG
        var fullPath = backupPrefix + containerName + '/' + blob.name;
        var dateFilePath = fullPath + ".date";
        //Kept as a string: it is both written to and compared against the .date file.
        var blobLastModified = String(new Date(blob.properties['last-modified']));

        //Create the target directory synchronously so concurrent workers do not race on it.
        try {
          mkdirp.sync(path.dirname(fullPath));
        } catch (err) {
          console.log('Failed to create directory ' + path.dirname(fullPath) + " ("+ err + ")");
          callback();
          return;
        }

        //If the stored date matches, the file is unmodified: finish the queue item without downloading.
        if (fs.existsSync(dateFilePath) &&
            fs.readFileSync(dateFilePath).toString() === blobLastModified) {
          callback();
          return;
        }

        blobService.getBlobToStream(
          containerName,
          blob.name,
          fs.createWriteStream(fullPath),
          function(error) {
            if (error) { //Something went wrong, write it to the console but finish the queue item and continue.
              console.log("Failed writing " + blob.name + " (" + error + ")");
              callback();
              return;
            }
            //Write the last modified date, then finish the queue item.
            fs.writeFile(dateFilePath, blobLastModified, function(err) {
              if (err) console.log("Couldn't write .date file: " + err);
              callback();
            });
          });
      }, maxconcurrency);

      //Called once per finished blob; the last one completes the container task.
      var onBlobDone = function() {
        remainingBlobs -= 1;
        if (remainingBlobs === 0) {
          containerDone();
        }
      };

      //Push new items to the queue for processing
      for (var i = 0; i < blobList.length; i++) {
        blobProcessingQueue.push(blobList[i], onBlobDone);
      }
    });
  }, 1);

//Main flow: queue every container of the account for backup.
blobService.listContainers(function(err, result){
        if(err){ //Do not touch `result` when the listing itself failed.
                console.log("An error occurred listing the containers: " + err);
                return;
        }
        for(var i=0;i<result.length;i++) {
                containerProcessingQueue.push(result[i].name);
        }
});

score 2 · Accepted Answer

ご参考までに、開始位置と終了位置を指定するオプション名は変更されており、現在は単に rangeStart と rangeEnd になっています。詳細については、Azure の Node 向けドキュメントを参照してください: http://dl.windowsazure.com/nodestoragedocs/BlobService.html

score 1 · Accepted Answer

できることの 1 つは、BLOB データ全体ではなくデータのチャンクだけをストリームに読み込み、それをファイルに追記してから次のチャンクを読み込むことです。Blob Storage サービスはこれをサポートしています。getBlobToStream ( https://github.com/WindowsAzure/azure-sdk-for-node/blob/master/lib/services/blob/blobservice.js ) のソースコードを見ると、オプション rangeStartHeader と rangeEndHeader で取得するバイト範囲 (from/to) を指定できます。これが役立つかどうか試してみてください。

それを行うコードを急ごしらえで書いてみました (コードを見ればわかるとおり、私の node.js の知識はごく初歩的なものです :))。[このコードは、チャンク単位のダウンロードの考え方をつかむために使ってください。まだ問題が残っていると思います]

var azure = require('azure');
var fs = require('fs');

var blobService = azure.createBlobService("account", "accountkey");
var containerName = "container name";
var blobName = "blob name";
var blobSize; // total size of the blob, filled in once the properties arrive
var chunkSize = 1024 * 512;//chunk size -- we'll read 512 KB at a time.
var startPos = 0; // offset of the next byte to download
var fullPath = "D:\\node\\";
// Look up the blob's size first, then start the chunked download.
blobService.getBlobProperties(containerName, blobName, null, function (error, blob) {
        if (error) {
            throw error;
        }
        // Coerce to a number so the range arithmetic never mixes strings and numbers.
        // NOTE(review): contentLength may already be numeric in some SDK versions - confirm.
        blobSize = Number(blob.contentLength);
        fullPath = fullPath + blobName;
        console.log(fullPath);
        doDownload();
    }
);

/**
 * Downloads the blob in chunks of `chunkSize` bytes, writing each chunk to the
 * file at `fullPath`, and calls itself again until `blobSize` bytes have been
 * requested.
 *
 * Reads and updates the script-level `startPos`; expects `blobSize` and
 * `fullPath` to have been set before the first call.
 *
 * @throws rethrows any error reported by getBlobToStream.
 */
function doDownload() {
    var totalSize = Number(blobSize);
    // Truncate on the first chunk so a file left over from an earlier run is
    // not appended to; every later chunk appends.
    var stream = fs.createWriteStream(fullPath, {flags: startPos === 0 ? 'w' : 'a'});
    var endPos = Math.min(startPos + chunkSize, totalSize);
    if (!(endPos > startPos)) {
        // Nothing (left) to download, e.g. an empty blob: the requested range
        // would end before it starts.
        stream.end();
        return;
    }
    console.log("Downloading " + (endPos - startPos) + " bytes starting from " + startPos + " marker.");
    // Range bounds are inclusive, hence endPos - 1.
    // NOTE(review): newer SDK releases reportedly call these options
    // rangeStart/rangeEnd - confirm against the installed version.
    blobService.getBlobToStream(containerName, blobName, stream,
        { "rangeStartHeader": startPos, "rangeEndHeader": endPos - 1 }, function(error) {
        if (error) {
            throw error;
        }
        startPos = endPos;
        if (startPos < totalSize) {
            doDownload();
        }
    });
}

javascript - node js azure SDK getBlobToStream は大量のメモリを使用します

回答 2 件

Related

Reference