与えられた重みベクトルに基づいて配列をリサンプリングするWekaの実装を読んでいます。コードを読みましたが、この実装の基礎となっているアルゴリズムが何なのかわかりません。さらに、次の2行のコードが何のためにあるのかについても、かなり混乱しています。
Utils.normalize(probabilities, sumProbs / sumOfWeights);
と
// Make sure that rounding errors don't mess things up
probabilities[numInstances() - 1] = sumOfWeights;
これらが何のために使われているのかわかりません。以下はWekaからコピーしたコードです。
/**
 * Builds a new dataset with the same number of slots as this one by
 * sampling instances with replacement, guided by a weight vector.
 *
 * How it works:
 * 1. Draw numInstances() random increments and keep their running sums.
 *    Because every increment is non-negative, the sums form an increasing
 *    sequence of thresholds.
 * 2. Rescale the thresholds so that they span the range up to the total
 *    weight (sumOfWeights).
 * 3. Sweep the instances in order while accumulating their weights. Every
 *    threshold that is less than or equal to the running weight total
 *    selects the instance currently being visited. An instance with a
 *    larger weight covers a wider interval and therefore tends to be
 *    copied more often; an instance may be copied several times or not
 *    at all.
 *
 * Both the threshold array and the sweep are traversed once, front to back.
 *
 * @param random  source of the random increments used to build thresholds
 * @param weights one weight per instance of this dataset; must not contain
 *                negative values
 * @return a new dataset holding the sampled instances, each with its
 *         weight set to 1; empty when this dataset is empty
 * @throws IllegalArgumentException if weights.length differs from
 *         numInstances(), or if any weight is negative
 */
Instances weka::core::Instances::resampleWithWeights(Random random,double[] weights )
{
    int n = numInstances();
    if (weights.length != n) {
        throw new IllegalArgumentException("weights.length != numInstances.");
    }
    // Validate every weight before sampling. Checking inside the sweep below
    // is not enough: the sweep stops as soon as all output slots are filled,
    // so a negative weight located after that point would go unnoticed
    // (e.g. weights {3, -1}: the total is 2 and the first instance alone
    // already covers every threshold).
    for (int i = 0; i < n; i++) {
        if (weights[i] < 0) {
            throw new IllegalArgumentException("Weights have to be positive.");
        }
    }
    Instances newData = new Instances(this, n);
    if (n == 0) {
        return newData;
    }

    // Step 1: increasing thresholds built from running sums of random draws.
    double[] probabilities = new double[n];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
    for (int i = 0; i < n; i++) {
        sumProbs += random.nextDouble();
        probabilities[i] = sumProbs;
    }

    // Step 2: rescale the thresholds to the weight scale.
    // NOTE(review): this assumes Utils.normalize(array, s) divides each
    // element by s, which maps the last threshold (sumProbs) onto
    // sumOfWeights -- confirm against the Utils implementation.
    Utils.normalize(probabilities, sumProbs / sumOfWeights);

    // Make sure that rounding errors don't mess things up: pin the last
    // threshold to the total weight so the division above cannot leave it
    // slightly beyond the range covered by the sweep.
    probabilities[n - 1] = sumOfWeights;

    // Step 3: single pass over instances (l) and thresholds (k).
    int k = 0; int l = 0;
    sumProbs = 0; // reused as the running total of weights[0..l]
    while ((k < n) && (l < n)) {
        sumProbs += weights[l];
        // Every threshold reached by the running total selects instance l.
        while ((k < n) && (probabilities[k] <= sumProbs)) {
            newData.add(instance(l));
            // The copy just added sits at index k; reset its weight to 1.
            newData.instance(k).setWeight(1);
            k++;
        }
        l++;
    }
    return newData;
}