multi.py の 96 行目では、"**args" を渡さずに "self.classifiers[i].train(datai)" を呼び出しているため、"mc.train(data, saveSpace=False)" を呼び出しても saveSpace 引数が個々の分類器に渡されず、失われてしまいます。マルチクラス分類器に含まれる各分類器を個別に保存しようとするとエラーメッセージが表示されるのは、これが原因です。この行を "self.classifiers[i].train(datai, **args)" のようにすべての引数を渡す形に変更すれば、各分類器を個別に保存できます。
#!/usr/bin/python
import numpy
from PyML.utils import misc
from PyML.evaluators import assess
from PyML.classifiers.svm import SVM, loadSVM
from PyML.containers.labels import oneAgainstRest
from PyML.classifiers.baseClassifiers import Classifier
from PyML.containers.vectorDatasets import SparseDataSet
from PyML.classifiers.composite import CompositeClassifier
class OneAgainstRestFixed(CompositeClassifier):
    '''A one-against-the-rest multi-class classifier.

    One copy of the wrapped classifier is trained per class.  Keyword
    arguments given to train() (for example saveSpace=False) are forwarded
    to every per-class classifier.
    '''

    def train(self, data, **args):
        '''Train one classifier per class.

        data -- labeled training dataset with more than two classes.
        args -- keyword arguments passed unchanged to Classifier.train and
                to the train() call of every per-class classifier.

        Raises ValueError when the problem has two classes or fewer.
        '''
        Classifier.train(self, data, **args)
        # NOTE(review): self.labels is read right after the base-class call,
        # so Classifier.train presumably sets it -- confirm.
        numClasses = self.labels.numClasses
        if numClasses <= 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError('Not a multi class problem')

        # One fresh classifier per class, each built from the template
        # classifier through its own class's constructor.
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for _ in range(numClasses)]

        for i, classifier in enumerate(self.classifiers):
            # make a copy of the data; this is done in case the classifier
            # modifies the data
            datai = data.__class__(data, deepcopy=self.classifier.deepcopy)
            # NOTE(review): oneAgainstRest presumably relabels the copy as
            # "class i versus everything else" -- confirm against PyML.
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            # Forward **args so options such as saveSpace are not lost.
            classifier.train(datai, **args)

        self.log.trainingTime = self.getTrainingTime()

    def classify(self, data, i):
        '''Pick the class whose classifier gives the largest decision value.

        data -- dataset handed to each per-class decisionFunc.
        i    -- index handed to each per-class decisionFunc (presumably the
                position of the example within data).

        Returns a tuple (index of the winning classifier, its decision value).
        '''
        numClasses = self.labels.numClasses
        # Builtin float instead of numpy.float_, which NumPy 2.0 removed.
        scores = numpy.zeros(numClasses, dtype=float)
        for j in range(numClasses):
            scores[j] = self.classifiers[j].decisionFunc(data, i)
        return numpy.argmax(scores), numpy.max(scores)

    def preproject(self, data):
        '''Call preproject(data) on every per-class classifier.'''
        for i in range(self.labels.numClasses):
            self.classifiers[i].preproject(data)

    # Expose assess.test as the `test` attribute of this class.
    test = assess.test
# Demo: train the multi-class classifier, save each per-class SVM to its own
# file, reload them, and check that the rebuilt classifier can classify.

# Toy training set, one example per line: a label followed by index:value
# pairs (two examples for each of the labels 0-3).
train_data = """
0 1:1.0 2:0.0 3:0.0 4:0.0
0 1:0.9 2:0.0 3:0.0 4:0.0
1 1:0.0 2:1.0 3:0.0 4:0.0
1 1:0.0 2:0.8 3:0.0 4:0.0
2 1:0.0 2:0.0 3:1.0 4:0.0
2 1:0.0 2:0.0 3:0.9 4:0.0
3 1:0.0 2:0.0 3:0.0 4:1.0
3 1:0.0 2:0.0 3:0.0 4:0.9
"""
# lstrip() drops the leading newline of the triple-quoted literal so the file
# starts with the first data line.  open() works on Python 2 and 3 (file() is
# Python-2-only), and the with-block closes the handle deterministically.
with open("foo_train.data", "w") as f:
    f.write(train_data.lstrip())

# Toy test set in the same layout, one example per label.
test_data = """
0 1:1.1 2:0.0 3:0.0 4:0.0
1 1:0.0 2:1.2 3:0.0 4:0.0
2 1:0.0 2:0.0 3:0.6 4:0.0
3 1:0.0 2:0.0 3:0.0 4:1.4
"""
with open("foo_test.data", "w") as f:
    f.write(test_data.lstrip())

train = SparseDataSet("foo_train.data")
mc = OneAgainstRestFixed(SVM())
# saveSpace=False is forwarded by train() to every per-class SVM.
mc.train(train, saveSpace=False)

test = SparseDataSet("foo_test.data")
# One classify() call for each of the 4 test examples written above.
print([mc.classify(test, i) for i in range(4)])

# Save every per-class classifier to its own model file.
for i, classifier in enumerate(mc.classifiers):
    classifier.save("foo.model.%d" % i)

# Reload the 4 saved models (one per class label in the training data).
classifiers = [loadSVM("foo.model.%d" % i) for i in range(4)]

# Rebuild a multi-class classifier around the reloaded SVMs without
# retraining: classify() only reads self.labels.numClasses and
# self.classifiers, so those are filled in by hand.
mcnew = OneAgainstRestFixed(SVM())
mcnew.labels = misc.Container()
# NOTE(review): addAttributes presumably copies the listed attributes from
# test.labels onto the container -- confirm against PyML.utils.misc.
mcnew.labels.addAttributes(test.labels, ['numClasses', 'classLabels'])
mcnew.classifiers = classifiers
print([mcnew.classify(test, i) for i in range(4)])