これは私の arff ファイル (links.arff) です:
@relation links
@attribute isLink1Present numeric
@attribute isLink2Present numeric
@attribute isLink3Present numeric
@attribute isLink4Present numeric
@attribute isLink5Present numeric
@attribute isLink6Present numeric
@attribute isLink7Present numeric
@attribute isLink8Present numeric
@attribute isLink9Present numeric
@data
0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0
1,0,1,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0
1,1,0,0,0,0,0,0,0
1,1,0,1,0,0,0,0,0
1,1,1,0,0,0,0,0,0
1,1,1,1,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0
1,0,0,1,0,0,0,0,0
1,0,0,1,1,0,0,0,0
1,0,1,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0
1,0,1,1,0,0,0,0,0
1,0,1,1,1,0,0,0,0
1,1,0,0,0,0,0,0,0
1,1,0,0,1,0,0,0,0
1,1,0,1,0,0,0,0,0
1,1,0,1,1,0,0,0,0
1,1,1,0,0,0,0,0,0
1,1,1,0,1,0,0,0,0
1,1,1,1,0,0,0,0,0
1,1,1,1,1,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0
1,0,0,0,1,0,0,0,0
1,0,0,0,1,1,0,0,0
1,0,0,1,0,0,0,0,0
1,0,0,1,0,1,0,0,0
1,0,0,1,1,0,0,0,0
1,0,0,1,1,1,0,0,0
1,0,1,0,0,0,0,0,0
1,0,1,0,0,1,0,0,0
1,0,1,0,1,0,0,0,0
1,0,1,0,1,1,0,0,0
1,0,1,1,0,0,0,0,0
1,0,1,1,0,1,0,0,0
1,0,1,1,1,0,0,0,0
1,0,1,1,1,1,0,0,0
1,1,0,0,0,0,0,0,0
1,1,0,0,0,1,0,0,0
1,1,0,0,1,0,0,0,0
1,1,0,0,1,1,0,0,0
1,1,0,1,0,0,0,0,0
1,1,0,1,0,1,0,0,0
1,1,0,1,1,0,0,0,0
1,1,0,1,1,1,0,0,0
1,1,1,0,0,0,0,0,0
1,1,1,0,0,1,0,0,0
1,1,1,0,1,0,0,0,0
1,1,1,0,1,1,0,0,0
1,1,1,1,0,0,0,0,0
1,1,1,1,0,1,0,0,0
1,1,1,1,1,0,0,0,0
1,1,1,1,1,1,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0
1,0,0,0,0,1,0,0,0
1,0,0,0,0,1,1,0,0
1,0,0,0,1,0,0,0,0
1,0,0,0,1,0,1,0,0
1,0,0,0,1,1,0,0,0
1,0,0,0,1,1,1,0,0
1,0,0,1,0,0,0,0,0
1,0,0,1,0,0,1,0,0
1,0,0,1,0,1,0,0,0
1,0,0,1,0,1,1,0,0
1,0,0,1,1,0,0,0,0
1,0,0,1,1,0,1,0,0
1,0,0,1,1,1,0,0,0
1,0,0,1,1,1,1,0,0
1,0,1,0,0,0,0,0,0
1,0,1,0,0,0,1,0,0
1,0,1,0,0,1,0,0,0
1,0,1,0,0,1,1,0,0
1,0,1,0,1,0,0,0,0
1,0,1,0,1,0,1,0,0
1,0,1,0,1,1,0,0,0
1,0,1,0,1,1,1,0,0
1,0,1,1,0,0,0,0,0
1,0,1,1,0,0,1,0,0
1,0,1,1,0,1,0,0,0
1,0,1,1,0,1,1,0,0
1,0,1,1,1,0,0,0,0
1,0,1,1,1,0,1,0,0
1,0,1,1,1,1,0,0,0
1,0,1,1,1,1,1,0,0
1,1,0,0,0,0,0,0,0
1,1,0,0,0,0,1,0,0
1,1,0,0,0,1,0,0,0
1,1,0,0,0,1,1,0,0
k-means の実装方法は次のとおりです。
/**
 * Clusters the instances stored in {@code links.arff} with Weka's
 * {@code SimpleKMeans} (Euclidean distance, seed 10) and prints the cluster
 * assigned to every instance, ordered by {@code Cluster}'s natural ordering.
 *
 * <p>The ARFF file is looked up in the directory configured under the
 * {@code datafiles-home} property. Any failure while loading the data or
 * building the clusterer is caught and its stack trace is printed; nothing
 * is propagated to the caller.
 *
 * @param numClusters number of clusters passed to {@code SimpleKMeans}
 */
public void runKMeans(int numClusters){
    try {
        SimpleKMeans kmeans = new SimpleKMeans();
        DistanceFunction df = new weka.core.EuclideanDistance();
        kmeans.setDistanceFunction(df);
        kmeans.setSeed(10);
        // Required so that getAssignments() can be called after building.
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numClusters);

        // Use the platform separator instead of a hard-coded "\\" so the
        // lookup also works outside Windows (on Windows the path is unchanged).
        String arffFile = new PropertyUtils().getProperty("datafiles-home")
                + java.io.File.separator + "links.arff";
        DataSource source = new DataSource(arffFile);
        Instances instances = source.getDataSet();

        kmeans.buildClusterer(instances);
        // NOTE(review): this prints the tool-tip text of the "display std devs"
        // option, not any clustering result -- confirm whether printing the
        // clusterer itself was intended.
        System.out.println(kmeans.displayStdDevsTipText());

        // One entry per instance, in input order; each value is the
        // zero-based cluster number assigned to that instance.
        int[] assignments = kmeans.getAssignments();
        List<Cluster> lc = new ArrayList<Cluster>(assignments.length);
        for (int i = 0; i < assignments.length; i++) {
            // Instances are reported with one-based numbers.
            lc.add(new Cluster(i + 1, assignments[i]));
        }
        Collections.sort(lc);
        for (Cluster c : lc) {
            PrintUtils.println("Instance : "+c.getInstance()+" Cluster "+c.getCluster());
        }
    }
    catch(Exception e){
        // Best-effort: report the failure and return normally, as before.
        e.printStackTrace();
    }
}
データの各行(インスタンス)に「名前」属性を関連付けて、各行を識別できるようにしたいと考えています。どうすれば実現できますか? @data セクションに String 属性の値を追加することはできないと思うのですが、別の方法はありますか?