double の行列が文字列の行列に変わるという、本当に奇妙な問題があります。コードを次の問題に切り詰めました。
num.samples <- nrow(expr.matrix)
num.genes <- ncol(expr.matrix)
gene.names <- colnames(expr.matrix)
# Define a function which returns a vector in order to...
execute.per.gene <- function(target.gene, ...) {
# Uninteresting code
x <- expr.matrix[,setdiff(1:num.genes, target.gene)]
y <- expr.matrix[,target.gene]
rf <- randomForest(x, y, mtry=10, ntree=100, importance=TRUE)
# Calculate importance measure
im <- importance(rf)[,"IncNodePurity"]
# Divide by number of samples
im / num.samples
}
# ... execute mclapply!
all.output <- mclapply(1:num.genes, execute.per.gene, mc.cores=mc.cores)
# Initialise matrix
weight.matrix <- matrix(0.0, nrow=num.genes, ncol=num.genes)
rownames(weight.matrix) <- gene.names
colnames(weight.matrix) <- gene.names
# And now I merge the results from 'all.output' into the weight.matrix
for (target.gene in 1:num.genes) {
# Get result
im <- all.output[[target.gene]]
# Find which rows to change for this column
cand.tf.idx <- match(names(im), gene.names)
# Merge results into output matrix
weight.matrix[cand.tf.idx, target.gene] <- im
}
# And suddenly, the matrix consists of a bunch of strings!
if (!is.numeric(weight.matrix[[1,1]])) { # dafuq
cat("\nEncountered strings! :/\n")
print(weight.matrix)
# recently added this for debugging purposes:
print(sapply(all.output, class))
browser()
}
出力は次のとおりです。
Encountered strings! :/
G1 G7 G9 G23 G26 G28 G29 G33 G44 G48 G50 G52 G55 G59 G63 G64 G69 G70
G1 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G7 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G9 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G23 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G26 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
...
そして、マトリックスがゼロを含む一連の文字列に変わっている理由がわかりません。結局のところ、execute.per.gene
メソッドで最後に行うことは、ベクトルを整数で除算することであり、問題はその時点で例外をスローしないため、その時点ではim
依然として double のベクトルである必要があります。
問題がどこにあるのか誰にもわかりますか?私は何を間違っていますか?
更新im
すると、常に次のようになります。im
すでに文字列で構成されている場合、im / num.samples
失敗すると思います。
> dput(im)
structure(c(3.86421872658217, 0.0600404651226161, 0.0729843866848986,
0.0556398483535666, 0.0488815568218319, 0.0526059937835038, 0.170688282373908,
0.129655447072086, 0.174050696716209, 0.244770969072866, 0.170282014024477,
0.100440545265572, 0.0634773436494396, 0.0696835665372604, 0.118303002740336,
0.0493612110879677, 0.103414668075989, 0.0149516634700066, 0.0377397612656266,
0.0462366818296757, 0.0534595079995701, 0.0418429987271517, 0.0521335103883387,
0.0454590053400778, 0.0620792864477719, 0.0528642019860386, 0.0440233200010488,
....
2.4293680818691, 0.0455845647048088, 0.0480473721971548, 0.0493418345253576,
0.0468391879447859, 1.53509517636789, 0.0639471582428624, 0.155340800410008,
0.0668494853135931, 0.0436381864919185, 1.09024170028797, 0.0649503734307499,
0.0490042073829033, 0.0304435411561372, 0.034892331733943, 0.0759421587532521,
0.0666974014679768, 0.913196971375135, 0.0550660353121449, 1.36191204205922,
3.63194611493454, 0.177078251458191, 0.17856008667256, 0.0499985787306069,
0.0465138307009715, 0.071656156183379, 0.0441178391009568, 0.239933902772204,
0.0719828575374175, 0.0654148345872996, 0.920668929212975, 0.0454979263784418,
2.92899170564573, 0.0208273505572265, 0.0397416566013167, 0.197310579354446,
0.0313568556466712), .Names = c("sample_2", "sample_3", "sample_4",
"sample_5", "sample_6", "sample_7", "sample_8", "sample_9", "sample_10",
"sample_11", "sample_12", "sample_13", "sample_14", "sample_15",
"sample_16", "sample_17", "sample_18", "sample_19", "sample_20",
"sample_21", "sample_22", "sample_23", "sample_24", "sample_25",
"sample_26", "sample_27", "sample_28", "sample_29", "sample_30",
"sample_31", "sample_32", "sample_33", "sample_34", "sample_35",
"sample_36", "sample_37", "sample_38", "sample_39", "sample_40",
"sample_41", "sample_42", "sample_43", "sample_44", "sample_45",
"sample_46", "sample_47", "sample_48", "sample_49", "sample_50",
"sample_51", "sample_52", "sample_53", "sample_54", "sample_55",
"sample_56", "sample_57", "sample_58", "sample_59", "sample_60",
"sample_61", "sample_62", "sample_63", "sample_64", "sample_65",
"sample_66", "sample_67", "sample_68", "sample_69", "sample_70",
"sample_71", "sample_72", "sample_73", "sample_74", "sample_75",
....
"sample_801", "sample_802", "sample_803", "sample_804", "sample_805"
))
gene.names
、num.samples
およびの詳細情報num.genes
:
> dput(num.samples)
1643L
> dput(num.genes)
805L
> dput(gene.names)
c("sample_1", "sample_2", "sample_3", "sample_4", "sample_5",
"sample_6", "sample_7", "sample_8", "sample_9", "sample_10",
"sample_11", "sample_12", "sample_13", "sample_14", "sample_15",
"sample_16", "sample_17", "sample_18", "sample_19", "sample_20",
"sample_21", "sample_22", "sample_23", "sample_24", "sample_25",
"sample_26", "sample_27", "sample_28", "sample_29", "sample_30",
"sample_31", "sample_32", "sample_33", "sample_34", "sample_35",
"sample_36", "sample_37", "sample_38", "sample_39", "sample_40",
...
"sample_741", "sample_742", "sample_743", "sample_744", "sample_745",
"sample_746", "sample_747", "sample_748", "sample_749", "sample_750",
"sample_751", "sample_752", "sample_753", "sample_754", "sample_755",
"sample_756", "sample_757", "sample_758", "sample_759", "sample_760",
"sample_761", "sample_762", "sample_763", "sample_764", "sample_765",
"sample_766", "sample_767", "sample_768", "sample_769", "sample_770",
"sample_771", "sample_772", "sample_773", "sample_774", "sample_775",
"sample_776", "sample_777", "sample_778", "sample_779", "sample_780",
"sample_781", "sample_782", "sample_783", "sample_784", "sample_785",
"sample_786", "sample_787", "sample_788", "sample_789", "sample_790",
"sample_791", "sample_792", "sample_793", "sample_794", "sample_795",
"sample_796", "sample_797", "sample_798", "sample_799", "sample_800",
"sample_801", "sample_802", "sample_803", "sample_804", "sample_805"
)