-1

これが私のデータフレームのサンプル列です。RRはヘッダーです:

RR
Cvv  
Cvv  
Caa 

私が必要としているのは、データを「反転」して、サブストリングvvとaaを新しいヘッダーとして取り出し、元のヘッダーであるRRをデータフレームの値として格納することです。結果のマトリックスは次のようになります。

vv  | aa  
CRR |  
CRR |  
    | CRR  

したがって、両方のマトリックスで同じ関係が得られます。1行目と2行目では、vvがRRと結合されています。3行目では、aaがRRと結合されています。

これはRで実現できますか?何かアイデアはありますか?

見てくれてありがとう!

上記の例では、データを単純化しすぎています。これが私の実際のデータセットのサンプルです:

> dput(head(A1F[4:15],n=20))
structure(list(RR = structure(c(15L, 15L, 15L, 27L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", 
" ", "Caa", "Caj", "Cbb", "Cbb ", "Cbv", "Cja", "Cjr", "Crj", 
"Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb", "Gbv", 
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"), 
    AA = structure(c(13L, 13L, 13L, 1L, 1L, 1L, 1L, 15L, 27L, 
    27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 1L), .Label = c("", 
    "Caa", "Caj", "Car", "Cbb", "Cbv", "Cja", "Cjr", "Cjr ", 
    "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb", 
    "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"
    ), class = "factor"), BB = structure(c(9L, 9L, 9L, 9L, 9L, 
    9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L
    ), .Label = c("", "?", "Caa", "Caj", "Cbv", "Cja", "Cjr", 
    "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv", 
    "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"), 
    VV = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("", 
    " ", "Caa", "Caj", "Caj+", "Cbb", "Cbv", "Cja", "Cjr", "Crv", 
    "Cvb", "Cvr", "Cvv", "Gaa", "Gbb", "Gja", "Gjr", "Grv", "Gvb", 
    "Gvr"), class = "factor"), RJ = structure(c(8L, 3L, 3L, 1L, 
    1L, 12L, 12L, 12L, 12L, 12L, 1L, 12L, 12L, 12L, 12L, 12L, 
    12L, 12L, 12L, 12L), .Label = c("", "Caa", "Caj", "Cbv", 
    "Ccrj", "Cja", "Cjr", "Crj", "Crj ", "Crr", "Crv", "Cvr", 
    "Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", 
    "Gvr", "Gvv"), class = "factor"), JR = structure(c(7L, 7L, 
    18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
    18L, 18L, 18L, 18L, 18L, 18L), .Label = c("", "Caa", "Caj", 
    "Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", 
    "Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Grv ", 
    "Gvb", "Gvb ", "Gvr", "Gvv"), class = "factor"), BV = structure(c(4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv", 
    "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", 
    "Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grv", "Gvb", "Gvr", 
    "Gvv", "R"), class = "factor"), VB = structure(c(1L, 1L, 
7L, 7L, 18L, 18L, 1L, 1L, 10L, 10L, 21L, 21L, 21L, 1L, 21L, 
21L, 21L, 21L, 21L, 1L), .Label = c("", "Caa", "Caj", "Cbb", 
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvv", "Gaa", 
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", 
"Gvr", "Gvv"), class = "factor"), AJ = structure(c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 
1L, 10L, 10L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv", 
"Cja", "Cjr", "Crj", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", 
"Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grj ", "Grr", "Grv", 
"Gvb", "Gvr", "Gvv"), class = "factor"), JA = structure(c(10L, 
10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 6L, 6L, 6L, 6L), .Label = c("", "Caa", "Caj", "Cbv", 
"Cja", "Cjr", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", 
"Gbv", "Gja", "Gjr", "Grr", "Grv", "Gvb", "Gvv"), class = "factor"), 
VR = structure(c(1L, 5L, 5L, 5L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("", 
"Caa", "Caj", "Caj ", "Cbv", "Cja", "Cjr", "Crj", "Crr", 
"Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr", 
"Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"), 
RV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 1L, 1L), .Label = c("", 
"Caa", "Caj", "Cbb", "Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", 
"Cvr", "Cvv", "Cvv ", "Gaa", "Gaj", "Gbb", "Gbv", "Gja", 
"Gjr", "Grj", "Grr", "Grv", "Gvr", "Gvv"), class = "factor")), .Names = c("RR", 
"AA", "BB", "VV", "RJ", "JR", "BV", "VB", "AJ", "JA", "VR", "RV"
), row.names = c(NA, 20L), class = "data.frame")

上記のように、目的のマトリックスは関係と行の順序を維持します。GSeeは適用可能な回答を提供してくれましたが、それはマトリックスの1つの列にしか適用できません。これは、`[[` が単一のエントリしか選択できず、`[` で複数のエントリを選択しても機能しないためです。これで正しい方向に向かっているかどうかはわかりません...

実際のデータセット(上記のとおり)に基づいて、目的の出力(最初の3行)は次のようになります。

structure(list(vv = structure(c(1L, 1L, 1L), .Label = "CRR", class = "factor"), 
    rv = c(NA, NA, NA), ja = structure(c(1L, 1L, 1L), .Label = "CVV", class = "factor"), 
    aa = structure(c(1L, 1L, 1L), .Label = "CAJ", class = "factor"), 
    bv = structure(c(1L, 2L, 2L), .Label = c("", "CVR"), class = "factor"), 
    aj = structure(c(1L, 2L, 2L), .Label = c("", "CRJ"), class = "factor"), 
    vb = structure(c(1L, 1L, 1L), .Label = "CAA", class = "factor"), 
    rj = structure(c(2L, 1L, 1L), .Label = c("", "CRJ"), class = "factor"), 
    rr = structure(c(1L, 1L, 1L), .Label = "CBB", class = "factor"), 
    vr = structure(c(1L, 1L, 1L), .Label = "CJA", class = "factor"), 
    bb = structure(c(1L, 1L, 1L), .Label = "CBV", class = "factor"), 
    jr = c(NA, NA, NA)), .Names = c("vv", "rv", "ja", "aa", "bv", 
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), class = "data.frame", row.names = c(NA, 
-3L))

これがもっと理にかなっていることを願っています。

4

2 件の回答

5

これは少しハードコーディングされていますが、アイデアはそこにあります。

# library() fails loudly if a package is missing; require() would only
# return FALSE and let the script die later with a confusing error.
library(stringr)
library(plyr)

# Sample column. The factor levels of RR drive the output columns, so the
# column must be a factor explicitly (data.frame() stopped creating factors
# by default in R 4.0, which would make levels() return NULL).
vect <- data.frame(RR = c("Cvv", "Cvv", "Caa"), stringsAsFactors = TRUE)
codes <- levels(vect$RR)

# For each distinct code, keep it in the rows where it occurs and NA elsewhere.
# adply() returns one row per code; transposing gives one column per code, and
# dropping the first row removes the id column that adply() adds.
# drop = FALSE keeps a matrix even when there is only one distinct code.
theMat <- t(adply(codes, .margins = 1, .fun = function(x) {
  str_extract(string = as.character(vect$RR), pattern = x)
}))[-1, , drop = FALSE]

# New headers are characters 2-3 of each code ("Cvv" -> "vv").
colnames(theMat) <- str_sub(codes, start = 2, end = 3)

# Swap the extracted suffix for the original header name ("Cvv" -> "CRR").
# Assigning into theMat[] keeps the matrix shape and column names; a plain
# assignment would replace the matrix with a bare character vector.
theMat[] <- str_replace(
  string = theMat,
  pattern = paste(colnames(theMat), collapse = "|"),
  replacement = "RR"
)
2012-06-16T22:17:37.920 に回答
3

大丈夫。私はcodez4uを持っています。

# `dat` is the data.frame that was created from the `dput` output in the
# question. Convert it to a character matrix so that every cell can be
# handled uniformly as a string.
m <- as.matrix(dat)

# Some factor labels in the question's data carry stray whitespace
# (e.g. " " and "Cbb "). A whitespace-only cell is not "" and would later
# produce an empty column key, so trim every cell first. Assigning into m[]
# keeps the matrix dimensions and dimnames.
m[] <- trimws(m)

# Blank out the row-10 "AJ" entry, which is treated as a typo/error in the
# data.
m[10, "AJ"] <- ""

出力行列の名前を見つけて、行列を作成します(今のところNAで埋められています)

# Cells that actually hold a code: neither missing nor the empty string.
# (Comparing with != "" would yield NA for missing cells, which paste() then
# turns into the literal string "NA" and a bogus "A" column.)
filled <- m[!is.na(m) & nzchar(m)]

# Output column names: characters 2-3 of every code, in order of first
# appearance (column-major over `m`).
ocn <- unique(substr(filled, 2, 3))

# Result matrix: one row per row of `m`, one column per distinct suffix,
# pre-filled with character NAs so its type does not depend on later writes.
out <- matrix(NA_character_, nrow = nrow(m), ncol = length(ocn))
colnames(out) <- ocn

各列の各行をループします

# Walk the input matrix column by column. Every non-empty cell such as "Cvv"
# in source column "RR" is written to the output column named by characters
# 2-3 of the cell ("vv"), as the cell's first character followed by the source
# column name ("CRR"). Later source columns overwrite earlier ones when they
# target the same output cell.
for (col_idx in seq_len(NCOL(m))) {
  src_name <- colnames(m)[col_idx] # becomes the tail of the new value
  # visit only the rows of this column that hold something (i.e. not "")
  for (row_idx in which(nzchar(m[, col_idx]))) {
    cell <- m[row_idx, col_idx]
    target <- substr(cell, 2, 3) # output column for this cell
    prefix <- substr(cell, 1, 1) # leading letter carried over unchanged
    out[row_idx, target] <- paste0(prefix, src_name)
  }
}
out
#      vv    vb    rr    ja    rj    aj    vr    bb    jr    rv    aa    bv   
# [1,] "CRR" "CAA" "CBB" "CVV" "CJR" NA    "CJA" "CBV" NA    NA    "CAJ" NA   
# [2,] "CRR" "CAA" "CBB" "CVV" "CJR" "CRJ" "CJA" "CBV" NA    NA    "CAJ" "CVR"
# [3,] "CRR" "CAA" "CBB" "CVV" "GJR" "CRJ" "CJA" "CBV" "CVB" NA    "CAJ" "CVR"
# [4,] "GRR" NA    "CBB" "CVV" "GJR" NA    NA    "CBV" "CVB" NA    "CAJ" "CVR"
# [5,] NA    NA    "CBB" "CVV" "GJR" NA    NA    "CBV" "GVB" NA    "CAJ" "GVR"
# [6,] NA    NA    "CBB" "CVV" "GJR" NA    "CRJ" "CBV" "GVB" NA    "CAJ" "GVR"
# [7,] NA    NA    "CBB" "CVV" "GJR" NA    "CRJ" "CBV" NA    NA    "CAJ" "GVR"
# [8,] "CAA" NA    "CBB" "CVV" "GJR" NA    "CRJ" "CBV" NA    NA    "CAJ" "GVR"
# [9,] "GAA" NA    "CBB" "CVV" "GJR" "CRV" "CRJ" "CBV" NA    "CVB" NA    "GVR"
# [10,] "GAA" NA    "CBB" "CVV" "GJR" "CRV" "CRJ" "CBV" NA    "CVB" NA    "GVR"
# [11,] "GAA" NA    "CBB" "CVV" "GJR" "GRV" NA    "CBV" NA    "GVB" NA    "GVR"
# [12,] "GAA" NA    "CBB" NA    "GJR" "GRV" "CRJ" "CBV" NA    "GVB" NA    "GVR"
# [13,] "GAA" NA    "CBB" NA    "GJR" "GRV" "CRJ" "CBV" NA    "GVB" NA    "GVR"
# [14,] "GAA" NA    "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" NA    NA    NA    "GVR"
# [15,] "GAA" NA    "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" NA    "GVB" NA    "GVR"
# [16,] "GAA" NA    "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" NA    "GVB" NA    "GVR"
# [17,] "GAA" "CAJ" "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" "CJA" "GVB" NA    "GVR"
# [18,] "GAA" NA    "CBB" "GVV" "GJR" "GRV" "CRJ" "CBV" "CJA" "GVB" NA    "GVR"
# [19,] "GAA" "CAJ" "CBB" "GVV" "GJR" NA    "CRJ" "CBV" "CJA" "GVB" NA    "GVR"
# [20,] NA    "CAJ" "CBB" "GVV" "GJR" NA    "CRJ" "CBV" "CJA" NA    NA    "GVR"
2012-06-17T04:13:18.843 に回答