`mydf` と呼ばれるこのデータがあります。
列 `REF` と `ALT` 内の文字 (DNA 文字) を `colnames(x)` (`"A","T","G","C"`) と一致させ、対応する数値を `"REF,ALT"` の形式で貼り付ける必要があります。
ただし、`TYPE` 列の値が `"snp:+[0-9]"` や `"flat$"` にマッチする行がいくつかあります。
`"flat$"` の行については、次のようにしたいです:
- `ALT` の文字が一意である場合、フラット行自体を含め、同じ `"start"` ID を持つ `"snp:+[0-9]"` 行の対応する `ALT` 値をできるだけ多く合計します (1 つのフラット行について中括弧で囲まれたスクリプトを参照してください)。
- その `ALT` 値を `REF` 値と `"REF,ALT"` としてもう一度貼り付けます (`REF` 値は、同じ開始 ID を持つ `"snp:+[0-9]"` 行と `"flat$"` 行の両方で同じになります)。
- 結果に示すような出力を取得します。
私はこれを `flatcase` で 1 つのフラット行に対して実行しましたが、すべてのフラット行に対して同じことができるように関数を作成するのに助けが必要です。
`flatcase` の処理をすべてのフラット行に対して行う関数を作成するにはどうすればよいですか?
コード
# Build the "REF,ALT" count string for an ordinary (non-flat) row.
#
# x:  one row of the matrix as a vector of cell values.
# ns: the column names, in the same order as the values in `x`.
#
# The letter stored in the REF (resp. ALT) cell is used as a column name, and
# the value found in that column of the same row is taken as the count.
# Returns the two counts joined by a comma.
normalCase <- function(x, ns) {
  # Value of the column whose name equals the letter stored under `field`.
  count.for <- function(field) {
    allele <- x[which(ns == field)]
    x[which(ns == allele)]
  }
  ref.count <- count.for("REF")
  alt.count <- count.for("ALT")
  paste(ref.count, alt.count, sep = ",")
}
# Worked example for a single flat row (start "chr16:2530921"). This is a
# sketch of the intended steps rather than a finished function definition.
# NOTE(review): `x` is presumably the character matrix built from the sample
# data further down - confirm.
flatcase??{
# Flag rows with this start whose TYPE matches "snp:+[0-9]".
g<-x[,"start"]=="chr16:2530921"& grepl("snp:+[0-9]",x[,"TYPE"])
myt<-x[g,]
# ALT letters of the flagged rows, then their distinct values.
x[g,"ALT"]
unique(x[g,"ALT"])
c<-unique(x[g,"ALT"])
# Among the flagged rows, keep those whose TYPE ends in "flat".
flat<-myt[grepl("flat$",myt[,"TYPE"]),]
c<-unique(x[g,"ALT"])
# Index `flat` by the distinct ALT letters and sum the values as numbers.
alt.count<- sum(as.numeric(flat[c]))
}
# Dispatch one row to the matching depth calculation.
#
# x:   one row as a vector of cell values.
# mat: the full matrix; forwarded to flatCase().
# ns:  column names matching the positions in `x`.
#
# Rows whose TYPE value ends in "flat" are handled by flatCase(); every other
# row is handled by normalCase().
calculateAD <- function(x, mat, ns) {
  type.value <- x[which(ns == "TYPE")]
  is.flat <- grepl("flat$", type.value)
  if (!is.flat) {
    return(normalCase(x, ns))
  }
  flatCase(x, mat, ns)
}
# Append a "bam.AD" column holding the per-row result of calculateAD().
#
# x: a matrix with column names. Each row is passed to calculateAD() together
#    with the whole matrix and its column names.
#
# Returns `x` with the results bound on as the last column, named "bam.AD".
bamAD <- function(x) {
  ns <- colnames(x)
  ad <- apply(x, 1, calculateAD, x, ns)
  # deparse.level = 0 keeps cbind() from labelling the new column "ad".
  new.x <- cbind(x, ad, deparse.level = 0)
  colnames(new.x)[ncol(new.x)] <- "bam.AD"
  new.x
}
flatCase として試した関数は次のとおりです。
# Build the "REF,ALT" count string for a flat row.
#
# x:   the flat row as a vector of cell values.
# mat: the full character matrix the row comes from.
# ns:  column names of `mat`, in the same order as the values in `x`.
#
# The REF count is the value in the column named by the row's REF letter.
# The ALT count is the sum, over the distinct ALT letters found on every row
# that shares this row's start id and whose TYPE matches "snp:+[0-9]" (the
# flat row itself included), of the flat row's own value in that letter's
# column. Returns the two counts joined by a comma.
flatCase <- function(x, mat, ns) {
  id.idx <- which(ns == "start")
  type.idx <- which(ns == "TYPE")
  ref.idx <- which(ns == "REF")
  alt.idx <- which(ns == "ALT")

  # Compare ids exactly: treating the id as a regular expression would also
  # match longer ids that merely contain it (e.g. "chr1:12" vs "chr1:123").
  same.site <- which(
    mat[, id.idx] == x[id.idx] & grepl("snp:+[0-9]", mat[, type.idx])
  )
  # drop = FALSE keeps a matrix even when only the flat row itself matches.
  m <- mat[same.site, , drop = FALSE]

  # Count each ALT letter once, however many rows carry it.
  alt.alleles <- unique(m[, alt.idx])
  alt.cols <- match(alt.alleles, ns)
  # Letters without a matching column contribute nothing to the sum.
  alt.cols <- alt.cols[!is.na(alt.cols)]
  alt.count <- sum(as.numeric(x[alt.cols]))

  ref.count <- x[which(ns == x[ref.idx])]
  paste(ref.count, alt.count, sep = ",")
}
mydf
# Example input. read.csv() parses the count columns as numbers, and
# as.matrix() then turns the mixed data frame into a character matrix, so the
# counts end up as strings padded to a common width per column (e.g. " 1").
x <- as.matrix(
  read.csv(
    text = "start,A,T,G,C,REF,ALT,TYPE
chr20:5363934,95,29,14,59,C,T,snp
chr5:8529759,24,1,28,41,G,C,snp
chr14:9620689,65,49,41,96,T,G,snp
chr18:547375,94,1,51,67,G,C,snp
chr8:5952145,27,80,25,96,T,T,snp
chr14:8694382,68,94,26,30,A,A,snp
chr16:2530921,49,15,79,72,A,T,snp:2530921
chr16:2530921,49,15,79,72,A,G,snp:2530921
chr16:2530921,49,15,79,72,A,T,snp:2530921flat
chr16:2533924,42,13,19,52,G,T,snp:2533924flat
chr16:2543344,4,13,13,42,G,T,snp:2543344flat
chr16:2543344,4,23,13,42,G,A,snp:2543344
chr14:4214117,73,49,18,77,G,A,snp
chr4:7799768,36,28,1,16,C,A,snp
chr3:9141263,27,41,93,90,A,A,snp",
    stringsAsFactors = FALSE
  )
)
結果:
start A T G C REF ALT TYPE bam.AD
[1,] "chr20:5363934" "95" "29" "14" "59" "C" "T" "snp" "59,29"
[2,] "chr5:8529759" "24" " 1" "28" "41" "G" "C" "snp" "28,41"
[3,] "chr14:9620689" "65" "49" "41" "96" "T" "G" "snp" "49,41"
[4,] "chr18:547375" "94" " 1" "51" "67" "G" "C" "snp" "51,67"
[5,] "chr8:5952145" "27" "80" "25" "96" "T" "T" "snp" "80,80"
[6,] "chr14:8694382" "68" "94" "26" "30" "A" "A" "snp" "68,68"
[7,] "chr16:2530921" "49" "15" "79" "72" "A" "T" "snp:2530921" "49,15"
[8,] "chr16:2530921" "49" "15" "79" "72" "A" "G" "snp:2530921" "49,79"
[9,] "chr16:2530921" "49" "15" "79" "72" "A" "T" "snp:2530921flat" "49,94"
[10,] "chr16:2533924" "42" "13" "19" "52" "G" "T" "snp:2533924flat" "19,13"
[11,] "chr16:2543344" "42" "13" "13" "42" "G" "T" "snp:2543344flat" "13,55"
[12,] "chr16:2543344" "42" "23" "13" "42" "G" "A" "snp:2543344" "13,42"
[13,] "chr14:4214117" "73" "49" "18" "77" "G" "A" "snp" "18,73"
[14,] "chr4:7799768" "36" "28" " 1" "16" "C" "A" "snp" "16,36"
[15,] "chr3:9141263" "27" "41" "93" "90" "A" "A" "snp" "27,27"