tidyr
きちんとしたデータ形式で作業してdplyr
から、テーブルを結合するために使用できます
library(dplyr)
library(tidyr)
パイプで片道
df1 %>%
mutate(id = as.character(id)) %>%
gather(key = "col", value = "val", -id) %>%
left_join(df2 %>%
mutate(id = as.character(id)) %>%
gather(key = "col", value = "val", -id),
by =c("id", "col")) %>%
transmute(id, col, val = ifelse(is.na(val.x), val.y, val.x)) %>%
spread(col, val) %>%
select(id, num_range("X", 1:10))
#> id X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
#> 1 LABEL1 1.0000 7.000 13.000 19 25 31 37 43 49 55
#> 2 LABEL2 2.0000 8.000 14.000 20 26 32 38 44 50 56
#> 3 LABEL3 3.0000 9.000 15.000 21 27 33 39 45 51 57
#> 4 LABEL4 419.3548 1696.774 2974.194 22 28 34 40 46 52 58
#> 5 LABEL5 738.7097 2016.129 3293.548 23 29 35 41 47 53 59
#> 6 LABEL6 1058.0645 2335.484 3612.903 24 30 36 42 48 54 60
順を追って説明
# id as character instead of factor
df1 <- df1 %>% mutate(id = as.character(id))
# tidy data
df1 <- df1 %>% gather(key = "col", value = "val", -id)
# print result as dplyr tbl
df1 %>% as.tbl()
#> Source: local data frame [60 x 3]
#>
#> id col val
#> (chr) (chr) (int)
#> 1 LABEL1 X1 1
#> 2 LABEL2 X1 2
#> 3 LABEL3 X1 3
#> 4 LABEL4 X1 NA
#> 5 LABEL5 X1 NA
#> 6 LABEL6 X1 NA
#> 7 LABEL1 X2 7
#> 8 LABEL2 X2 8
#> 9 LABEL3 X2 9
#> 10 LABEL4 X2 NA
#> .. ... ... ...
# idem on df2
df2 <- df2 %>%
mutate(id = as.character(id)) %>%
tidyr::gather(key = "col", value = "val", -id)
# print result as dplyr tbl
df2 %>% as.tbl()
#> Source: local data frame [32 x 3]
#>
#> id col val
#> (chr) (chr) (dbl)
#> 1 LABEL3 X1 100.0000
#> 2 LABEL4 X1 419.3548
#> 3 LABEL5 X1 738.7097
#> 4 LABEL6 X1 1058.0645
#> 5 LABEL3 X2 1377.4194
#> 6 LABEL4 X2 1696.7742
#> 7 LABEL5 X2 2016.1290
#> 8 LABEL6 X2 2335.4839
#> 9 LABEL3 X3 2654.8387
#> 10 LABEL4 X3 2974.1935
#> .. ... ... ...
# join only id and col level of df1 with df2
new.df <- left_join(df1, df2, by = c("id", "col"))
# print result as dplyr tbl
new.df %>% as.tbl()
#> Source: local data frame [60 x 4]
#>
#> id col val.x val.y
#> (chr) (chr) (int) (dbl)
#> 1 LABEL1 X1 1 NA
#> 2 LABEL2 X1 2 NA
#> 3 LABEL3 X1 3 100.0000
#> 4 LABEL4 X1 NA 419.3548
#> 5 LABEL5 X1 NA 738.7097
#> 6 LABEL6 X1 NA 1058.0645
#> 7 LABEL1 X2 7 NA
#> 8 LABEL2 X2 8 NA
#> 9 LABEL3 X2 9 1377.4194
#> 10 LABEL4 X2 NA 1696.7742
#> .. ... ... ... ...
#replace NA in col val.x from df1 by value val.y of df2
# and only keep id, col and new column val
new.df <- new.df %>% transmute(id, col, val = ifelse(is.na(val.x), val.y, val.x))
new.df %>% as.tbl()
#> Source: local data frame [60 x 3]
#>
#> id col val
#> (chr) (chr) (dbl)
#> 1 LABEL1 X1 1.0000
#> 2 LABEL2 X1 2.0000
#> 3 LABEL3 X1 3.0000
#> 4 LABEL4 X1 419.3548
#> 5 LABEL5 X1 738.7097
#> 6 LABEL6 X1 1058.0645
#> 7 LABEL1 X2 7.0000
#> 8 LABEL2 X2 8.0000
#> 9 LABEL3 X2 9.0000
#> 10 LABEL4 X2 1696.7742
#> .. ... ... ...
# put back data in wide format
new.df %>%
spread(col, val) %>%
select(id, num_range("X", 1:10)) # put column in same order as df1
#> id X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
#> 1 LABEL1 1.0000 7.000 13.000 19 25 31 37 43 49 55
#> 2 LABEL2 2.0000 8.000 14.000 20 26 32 38 44 50 56
#> 3 LABEL3 3.0000 9.000 15.000 21 27 33 39 45 51 57
#> 4 LABEL4 419.3548 1696.774 2974.194 22 28 34 40 46 52 58
#> 5 LABEL5 738.7097 2016.129 3293.548 23 29 35 41 47 53 59
#> 6 LABEL6 1058.0645 2335.484 3612.903 24 30 36 42 48 54 60