次のデータ フレームがあります https://www.dropbox.com/s/c02qu7uobvrc8ku/college_Rda
これはデータのサンプルです: ( copy+paste
'able)
educational_history <- structure(list(SCH_COLLEGE_STATUS_1997_09 = structure(c(1L, 1L,
1L, 1L, 5L, 1L, 1L, 5L, 5L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_1998_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_1999_09 = structure(c(3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2000_09 = structure(c(3L,
3L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2001_09 = structure(c(3L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2002_09 = structure(c(3L,
3L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2003_09 = structure(c(1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2004_09 = structure(c(1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2005_09 = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2006_09 = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2007_09 = structure(c(1L,
1L, 1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2008_09 = structure(c(1L,
1L, 1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2009_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2010_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2011_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), PUBID = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10)), .Names = c("SCH_COLLEGE_STATUS_1997_09",
"SCH_COLLEGE_STATUS_1998_09", "SCH_COLLEGE_STATUS_1999_09", "SCH_COLLEGE_STATUS_2000_09",
"SCH_COLLEGE_STATUS_2001_09", "SCH_COLLEGE_STATUS_2002_09", "SCH_COLLEGE_STATUS_2003_09",
"SCH_COLLEGE_STATUS_2004_09", "SCH_COLLEGE_STATUS_2005_09", "SCH_COLLEGE_STATUS_2006_09",
"SCH_COLLEGE_STATUS_2007_09", "SCH_COLLEGE_STATUS_2008_09", "SCH_COLLEGE_STATUS_2009_09",
"SCH_COLLEGE_STATUS_2010_09", "SCH_COLLEGE_STATUS_2011_09", "PUBID"
), row.names = c(NA, 10L), class = "data.frame")
そのデータを使用して新しいデータ フレームを生成したいと考えています。
PUBID と 4 年制大学に入学した初年度の 2 つのフィールドだけが必要です。年に関する情報は、列の名前の中にあります。私は試した:
FirstYear4C <- function(ID) {
ndX=which(educational_history$PUBID==ID)
educational_historyNdX=educational_history[ndX,]
year=NA
if (educational_historyNdX$SCH_COLLEGE_STATUS_1997_09=="Enrolled in 4-year college"){
year=1997
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_1998_09=="Enrolled in 4-year college"){
year=1998
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_1999_09=="Enrolled in 4-year college"){
year=1999
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_2000_09=="Enrolled in 4-year college"){
year=2000
return(year)
}
return(NA)
}
FirstYear<-unlist(lapply(X=educational_history$PUBID,FirstYear4C))
FourYearCollege<-data.frame(PUBID=educational_history$PUBID,
FirstYear=FirstYear)
その関数をコーディングするより良い方法があると確信しています。列ごとにコピーして貼り付ける必要があるのは非常に非効率的です。
PUBID 1stYear4YC
1 1999
2 2000
...
6 2000