0

したがって、私の分析では、サンプルに 4 つのサブサンプルを作成する必要があります。

次の 4 つのグループについて、有権者の投票率を比較したいと考えています。

1) 世帯の他の有権者が 0 人の有権者
2) 世帯の他の有権者が 1 人の有権者
3) 世帯の他の有権者が 2 人の有権者
4) 世帯の他の有権者が 3 人以上の有権者

そのための変数は 7 つあります。



私のロジックでは、グループごとに 4 つのサブサンプルを作成する必要があります。

最初のグループ (HH の他の 0 人の有権者) は、以下を満たす観察です。

# Conditions for the first group (no other voter in the household), one per
# household size. vn437 is the household size and vn438a, vn438b, ... are the
# ages of the other household members; an age below 18 marks a non-voter.
d$vn437 == 1;  
d$vn437 == 2 & d$vn438a < 18;  
d$vn437 == 3 & d$vn438a < 18 & d$vn438b < 18  
d$vn437 == 4 & d$vn438a < 18 & d$vn438b < 18 & d$vn438c < 18; 

'd$vn438e < 18' で終了するまで続きます。

私はRの超初心者で、どうすればいいのかわかりません。
これらのグループを作成するにはどうすればよいですか? 私は本当に必死で、何時間も無駄に探していました!

Richard Telford が示唆したように、`dput(head(d))` コマンドの出力は次のとおりです。

structure(list(dat = c(20091026, 20091025, 20091025, 20091026, 
20091025, 20091025), vn1 = c(1, 2, 1, 1, 1, 1), vn542 = c(27, 
22, 25, 23, 24, 22), vn217 = c(4, 3, 2, 4, 3, 3), n111 = c(1, 
1, 1, 2, 1, 1), vn437 = c(2, 2, 2, 2, 2, 2), vn438a = c(28, 24, 
24, 24, 23, 25), vn438b = c(1000, 1000, 1000, 1000, 1000, 1000
), vn438c = c(1000, 1000, 1000, 1000, 1000, 1000), vn438d = c(1000, 
1000, 1000, 1000, 1000, 1000), vn438e = c(1000, 1000, 1000, 1000, 
1000, 1000), vn5 = c(4, 4, 4, 4, 4, 4), vn9a = c(5, 5, 5, 5, 
5, 5), vn75 = c(1, 1, 3, 2, 1, 3), vn79 = c(2, 2, 2, 2, 2, 2)), .Names = c("dat", 
"vn1", "vn542", "vn217", "n111", "vn437", "vn438a", "vn438b", 
"vn438c", "vn438d", "vn438e", "vn5", "vn9a", "vn75", "vn79"), row.names = c(2174L, 
2175L, 2177L, 2178L, 2180L, 2181L), class = "data.frame")  

ここで vn438b = 1000 などの値は NA(欠損値)を表します。これらの行を削除すると他の観測値まで失われてしまうため、世帯内の N 番目の人の年齢変数に含まれる欠損値は削除していません。

また、これが私の結果が最終的にどのように見えるかです

編集

なんとか自力で解決。興味のある人のために、ここに私のコードがあります:

# Replace the survey codes of columns 2 to 15 with descriptive names in a
# single assignment. Column 1 (`dat`) keeps its original name.
colnames(d)[2:15] <- c(
  "sex", "age", "polint", "turnout",
  "HHsize", "HHage2", "HHage3", "HHage4", "HHage5", "HHage6",
  "marital", "education", "income", "religion"
)


# ------------------------------------------------------------------
# Group 0: respondents with no other voter in the household.
# ------------------------------------------------------------------
# One subset per household size, stacked in ascending size order:
#   size 1 -> the respondent lives alone;
#   size 2 to 4 -> every other listed member is younger than 18.
# Households of size 5 produced no matching observations, so no subset is
# built for them.
zeroHHM <- rbind(
  d[d$HHsize == 1, ],
  d[d$HHsize == 2 & d$HHage2 < 18, ],
  d[d$HHsize == 3 & d$HHage2 < 18 & d$HHage3 < 18, ],
  d[d$HHsize == 4 & d$HHage2 < 18 & d$HHage3 < 18 & d$HHage4 < 18, ]
)

# Split the group by the respondent's own age:
# youth voters are younger than 26, non-youth voters are older than 25.
Youth0 <- zeroHHM[zeroHHM[["age"]] < 26, ]
Old0 <- zeroHHM[zeroHHM[["age"]] > 25, ]


##################################################
## Group 1: exactly 1 other voter in the HH     ##
##################################################
# A household member counts as a voter when the recorded age is 18 or older.
# Ages of 900 and above are treated as missing-value codes rather than real
# ages, so they must not pass the `> 17` test. The size-2 condition now
# carries the same `< 900` guard as the conditions for larger households.
# NOTE(review): every condition expects the adult to be listed first (HHage2)
# and the minors after it; a household listed in another order matches no
# condition here. Confirm that the survey orders members this way.
one1HHM <- d[d$HHsize == 2 & d$HHage2 > 17 & d$HHage2 < 900, ]
oneHHM1 <- d[d$HHsize == 3 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18, ]
oneHHM2 <- d[d$HHsize == 4 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18 &
               d$HHage4 < 18, ]
oneHHM3 <- d[d$HHsize == 5 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18 &
               d$HHage4 < 18 & d$HHage5 < 18, ]
oneHHM4 <- d[d$HHsize == 6 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18 &
               d$HHage4 < 18 & d$HHage5 < 18 & d$HHage6 < 18, ]

# Stack the per-size subsets into one data frame.
oneHHM <- rbind(one1HHM, oneHHM1, oneHHM2, oneHHM3, oneHHM4)

# The per-size subsets are no longer needed.
rm(one1HHM, oneHHM1, oneHHM2, oneHHM3, oneHHM4)

# Split by the respondent's own age: youth voters are younger than 26,
# non-youth voters are older than 25. The mask must come from oneHHM itself;
# a mask taken from another data frame has a different length and refers to
# different rows, so it would select the wrong observations.
Youth1 <- oneHHM[oneHHM$age < 26, ]
Old1 <- oneHHM[oneHHM$age > 25, ]


###################################################
## Group 2: exactly 2 other voters in the HH     ##
###################################################
# A household member counts as a voter when the recorded age is 18 or older
# and below 900 (ages of 900 and above are treated as missing-value codes).
# NOTE(review): the conditions expect the two adults to be listed first
# (HHage2, HHage3) and the minors after them; confirm that the survey orders
# household members this way.
twoHHM1 <- d[d$HHsize == 3 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900, ]
twoHHM2 <- d[d$HHsize == 4 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900 & d$HHage4 < 18, ]
twoHHM3 <- d[d$HHsize == 5 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900 & d$HHage4 < 18 &
               d$HHage5 < 18, ]
twoHHM4 <- d[d$HHsize == 6 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900 & d$HHage4 < 18 &
               d$HHage5 < 18 & d$HHage6 < 18, ]

# Stack the per-size subsets into one data frame.
twoHHM <- rbind(twoHHM1, twoHHM2, twoHHM3, twoHHM4)

# The per-size subsets are no longer needed.
rm(twoHHM1, twoHHM2, twoHHM3, twoHHM4)

# Split by the respondent's own age: youth voters are younger than 26,
# non-youth voters are older than 25. The mask must come from twoHHM itself;
# a mask taken from another data frame has a different length and refers to
# different rows, so it would select the wrong observations.
Youth2 <- twoHHM[twoHHM$age < 26, ]
Old2 <- twoHHM[twoHHM$age > 25, ]


####################################################
## Group 3: 3 or more other voters in the HH      ##
####################################################
# A household member counts as a voter when the recorded age is 18 or older
# and below 900 (ages of 900 and above are treated as missing-value codes).
# Only HHage2 to HHage4 are tested: once three other members are voters the
# respondent belongs to this group whatever the ages of further members.
# NOTE(review): the conditions expect the adults to be listed first
# (HHage2 to HHage4); confirm that the survey orders household members this
# way.
threeHHM1 <- d[d$HHsize == 4 & d$HHage2 > 17 & d$HHage2 < 900 &
                 d$HHage3 > 17 & d$HHage3 < 900 &
                 d$HHage4 > 17 & d$HHage4 < 900, ]
threeHHM2 <- d[d$HHsize == 5 & d$HHage2 > 17 & d$HHage2 < 900 &
                 d$HHage3 > 17 & d$HHage3 < 900 &
                 d$HHage4 > 17 & d$HHage4 < 900, ]
threeHHM3 <- d[d$HHsize == 6 & d$HHage2 > 17 & d$HHage2 < 900 &
                 d$HHage3 > 17 & d$HHage3 < 900 &
                 d$HHage4 > 17 & d$HHage4 < 900, ]

# Stack the per-size subsets into one data frame.
threeHHM <- rbind(threeHHM1, threeHHM2, threeHHM3)

# The per-size subsets are no longer needed.
rm(threeHHM1, threeHHM2, threeHHM3)

# Split by the respondent's own age: youth voters are younger than 26,
# non-youth voters are older than 25. The mask must come from threeHHM
# itself; a mask taken from another data frame has a different length and
# refers to different rows, so it would select the wrong observations.
Youth3 <- threeHHM[threeHHM$age < 26, ]
Old3 <- threeHHM[threeHHM$age > 25, ]


# Give each household group its final name (HHM0 to HHM3, numbered by the
# count of other voters in the household) and drop the working name right
# after the copy.
HHM0 <- zeroHHM
rm(zeroHHM)

HHM1 <- oneHHM
rm(oneHHM)

HHM2 <- twoHHM
rm(twoHHM)

HHM3 <- threeHHM
rm(threeHHM)
4

0 に答える 0