0

以下は、df という名前のフォーマット済みデータフレームです。

Company     Category    Margin     Ranking
SBI             BK      34.5       1
PNB             BK      39.5       2
UCO BANK        BK      39.9       3 
BANK            BK      41.3       4
INDIAN BANK     BK      42.3       5
DENA BANK       BK      44.5       6
VIJAYA BANK     BK      44.5       7
UNION BANK      BK      47.6       8
CENTRAL BANK    BK      49.8       9
INFOSYS         IT      5.6        1
HCL TECH        IT      5.9        2
TCS             IT      6.9        3
CMC             IT      12.6       4
TECHMAHINDRA    IT      12.6       5
COGNIZANT       IT      15.8       6
IGATE           IT      22.4       7
WIPRO           IT      22.9       8
HEXAWARE        IT      34.8       9
MAHINDRA SATYAM IT      34.8       10
DR. REDDYS      PH      14.5       1
SUN PHARMA      PH      19.2       2
CIPLA           PH      23.9       3
LUPIN           PH      23.9       4
DIVIS LABS      PH      29         5

(i) ベクトル (カテゴリのセット) と (ii) ランク (整数) の 2 つのパラメーターを受け取る関数 rankCompany() を作成したいと思います。

関数の出力 - Data.Frame ( df.out )

df.out の内容

指定されたランキングに一致する会社名、カテゴリ、およびマージンを含みます。
いずれかのカテゴリに、指定されたランキングに一致する会社がない場合は、その列に <NA> を返す必要があります。

以下は、関数をテストするためのサンプル コードです。

テスト #1

catg <- c("BK", "IT", "PH")
rankCompany(catg, 2)
    Company          Category        Margin
BK  PNB              BK              39.5
IT  HCL TECH         IT              5.9
PH  SUN PHARMA       PH              19.2

テスト#2

catg <- c("BK", "IT", "PH")
rankCompany(catg, 7)
    Company          Category        Margin
BK  VIJAYA BANK      BK              44.5
IT  IGATE            IT              22.4
PH  <NA>             PH              <NA>  

テスト #3

catg <- c("BK", "IT", "PH", "EG")
rankCompany(catg, 10)
    Company          Category        Margin
BK  <NA>             BK              <NA>
IT  MAHINDRA SATYAM  IT              34.8
PH  <NA>             PH              <NA>
EG  <NA>             EG              <NA>

これを行う簡単な方法はありますか?

4

2 に答える 2

7

調べるべきなのは、おそらく merge 関数でしょう。関数の例を次に示します。

#' Look up the company at a given rank in each requested category
#'
#' Builds one (Category, Ranking) request per entry of `catg` and merges it
#' with `inDF` on the columns the two data frames share by name.
#'
#' @param inDF Data frame to search; defaults to `mydf`.
#' @param catg Vector of category codes to look up.
#' @param ranking Rank to select within every category.
#' @return The merged data frame. Because `all.y = TRUE`, a request without a
#'   matching row in `inDF` is still returned, with `NA` in the columns that
#'   come only from `inDF`.
rankCompany <- function(inDF = mydf, catg, ranking) {
  requested <- data.frame(Category = catg, Ranking = ranking)
  merge(x = inDF, y = requested, all.y = TRUE)
}

これが「テストケース」です。

# Test 1: rank 2 exists in all three requested categories, so every row is
# filled in. The commented lines show the printed result.
test1 <- c("BK", "IT", "PH")
rankCompany(catg = test1, ranking = 2)
#   Category Ranking    Company Margin
# 1       BK       2        PNB   39.5
# 2       IT       2   HCL TECH    5.9
# 3       PH       2 SUN PHARMA   19.2

# Test 2: PH has no company at rank 7, so its Company and Margin come back NA.
test2 <- c("BK", "IT", "PH")
rankCompany(catg = test2, ranking = 7)
#   Category Ranking     Company Margin
# 1       BK       7 VIJAYA BANK   44.5
# 2       IT       7       IGATE   22.4
# 3       PH       7        <NA>     NA

# Test 3: "EG" is requested as well; only IT has a company at rank 10, and the
# other requested categories are still listed with NA.
test3 <- c("BK", "IT", "PH", "EG")
rankCompany(catg = test3, ranking = 10)
#   Category Ranking         Company Margin
# 1       BK      10            <NA>     NA
# 2       IT      10 MAHINDRA SATYAM   34.8
# 3       PH      10            <NA>     NA
# 4       EG      10            <NA>     NA

アップデート

「worst」で何が必要なのかがわかったので、いくつかの理由であなたの関数よりも少し優れた代替案を次に示します (たとえば、関数の本体に値をハードコーディングしない点など)。

#' Look up the company at a given rank, or the worst-ranked company, per category
#'
#' @param inDF Data frame with `Category` and `Ranking` columns; defaults to
#'   `mydf`.
#' @param catg Category codes to look up. For `ranking = "worst"` this may be
#'   either a set of codes (e.g. `c("BK", "IT")`) or a whole category column;
#'   only its distinct values are used.
#' @param ranking A rank to select within every category, or the string
#'   `"worst"` to select the row with the highest `Ranking` in each category.
#' @return For `"worst"`: the selected rows of `inDF`, one per category found
#'   in `inDF`, with the category codes as row names. Otherwise: the merge of
#'   `inDF` with one (Category, Ranking) request per entry of `catg`, keeping
#'   unmatched requests with `NA` in the remaining columns.
rankCompany <- function(inDF = mydf, catg, ranking) {
  # identical() keeps the condition a single TRUE/FALSE for any `ranking`.
  if (identical(ranking, "worst")) {
    cats <- unique(as.character(catg))
    if (length(cats) == 0L) {
      return(inDF[0, , drop = FALSE])
    }
    picked <- lapply(cats, function(ct) {
      rows <- inDF[inDF$Category %in% ct, , drop = FALSE]
      # Select by the Ranking value itself rather than by row position, so the
      # result does not depend on inDF being pre-sorted.
      rows[which.max(rows$Ranking), , drop = FALSE]
    })
    names(picked) <- cats
    # rbind() uses the list names as row names for the single-row pieces.
    do.call(rbind, picked)
  } else {
    merge(inDF, data.frame(Category = catg, Ranking = ranking), all.y = TRUE)
  }
}

# Example: the whole Category column of mydf is passed as `catg`; the commented
# lines show the printed result, one row per category.
rankCompany(catg = mydf$Category, ranking = "worst")
#            Company Category Margin Ranking
# BK    CENTRAL BANK       BK   49.8       9
# IT MAHINDRA SATYAM       IT   34.8      10
# PH      DIVIS LABS       PH   29.0       5

今後は、他のユーザーが簡単に再現できるように、dput を使ってデータを共有してください。次のようになります。

# Sample data: 24 companies in three categories (BK, IT, PH), each with a
# margin and a rank within its category. Company and Category are factors
# with explicitly listed levels.
mydf <- data.frame(
  Company = factor(
    c(
      "SBI", "PNB", "UCO BANK", "BANK", "INDIAN BANK", "DENA BANK",
      "VIJAYA BANK", "UNION BANK", "CENTRAL BANK",
      "INFOSYS", "HCL TECH", "TCS", "CMC", "TECHMAHINDRA", "COGNIZANT",
      "IGATE", "WIPRO", "HEXAWARE", "MAHINDRA SATYAM",
      "DR. REDDYS", "SUN PHARMA", "CIPLA", "LUPIN", "DIVIS LABS"
    ),
    levels = c(
      "BANK", "CENTRAL BANK", "CIPLA", "CMC", "COGNIZANT", "DENA BANK",
      "DIVIS LABS", "DR. REDDYS", "HCL TECH", "HEXAWARE", "IGATE",
      "INDIAN BANK", "INFOSYS", "LUPIN", "MAHINDRA SATYAM", "PNB", "SBI",
      "SUN PHARMA", "TCS", "TECHMAHINDRA", "UCO BANK", "UNION BANK",
      "VIJAYA BANK", "WIPRO"
    )
  ),
  Category = factor(
    rep(c("BK", "IT", "PH"), times = c(9L, 10L, 5L)),
    levels = c("BK", "IT", "PH")
  ),
  Margin = c(
    34.5, 39.5, 39.9, 41.3, 42.3, 44.5, 44.5, 47.6, 49.8,
    5.6, 5.9, 6.9, 12.6, 12.6, 15.8, 22.4, 22.9, 34.8, 34.8,
    14.5, 19.2, 23.9, 23.9, 29
  ),
  Ranking = c(1:9, 1:10, 1:5)
)
2013-10-17T16:23:59.070 に回答
0

これがこの質問の最終的な解決策です。

これは入力データフレームです

# Sample data: 24 companies in three categories (BK, IT, PH), each with a
# margin and a rank within its category. Company and Category are factors.
inDF <- structure(list(Company = structure(c(17L, 16L, 21L, 1L, 12L, 
    6L, 23L, 22L, 2L, 13L, 9L, 19L, 4L, 20L, 5L, 11L, 24L, 10L, 15L, 
    8L, 18L, 3L, 14L, 7L), .Label = c("BANK", "CENTRAL BANK", 
    "CIPLA", "CMC", "COGNIZANT", "DENA BANK", "DIVIS LABS", "DR. REDDYS", 
    "HCL TECH", "HEXAWARE", "IGATE", "INDIAN BANK", "INFOSYS", "LUPIN", 
    "MAHINDRA SATYAM", "PNB", "SBI", "SUN PHARMA", "TCS", "TECHMAHINDRA", 
    "UCO BANK", "UNION BANK", "VIJAYA BANK", "WIPRO"), class = "factor"), 
        Category = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
            2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), 
        .Label = c("BK", "IT", "PH"), class = "factor"), 
        Margin = c(34.5, 39.5, 39.9, 41.3, 42.3, 44.5, 44.5, 47.6, 49.8, 
        5.6, 5.9, 6.9, 12.6, 12.6, 15.8, 22.4, 22.9, 34.8, 34.8, 14.5, 
        19.2, 23.9, 23.9, 29), 
        Ranking = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 
        5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L)), 
        .Names = c("Company", "Category", "Margin", "Ranking"), 
        class = "data.frame", row.names = c(NA, -24L))

# The printout and the rankCompany() default argument that follow refer to
# this data as `mydf`, so bind it under that name too.
mydf <- inDF

入力データセットは次のようになります

> mydf
           Company Category Margin Ranking
1              SBI       BK   34.5       1
2              PNB       BK   39.5       2
3         UCO BANK       BK   39.9       3
4             BANK       BK   41.3       4
5      INDIAN BANK       BK   42.3       5
6        DENA BANK       BK   44.5       6
7      VIJAYA BANK       BK   44.5       7
8       UNION BANK       BK   47.6       8
9     CENTRAL BANK       BK   49.8       9
10         INFOSYS       IT    5.6       1
11        HCL TECH       IT    5.9       2
12             TCS       IT    6.9       3
13             CMC       IT   12.6       4
14    TECHMAHINDRA       IT   12.6       5
15       COGNIZANT       IT   15.8       6
16           IGATE       IT   22.4       7
17           WIPRO       IT   22.9       8
18        HEXAWARE       IT   34.8       9
19 MAHINDRA SATYAM       IT   34.8      10
20      DR. REDDYS       PH   14.5       1
21      SUN PHARMA       PH   19.2       2
22           CIPLA       PH   23.9       3
23           LUPIN       PH   23.9       4
24      DIVIS LABS       PH   29.0       5

# 「rankCompany」という関数を書く

#' Look up the company at a given rank within each requested category
#'
#' @param inDF Data frame with `Company`, `Category`, `Margin` and `Ranking`
#'   columns; defaults to `mydf`.
#' @param catg Vector of category codes to look up. Duplicates are ignored.
#' @param ranking Either a single number (the rank to select in every
#'   category) or the string `"worst"` (select the highest `Ranking` value
#'   present in each category).
#' @return A data frame with columns `Category`, `Ranking`, followed by the
#'   remaining columns of `inDF`, holding one row per distinct category in the
#'   order given by `catg`, with the category codes as row names. A category
#'   with no company at the requested rank (or absent from `inDF`) gets `NA`
#'   in the remaining columns. If several rows of `inDF` share the requested
#'   category and rank, only the first is kept.
rankCompany <- function(inDF = mydf, catg, ranking) {
  cats <- unique(as.character(catg))

  if (identical(ranking, "worst")) {
    # Compute the worst rank per requested category by name, so that `catg`
    # may be any subset or ordering of the categories in inDF.
    wanted_rank <- vapply(
      cats,
      function(ct) {
        ranks <- inDF$Ranking[inDF$Category %in% ct]
        ranks <- ranks[!is.na(ranks)]
        if (length(ranks) > 0L) as.numeric(max(ranks)) else NA_real_
      },
      numeric(1),
      USE.NAMES = FALSE
    )
  } else if (is.numeric(ranking) && length(ranking) == 1L) {
    wanted_rank <- rep(ranking, length(cats))
  } else {
    stop("`ranking` must be a single number or \"worst\"", call. = FALSE)
  }

  wanted <- data.frame(
    Category = cats,
    Ranking = wanted_rank,
    stringsAsFactors = FALSE
  )
  mgdf <- merge(inDF, wanted, all.y = TRUE)

  # merge() orders its result by the key columns, not by `catg`; realign the
  # rows with `cats` before labelling them so each row name matches its row.
  mgdf <- mgdf[match(cats, as.character(mgdf$Category)), , drop = FALSE]
  rownames(mgdf) <- cats
  mgdf
}

機能のテスト:

テストケース #1

test1 <- c("BK", "IT", "PH")
rankCompany(catg = test1, ranking = 2)
  Category Ranking    Company Margin
BK       BK       2        PNB   39.5
IT       IT       2   HCL TECH    5.9
PH       PH       2 SUN PHARMA   19.2

テストケース #2

test2 <- c("BK", "IT", "PH")
rankCompany(catg = test2, ranking = 7)
   Category Ranking     Company Margin
BK       BK       7 VIJAYA BANK   44.5
IT       IT       7       IGATE   22.4
PH       PH       7        <NA>     NA

テストケース #3

test3 <- c("BK", "IT", "PH", "EG")
rankCompany(catg = test3, ranking = 10)

   Category Ranking         Company Margin
BK       BK      10            <NA>     NA
IT       IT      10 MAHINDRA SATYAM   34.8
PH       PH      10            <NA>     NA
EG       EG      10            <NA>     NA

テストケース #4

test4 <- c("BK", "IT", "PH")
rankCompany(catg = test4, ranking = "worst")

   Category Ranking         Company Margin
BK       BK       9    CENTRAL BANK   49.8
IT       IT      10 MAHINDRA SATYAM   34.8
PH       PH       5      DIVIS LABS   29.0
2013-10-18T03:17:41.903 に回答