KNIME の R Learner ノードを使用しています。次のような行列を離散化したいと考えています。
> my_matrix= as(knime.in,"matrix");
> dput(head(my_matrix, 5))
structure(c("KS", "OH", "NJ", "OH", "OK", "128", "107", "137",
" 84", " 75", "415", "415", "415", "408", "415", "No", "No",
"No", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "25", "26",
" 0", " 0", " 0", "265.1", "161.6", "243.4", "299.4", "166.7",
"110", "123", "114", " 71", "113", "45.07", "27.47", "41.38",
"50.90", "28.34", "197.4", "195.5", "121.2", " 61.9", "148.3",
" 99", "103", "110", " 88", "122", "16.78", "16.62", "10.30",
" 5.26", "12.61", "244.7", "254.4", "162.6", "196.9", "186.9",
" 91", "103", "104", " 89", "121", "11.01", "11.45", " 7.32",
" 8.86", " 8.41", "10.0", "13.7", "12.2", " 6.6", "10.1", " 3",
" 3", " 5", " 7", " 3", "2.70", "3.70", "3.29", "1.78", "2.73",
"1", "1", "0", "2", "3", "False", "False", "False", "False",
"False"), .Dim = c(5L, 20L), .Dimnames = list(c("Row0", "Row1",
"Row2", "Row3", "Row4"), c("State", "Account length", "Area code",
"International plan", "Voice mail plan", "Number vmail messages",
"Total day minutes", "Total day calls", "Total day charge", "Total eve minutes",
"Total eve calls", "Total eve charge", "Total night minutes",
"Total night calls", "Total night charge", "Total intl minutes",
"Total intl calls", "Total intl charge", "Customer service calls",
"Churn")))
次のコードを使用して行列を離散化しています。
require(arules)
#require(arulesViz)
my_matrix= as(knime.in,"matrix");
my_rows= nrow(my_matrix);
my_cols= ncol(my_matrix);
#discretize(x, method="interval", categories = 3, labels = NULL,
# ordered=FALSE, onlycuts=FALSE, ...)
typeof(my_matrix)
vector = my_matrix[,2]
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
my_matrix[,3] = ...
etc...
次のコード行を実行すると:
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
次のエラーが表示されます。
seq.default(from = min(x, na.rm = TRUE), to = max(x, na.rm = TRUE), ...) でエラー: 'from' は NA、NaN、または無限大にはできません
確認のために、その直前に「sum(is.na(vector))」を追加すると:
vector = my_matrix[,2]
sum(is.na(vector))
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
次の結果が得られます:
> sum(is.na(vector))
[1] 0
つまり、ベクトルに NA 要素はありません。なお、typeof(my_matrix) の結果は "character" です。ベクトルを表示すると、次のようになります。
> vector = my_matrix[,2]
> sum(is.na(vector))
[1] 0
> head(vector, 20)
Row0 Row1 Row2 Row3 Row4 Row5 Row6 Row7 Row8 Row9 Row10 Row11 Row12
"128" "107" "137" " 84" " 75" "118" "121" "147" "117" "141" " 65" " 74" "168"
Row13 Row14 Row15 Row16 Row17 Row18 Row19
" 95" " 62" "161" " 85" " 93" " 76" " 73"