最初に sox へのシステム コールを実行し、その出力を取得する必要があります。例えば:
> spam = system("sox worf.wav -n stat 2>&1", intern = TRUE)
> spam
[1] "Samples read: 34000" "Length (seconds): 3.083900"
[3] "Scaled by: 2147483647.0" "Maximum amplitude: 0.999969"
[5] "Minimum amplitude: -0.938721" "Midline amplitude: 0.030624"
[7] "Mean norm: 0.190602" "Mean amplitude: -0.004302"
[9] "RMS amplitude: 0.244978" "Maximum delta: 1.340240"
[11] "Minimum delta: 0.000000" "Mean delta: 0.051444"
[13] "RMS delta: 0.099933" "Rough frequency: 715"
[15] "Volume adjustment: 1.000"
intern = TRUE を設定すると、コマンドの出力が変数に返されます。奇妙なことに、sox は出力を stdout ではなく stderr に書き出すため、2>&1 が必要になります。最善の方法は、system の出力を後処理する関数でこれをラップすることです。
# Run `sox <wav_file> -n stat` and return the reported statistics as a
# data frame.
#
# Args:
#   wav_file: path to one audio file to hand to sox.
#
# Returns:
#   A data frame with one row per "label: value" line printed by sox and two
#   columns: `variable` (the text before the first colon) and `value` (the
#   text after it converted with as.numeric(); non-numeric text becomes NA
#   with the usual coercion warning). Lines without a colon are ignored.
#
# Raises:
#   An error when sox exits with a non-zero status.
get_wav_stats = function(wav_file) {
  # shQuote() keeps paths containing spaces or shell metacharacters intact.
  # sox prints the statistics on stderr, so stderr is folded into stdout to
  # let intern = TRUE capture them.
  command = sprintf("sox %s -n stat 2>&1", shQuote(wav_file))
  rough_wav_stats = system(command, intern = TRUE)

  # With intern = TRUE a non-zero exit code is reported through the "status"
  # attribute of the result; fail loudly instead of parsing an error message.
  exit_status = attr(rough_wav_stats, "status")
  if (!is.null(exit_status) && exit_status != 0) {
    stop(
      sprintf(
        "sox failed for '%s' (exit status %s): %s",
        wav_file, exit_status, paste(rough_wav_stats, collapse = "\n")
      ),
      call. = FALSE
    )
  }

  # Split each line at its FIRST colon only, so every row has exactly two
  # fields even if the value part contains further colons.
  colon_at = regexpr(":", rough_wav_stats, fixed = TRUE)
  is_stat_line = colon_at > 0
  stat_lines = rough_wav_stats[is_stat_line]
  colon_at = colon_at[is_stat_line]

  wav_stats = data.frame(
    variable = trimws(substr(stat_lines, 1, colon_at - 1)),
    value = as.numeric(trimws(substring(stat_lines, colon_at + 1))),
    stringsAsFactors = FALSE
  )
  return(wav_stats)
}
> spam = get_wav_stats("worf.wav")
> spam
variable value
1 Samples read 3.400000e+04
2 Length (seconds) 3.083900e+00
3 Scaled by 2.147484e+09
4 Maximum amplitude 9.999690e-01
5 Minimum amplitude -9.387210e-01
6 Midline amplitude 3.062400e-02
7 Mean norm 1.906020e-01
8 Mean amplitude -4.302000e-03
9 RMS amplitude 2.449780e-01
10 Maximum delta 1.340240e+00
11 Minimum delta 0.000000e+00
12 Mean delta 5.144400e-02
13 RMS delta 9.993300e-02
14 Rough frequency 7.150000e+02
15 Volume adjustment 1.000000e+00
次に、これを apply 系のループ (lapply) でラップして、特定のディレクトリ内のすべてのファイルの統計を取得できます。
# In real use the paths would come from a directory listing:
# files_dir = list.files("path", full.names = TRUE)
# For this demonstration a mock listing is built from ten copies of one file.
files_dir = rep("worf.wav", times = 10)
# Collect one statistics data frame per path into an unnamed list.
stat_wavs = lapply(files_dir, function(wav_path) get_wav_stats(wav_path))
> str(stat_wavs)
List of 10
$ :'data.frame': 15 obs. of 2 variables:
..$ variable: Factor w/ 15 levels "Length (seconds)",..: 13 1 14 2 8 7 6 4 10 3 ...
..$ value : num [1:15] 3.40e+04 3.08 2.15e+09 1.00 -9.39e-01 ...
$ :'data.frame': 15 obs. of 2 variables:
..$ variable: Factor w/ 15 levels "Length (seconds)",..: 13 1 14 2 8 7 6 4 10 3 ...
..$ value : num [1:15] 3.40e+04 3.08 2.15e+09 1.00 -9.39e-01 ...
<< snip >>
$ :'data.frame': 15 obs. of 2 variables:
..$ variable: Factor w/ 15 levels "Length (seconds)",..: 13 1 14 2 8 7 6 4 10 3 ...
..$ value : num [1:15] 3.40e+04 3.08 2.15e+09 1.00 -9.39e-01 ...
必要な統計を含む value 列のみを抽出するには:
# Stack the numeric `value` column of every per-file data frame so that each
# file becomes one row and each statistic one column.
# NOTE(review): assumes every file reports the same statistics in the same
# order - confirm for heterogeneous inputs.
stats4files = data.frame(do.call("rbind", lapply(stat_wavs, "[[", "value")))
# Label the columns with the statistic names taken from the first file;
# as.character() keeps this correct when `variable` is a factor.
names(stats4files) = as.character(stat_wavs[[1]][["variable"]])
# Data frame row names must be unique, so the file paths are only used as
# row names when no path is repeated (the mock list repeats one file, in
# which case the default 1..n row names are kept).
if (anyDuplicated(files_dir) == 0) {
  rownames(stats4files) = files_dir
}
> stats4files
Samples read Length (seconds) Scaled by Maximum amplitude Minimum amplitude Midline amplitude
1 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
2 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
3 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
4 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
5 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
6 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
7 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
8 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
9 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
10 34000 3.0839 2147483647 0.999969 -0.938721 0.030624
Mean norm Mean amplitude RMS amplitude Maximum delta Minimum delta Mean delta
1 0.190602 -0.004302 0.244978 1.34024 0 0.051444
2 0.190602 -0.004302 0.244978 1.34024 0 0.051444
3 0.190602 -0.004302 0.244978 1.34024 0 0.051444
4 0.190602 -0.004302 0.244978 1.34024 0 0.051444
5 0.190602 -0.004302 0.244978 1.34024 0 0.051444
6 0.190602 -0.004302 0.244978 1.34024 0 0.051444
7 0.190602 -0.004302 0.244978 1.34024 0 0.051444
8 0.190602 -0.004302 0.244978 1.34024 0 0.051444
9 0.190602 -0.004302 0.244978 1.34024 0 0.051444
10 0.190602 -0.004302 0.244978 1.34024 0 0.051444
RMS delta Rough frequency Volume adjustment
1 0.099933 715 1
2 0.099933 715 1
3 0.099933 715 1
4 0.099933 715 1
5 0.099933 715 1
6 0.099933 715 1
7 0.099933 715 1
8 0.099933 715 1
9 0.099933 715 1
10 0.099933 715 1