はい、ngram は bash で実装できます。
# Usage: ngrams N < FILE
#
# Count word N-grams read from standard input.
#
# Words are the whitespace-separated tokens of the input, taken literally
# (no glob expansion, no backslash processing). An N-gram may span line
# boundaries. Each distinct N-gram is written to standard output once, in
# sorted order, prefixed with its occurrence count (`sort | uniq -c` format).
#
# Arguments: $1 - N, the number of words per N-gram (positive integer)
# Returns:   2 (with a message on stderr) if N is not a positive integer;
#            otherwise the status of the `sort | uniq -c` pipeline.
ngrams() {
  local n=${1-}
  # Pin IFS so word splitting and the space-joined output do not depend on
  # whatever the caller has set.
  local IFS=$' \t\n'
  local -a words=() window=()
  local i count

  # An offset of 0 (or a non-numeric N, which evaluates to 0) would never
  # terminate, so reject it up front.
  if ! [[ $n =~ ^0*[1-9][0-9]*$ ]]; then
    printf 'Usage: ngrams N < FILE (N must be a positive integer)\n' >&2
    return 2
  fi
  n=$((10#$n))  # force base 10 so a value like "08" is not read as octal

  # `read -a` splits the line into words without pathname expansion; the
  # `||` clause still processes a final line that lacks a trailing newline.
  while read -r -a words || (( ${#words[@]} > 0 )); do
    window+=("${words[@]}")
    count=${#window[@]}
    for (( i = 0; i + n <= count; i++ )); do
      # printf, not echo: a leading word such as "-n" must be printed as data.
      printf '%s\n' "${window[*]:i:n}"
    done
    # Keep only the trailing N-1 words; they start the next line's N-grams.
    if (( i > 0 )); then
      window=("${window[@]:i}")
    fi
  done |
    sort | uniq -c
}
$ ngrams 2
Here is some text, and here is
some more text, and here is yet
some more text
1 Here is
2 and here
2 here is
2 is some
1 is yet
1 more text
1 more text,
2 some more
1 some text,
2 text, and
1 yet some
注:上記はスクリプトではなく関数です(関数とスクリプトの違いについては、関連する質問や、より適切な別の質問が参考になるかもしれません)。スクリプト版は次のとおりです。
#!/bin/bash
# Usage: ngrams N < FILE
#
# Count word N-grams read from standard input.
#
# Words are the whitespace-separated tokens of the input, taken literally
# (no glob expansion, no backslash processing). An N-gram may span line
# boundaries. Each distinct N-gram is written to standard output once, in
# sorted order, prefixed with its occurrence count (`sort | uniq -c` format).
#
# Exit status: 2 (with a message on stderr) if N is not a positive integer;
# otherwise the status of the `sort | uniq -c` pipeline.

# Does the actual work; $1 is N, the number of words per N-gram.
ngrams() {
  local n=${1-}
  # Pin IFS so word splitting and the space-joined output are predictable.
  local IFS=$' \t\n'
  local -a words=() window=()
  local i count

  # An offset of 0 (or a non-numeric N, which evaluates to 0) would never
  # terminate, so reject it up front.
  if ! [[ $n =~ ^0*[1-9][0-9]*$ ]]; then
    printf 'Usage: %s N < FILE (N must be a positive integer)\n' "${0##*/}" >&2
    return 2
  fi
  n=$((10#$n))  # force base 10 so a value like "08" is not read as octal

  # `read -a` splits the line into words without pathname expansion; the
  # `||` clause still processes a final line that lacks a trailing newline.
  while read -r -a words || (( ${#words[@]} > 0 )); do
    window+=("${words[@]}")
    count=${#window[@]}
    for (( i = 0; i + n <= count; i++ )); do
      # printf, not echo: a leading word such as "-n" must be printed as data.
      printf '%s\n' "${window[*]:i:n}"
    done
    # Keep only the trailing N-1 words; they start the next line's N-grams.
    if (( i > 0 )); then
      window=("${window[@]:i}")
    fi
  done |
    sort | uniq -c
}

# Last command: its status becomes the script's exit status.
ngrams "$@"