4

ビルトインcollection.sortcomparator<string>インターフェイスを使用して、頻度の高い順に単語のリストを並べ替える方法を知っている人はいますか?

テキスト ファイル内の特定の単語の数を取得するメソッドが既にあります。ここで必要なのは、各単語のカウントを比較し、それらを頻度の低いものから多いものへと並べ替えたリストに入れるメソッドを作成することだけです。

アイデアやヒントをいただければ幸いです。この特定の方法を開始するのに問題があります。

public class Parser implements Comparator<String> {

    public Map<String, Integer> wordCount;

    void parse(String filename) throws IOException {
        File file = new File(filename);
        Scanner scanner = new Scanner(file);

        //mapping of string -> integer (word -> frequency)
        Map<String, Integer> wordCount = new HashMap<String, Integer>();

        //iterates through each word in the text file
        while(scanner.hasNext()) {
            String word = scanner.next();
            if (scanner.next()==null) {
                wordCount.put(word, 1);
            }
            else {
                wordCount.put(word, wordCount.get(word) + 1);;
                }
            }
            scanner.next().replaceAll("[^A-Za-z0-9]"," ");
            scanner.next().toLowerCase();
        }

    public int getCount(String word) {
        return wordCount.get(word);
    }

    public int compare(String w1, String w2) {
        return getCount(w1) - getCount(w2);
    } 

        //this method should return a list of words in order of frequency from least to   greatest
    public List<String> getWordsInOrderOfFrequency() {
        List<Integer> wordsByCount = new ArrayList<Integer>(wordCount.values());
        //this part is unfinished.. the part i'm having trouble sorting the word frequencies
        List<String> result = new ArrayList<String>();


    }
}
4

4 に答える 4

1

以下からアイデアを比較して抽出できます。

public class FrequencyCount {

    public static void main(String[] args) {

        // read in the words as an array
        String s = StdIn.readAll();
        // s = s.toLowerCase();
        // s = s.replaceAll("[\",!.:;?()']", "");
        String[] words = s.split("\\s+");

        // sort the words
        Merge.sort(words);

        // tabulate frequencies of each word
        Counter[] zipf = new Counter[words.length];
        int M = 0;                                        // number of distinct words
        for (int i = 0; i < words.length; i++) {
            if (i == 0 || !words[i].equals(words[i-1]))   // short-circuiting OR
                zipf[M++] = new Counter(words[i], words.length);
            zipf[M-1].increment();
        }

        // sort by frequency and print
        Merge.sort(zipf, 0, M);                           // sorting a subarray
        for (int j = M-1; j >= 0; j--) {
            StdOut.println(zipf[j]);
        }
    }
}
于 2012-04-15T03:47:53.733 に答える
1

コメントでToriousが提案した修正と並べ替えを使用して、元の投稿に近い解決策:

import java.util.*;

public class Parser implements Comparator <String> {

    public Map<String, Integer> wordCount;

    void parse ()
    {
        Scanner scanner = new Scanner (System.in);

        // don't redeclare it here - your attribute wordCount will else be shadowed
        wordCount = new HashMap<String, Integer> ();

        //iterates through each word in the text file
        while (scanner.hasNext ()) {
            String word = scanner.next ();
            // operate on the word, not on next and next of next word from Scanner
            word = word.replaceAll (" [^A-Za-z0-9]", " ");
            word = word.toLowerCase ();
            // look into your map:
            if (! wordCount.containsKey (word))
                wordCount.put (word, 1);
            else
                wordCount.put (word, wordCount.get (word) + 1);;
        }
    }

    public int getCount (String word) {
        return wordCount.get (word);
    }

    public int compare (String w1, String w2) {
        return getCount (w1) - getCount (w2);
    }

    public List<String> getWordsInOrderOfFrequency () {
        List<String> justWords = new ArrayList<String> (wordCount.keySet());
        Collections.sort (justWords, this);
        return justWords; 
    }

    public static void main (String args []) {
        Parser p = new Parser ();
        p.parse ();
        List<String> ls = p.getWordsInOrderOfFrequency ();
        for (String s: ls) 
            System.out.println (s);
    }
}
于 2012-04-16T03:40:51.950 に答える