0

私のServletContextListener:

@WebListener
public class RunServlet implements ServletContextListener {

    private ScheduledExecutorService scheduler;

    @Override
    public void contextInitialized(ServletContextEvent event) {
        System.out.println("ready");
        scheduler = Executors.newScheduledThreadPool(10);
        scheduler.execute(new RunThread("http://stackoverflow.com"));
    }

    @Override
    public void contextDestroyed(ServletContextEvent event) {
        scheduler.shutdownNow();
        System.out.println("removed");
    }
}

Runnableを実装するクラスはRunThreadです(Webページからすべてのリンクを取得し、リンクをクリックし、Webページを解析し、jsoupとhibernateを使用して単語をデータベースに保存します):

public class RunThread implements Runnable{
    private Document html;
    private String url;

    private static final int threads_num = Runtime.getRuntime().availableProcessors()*4;
    private int links = 0;
    private int alinks = 0;

    public RunThread(String url){
        this.url = url;
        try {
            this.html = Jsoup.connect(url).get();
            this.links = html.select("a[href]").size();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    @Override
    public void run() {
        if(alinks != links){
            Elements collectedLinks = html.select("a[href]");
            ExecutorService executor = Executors.newFixedThreadPool(threads_num);
            for(Element link:collectedLinks){
                if(alinks == links) break;
                else{
                    String current = link.attr("abs:href");
                    if(!current.equals(url) && current.startsWith(url)&& !current.contains("#")){
                        executor.execute(new RunThread(current));
                        alinks++;
                    }
                }
            }
        }
        AnalyzePage(html, url);
    }

    private void AnalyzePage(Document doc,String url){
        String text = doc.body().text();
        SaveTextToDB(text,url);
    }

    public void SaveTextToDB(String text, String link){
        TreeMap<String, Integer> frequencyMap = new TreeMap<String, Integer>();
        StringTokenizer parser = 
            new StringTokenizer(text.replaceAll("[0-9]+","").replaceAll("[^a-zA-Zа-яА-Я]-[^a-zA-Zа-яА-Я]", " "), " \t\n\r\f.,;:!?%#+№/<←→↓@'\"—«»©“\\(\\)");
        while (parser.hasMoreTokens()) {
            String currentWord = parser.nextToken();
            Integer frequency = frequencyMap.get(currentWord);
        if (frequency == null) {
            frequency = 0;
        }
        frequencyMap.put(currentWord, frequency + 1);
    }

    for (Map.Entry<String,Integer> entry : frequencyMap.entrySet()){
        Indexation word = new Indexation();
        IndexationPK pk = new IndexationPK();
        pk.setLink(link);
        pk.setWord(entry.getKey());
        word.setFrequency(entry.getValue());
        word.setIndexationPK(pk);
        IndexationDAO indDAO = new IndexationDAOImpl();
        indDAO.AddRecord(word);
    }}
}

次のエラーが発生します:

java.net.SocketTimeoutException: Read timed out
    at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.read(SocketInputStream.java:129)
    at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
    at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
    at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
    at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:695)
    at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:640)
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1195)
    at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:379)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:381)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:364)
    at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:143)
    at org.jsoup.helper.HttpConnection.get(HttpConnection.java:132)
    at com.mstu.service.RunThread.<init>(RunThread.java:35)
    at com.mstu.service.RunThread.run(RunThread.java:53)
    at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:662)
Exception in thread "pool-9-thread-1" java.lang.NullPointerException
    at com.mstu.service.RunThread.AnalyzePage(RunThread.java:63)
    at com.mstu.service.RunThread.run(RunThread.java:59)
    at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:662)
java.net.SocketTimeoutException: Read timed out
    at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.read(SocketInputStream.java:129)
    at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
    at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
    at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
    at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:695)
    at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:640)
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1195)
    at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:379)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:381)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:364)
    at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:143)
    at org.jsoup.helper.HttpConnection.get(HttpConnection.java:132)
    at com.mstu.service.RunThread.<init>(RunThread.java:35)
    at com.mstu.service.RunThread.run(RunThread.java:53)
    at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:662)
java.net.SocketTimeoutException: Read timed out

どうしたの?私を助けてください。

4

1 に答える 1

1

私はそれがあなたの糸脱毛とは関係がないのではないかと思いますが、それは

 this.html = Jsoup.connect(url).get();

指定されたURLに接続できません。これはスタンドアロンコンポーネントとして実行されますか?このスタンドアロンをテストし(を引き出して、Runnable簡単にテストできるスタンドアロンクラスにする)、HTTPプロキシなどを設定する必要があるかどうかを確認する価値があります。

run()(オブジェクトの作成で接続を取得し、エグゼキュータが実行する準備ができるまでそれにぶら下がるのではなく、おそらくメソッド内で上記を実行します)

于 2012-04-10T19:27:34.843 に答える