1

I use URLConnection to read a web URL; however, sometimes it cannot read the full response (and sometimes it works fine for the same URL).

One such URL is: http://messages.yahoo.com/yahoo/Business_%26_Finance/Investments/Stocks_%28A_to_Z%29/Stocks_Y/index.html

Below is one sample of a cut-off response:

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <title>Yahoo! Message Boards - Stocks Y</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">


    <link rel="stylesheet" type="text/css" href="http://l.yimg.com/d/lib/mb/default_yui_ycs_20080219.css"/>


    <!--CSS source files for the Calendar Control -->
<style>
#cal1 { width: 100%;}
#cal2 { width: 100%;}
</style>

    </head>
    <body>
    <center>
        <div id="doc-layout">
        <link type="text/css" rel="stylesheet" href="http://l.yimg.com/a/lib/uh/15/css/uh_rsa-1.0.5.css" /><style type="text/css">#ygma div{clear:none;}#ygma #yahoo{padding-top:0;*padding-top:5px;}</style><script type="text/javascript">(function(){var h={};var setUp=function(){h = YAHOO.one.uh.hotlistInfo = {rd:"http://global.ard.yahoo.com",space:"/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=0",adid:"5857129",prop:"ymb",protocol:"http",host:"messages.yahoo.com",url:"http%3a%2f%2fmessages.yahoo.com%2fyahoo%2fBusiness_%2526_Finance%2fInvestments%2fStocks_%2528A_to_Z%2529%2fStocks_Y%2findex.html",spaceid:"389132707"};YAHOO.one.uh.translate=function(str){var set={yahoo_homepage:"http://www.yahoo.com/", hp_detect_script:"http://www.yahoo.com/includes/hdhpdetect.php", set_hp_script:"http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=1/SIG=10uacnjgh/*http://www.yahoo.com/bin/set", set_hp_firefox_instructions:["Drag the \"Y!\" and drop it onto the \"Home\" icon.", "Select \"Yes\" from the pop up window.", "Nothing, you're done."], close_this_window:"Close this window", set_hp_alternative_instructions1:"If this didn't work for you see ", detailed_set_hp_instructions:"detailed instructions", set_hp_alternative_instructions2:""};return set[str];};YAHOO.one.uh.Search=['ygmasearchInput', 'sat'];};if("undefined" !==typeof YAHOO && "undefined" !==typeof YAHOO.one && "undefined" !==typeof YAHOO.one.uh){setUp();}else{setTimeout(arguments.callee, 500);}})();</script><div id="ygma"><div id="ygmaheader"><div class="bd sp"><div id="ymenu" class="ygmaclr"><div id="mepanel"><ul id="mepanel-nav"><li class="me1"><em>New User? 
<a class="ygmasignup" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=2/SIG=1395kefdv/*https://edit.yahoo.com/config/eval_register?.done=http://messages.finance.yahoo.com&.src=quote&.intl=us">Register</a></em></li><li class="me2"><a href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=3/SIG=132l1q4g4/*https://login.yahoo.com/config/login?.done=http://messages.finance.yahoo.com&.src=quote&.intl=us"><em>Sign In</em></a></li><li class="me3"><a href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=4/SIG=11bv9h578/*http://help.yahoo.com/l/us/yahoo/finance/" target="_top">Help</a></li></ul></div><div id="ygmapromo"><div id="ygmapromo-i"></div>
<style> 
#ygma-s{display:none;}
#ygma #ygmapromo-i, #ygmabt #ygmapromo-i {display:inline;}
</style> 
<script> 
(function(){
window.YAHOO = window.YAHOO || {};
YAHOO.one = window.YAHOO.one || {};
YAHOO.one.uh = window.YAHOO.one.uh || {};
YAHOO.one.uh.popularSearches = function(data) {
    var chop = function(s, n) {
        if (s.length > n) {
        return s.substring(0,n)+"...";
        }
        else return s;
    }
    var e = document.getElementById("ygmapromo") || document.getElementById("ygmabtpromo");
    var e2 = document.getElementById("ygmapromo-i");
if (!data.query.results || data.query.results == "0" || !data.query.results.links) {
        return;
    }
    var i = data.query.results.links; 
    var hd = i.text;
    var fr = "&fr=ush-tts&fr2=ps";
    e2.innerHTML = '<a href="http://global.ard.yahoo.com/SIG=15p310019/M=650008.13959238.14033549.12832737/D=ymb/S=389132707:HPRM2/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=OFPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=6076890/R=0/*'+i.href+fr+'" id="ygma-s-h">Trending: '+chop(hd, 20)+'</a>';
};
var ie;
if (navigator.userAgent.match(/MSIE\s6/)) {
    var D = new Date();
    var yr = D.getFullYear();
    var mt = D.getMonth()+1;
    var dy = D.getDate();
    var hr = D.getHours();
    ie = '&cache=' + yr + mt + dy + hr;
}
else {
    ie = "";
}
var y = document.getElementById("ygma") || document.getElementById("ygmabt");
var feed;
if (y.offsetWidth < 850){
    feed = "http://query.yahooapis.com/v1/public/yql/uhTrending/cokeTrending3?format=json&callback=YAHOO.one.uh.popularSearches&_maxage=1800&diagnostics=false&limit=1";
}
else {
    feed = "http://query.yahooapis.com/v1/public/yql/uhTrending/cokeTrending2?format=json&callback=YAHOO.one.uh.popularSearches&_maxage=1800&diagnostics=false&limit=1";
}
var h = document.getElementsByTagName("head").item(0);
var s = document.createElement("script");
s.setAttribute("type", "text/javascript");
s.setAttribute("charset", "utf-8");
s.setAttribute("src", feed + ie);
h.appendChild(s);
})();
</script>     
<style> 
#ygma ol.searches h4, #ygmabt ol.searches h4 {
    font-size:100%;
    font-weight:bold;
    color:#333333;
    text-align:left;
    padding-bottom:3px;
    margin:0;
}
#ygma ol.searches, #ygmabt ol.searches {
    position:absolute;
    z-index:10002;
    width:155px;
    _width:165px;
    padding:7px 7px 3px 7px;
    background-color:#ffffff;
    border:1px solid #cacaca;
    margin:0;
}
#ygma ol.searches li, #ygmabt ol.searches li {
    text-align:left;
    list-style-type:none;
    color:#163780;
    margin:0;
    padding: 0 0 2px 0;
}
#ygma #ygmapromo ol.searches a, #ygmabt #ygmabtpromo ol.searches a {
    font-weight:normal;
    color:#163780;
}
#ygma #ygmapromo ol.searches a:hover, #ygmabt #ygmabtpromo ol.searches a:hover {
    width:100%;
}
#ygma #ygmapromo-i, #ygmabt #ygmapromo-i {
    position:relative;
    z-index:10002;
    zoom:1;
}
</style><script language=javascript>
if(window.yzq_d==null)window.yzq_d=new Object();
window.yzq_d['OFPLD0oGYzg-']='&U=13h2rtkt7%2fN%3dOFPLD0oGYzg-%2fC%3d650008.13959238.14033549.12832737%2fD%3dHPRM2%2fB%3d6076890%2fV%3d1';
</script><noscript><img width=1 height=1 alt="" src="http://us.bc.yahoo.com/b?P=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc&T=17t8jqecg%2fX%3d1342682750%2fE%3d389132707%2fR%3dymb%2fK%3d5%2fV%3d2.1%2fW%3dH%2fY%3dYAHOO%2fF%3d2676527448%2fH%3dc2VydmVJZD0iNHkwbC5XS0pMR0dYWWFLSTZnOXpXUTVod1BveEFWQUh0bjRBQnRYYyIgc2l0ZUlkPSI0NDY1NTUxIiB0U3RtcD0iMTM0MjY4Mjc1MDQ2NTkyNSIg%2fQ%3d-1%2fS%3d1%2fJ%3dFA208962&U=13h2rtkt7%2fN%3dOFPLD0oGYzg-%2fC%3d650008.13959238.14033549.12832737%2fD%3dHPRM2%2fB%3d6076890%2fV%3d1"></noscript></div><div id="pa"><div id="pa-wrapper"><ul id="pa2-nav" class="sp"><li class="pa1 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=5/SIG=10np9vmbm/*http://www.yahoo.com/" target="_top">Yahoo!</a></li><li class="pa2 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=6/SIG=1107gluf6/*http://mail.yahoo.com?.intl=us" target="_top">Mail</a></li></ul><div id="pa-left" class="sp"></div><ul id="pa-nav" class="sp"><li class="pa3 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=7/SIG=10l2nj3k8/*http://my.yahoo.com" title="My Yahoo!" 
target="_top">My Yahoo!</a></li><li class="pa4 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=8/SIG=10niob72s/*http://news.yahoo.com" title="Yahoo! News" target="_top">News</a></li><li class="pa5 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=9/SIG=10q40gpus/*http://finance.yahoo.com" title="Yahoo! Finance" target="_top">Finance</a></li><li class="pa6 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=10/SIG=10pcalhda/*http://sports.yahoo.com" title="Yahoo! Sports" target="_top">Sports</a></li></ul><div id="pa-right" class="sp"></div></div></div></div><div id="yahoo" class="ygmaclr"><div id="ygmabot"> <a id="ygmalogo" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=11/SIG=10q40gpus/*http://finance.yahoo.com" target="_top"><img id="ygmalogoimg" width="246" h...

Below is my code to read a web URL:

/**
 * Fetches the document at {@code urlstr} and returns its body as text.
 *
 * <p>The request carries a fixed browser-like {@code User-Agent} and, when
 * the {@code cookie} field is non-empty, a {@code Cookie} header. After the
 * response stream is opened the connection is handed to
 * {@link #extractCookie(URLConnection)}. The body is read line by line and
 * every line is terminated with {@code "\n"} in the returned string.
 *
 * <p>This method is best-effort: any exception is printed to standard error
 * and an empty string is returned, so a partially read body is never
 * returned.
 *
 * @param urlstr absolute URL to fetch
 * @return the decoded response body, or {@code ""} if anything failed
 */
public String getHtml(String urlstr)
{
    try {
        URL url = new URL(urlstr);
        URLConnection conn = url.openConnection();

        conn.addRequestProperty("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)");

        System.out.println("now to set cookie: " + cookie);

        // Do not send a Cookie header with a null or empty value.
        if (cookie != null && cookie.length() > 0) {
            conn.addRequestProperty("Cookie", cookie);
        }

        // Decode with the charset announced by the server instead of the
        // platform default, which varies between machines.
        BufferedReader in = new BufferedReader(new InputStreamReader(
                                    conn.getInputStream(), responseCharset(conn)));
        try {
            extractCookie(conn);

            StringBuilder sb = new StringBuilder();
            String inputLine;
            while ((inputLine = in.readLine()) != null)
            {
                sb.append(inputLine).append("\n");
            }

            return sb.toString();
        } finally {
            // Release the stream even when reading or cookie extraction fails.
            in.close();
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

    return "";
}

/**
 * Picks the charset name used to decode the response body.
 *
 * @param conn connection whose {@code Content-Type} header is inspected
 * @return the {@code charset} parameter of the content type when present and
 *         supported by this JVM, otherwise {@code "UTF-8"}
 */
private String responseCharset(URLConnection conn) {
    final String prefix = "charset=";
    String contentType = conn.getContentType();
    if (contentType != null) {
        String[] params = contentType.split(";");
        for (int i = 0; i < params.length; i++) {
            String param = params[i].trim();
            // regionMatches(true, ...) gives a locale-independent, case-insensitive prefix test.
            if (!param.regionMatches(true, 0, prefix, 0, prefix.length())) {
                continue;
            }
            String name = param.substring(prefix.length()).trim();
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                if (java.nio.charset.Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Malformed charset name in the header: fall back to the default below.
            }
        }
    }
    return "UTF-8";
}

/**
 * Collects the cookies announced by a response.
 *
 * <p>Every header whose name equals {@code Set-Cookie} (ignoring case) is
 * processed: the number of values is printed, then each value is passed
 * through {@code Util.getCookie}, added to {@code cookieSet} and printed.
 *
 * @param conn connection whose response headers are inspected
 */
private void extractCookie(URLConnection conn) {
    System.out.println("now try to get cookie:");

    Map<String, List<String>> headers = conn.getHeaderFields();
    for (Map.Entry<String, List<String>> header : headers.entrySet()) {
        // Header names may be null (status line); equalsIgnoreCase(null) is simply false.
        if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
            continue;
        }

        List<String> rawCookies = header.getValue();
        System.out.println(rawCookies.size());

        for (String rawCookie : rawCookies) {
            String parsed = Util.getCookie(rawCookie);
            this.cookieSet.add(parsed);
            System.out.println(parsed);
        }
    }
}
4

1 に答える 1

1

まあ、このコード自体は正常に動作していると思います。Eclipse のデバッグモードでは、変数ビューに長い文字列の値が完全には表示されず、それで混乱していました...

(デバッグモードの変数値から出力をコピーしました....)

後でコンソール/ファイルに出力したところ、これまでにテストしたケースでは問題ないようでした...

于 2012-07-19T08:37:22.490 に答える