I use URLConnection to read a web URL; however, sometimes it does not read the full response (and sometimes it works fine for the same URL).
One such URL is: http://messages.yahoo.com/yahoo/Business_%26_Finance/Investments/Stocks_%28A_to_Z%29/Stocks_Y/index.html
Below is one sample of a cut-off response:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Yahoo! Message Boards - Stocks Y</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="http://l.yimg.com/d/lib/mb/default_yui_ycs_20080219.css"/>
<!--CSS source files for the Calendar Control -->
<style>
#cal1 { width: 100%;}
#cal2 { width: 100%;}
</style>
</head>
<body>
<center>
<div id="doc-layout">
<link type="text/css" rel="stylesheet" href="http://l.yimg.com/a/lib/uh/15/css/uh_rsa-1.0.5.css" /><style type="text/css">#ygma div{clear:none;}#ygma #yahoo{padding-top:0;*padding-top:5px;}</style><script type="text/javascript">(function(){var h={};var setUp=function(){h = YAHOO.one.uh.hotlistInfo = {rd:"http://global.ard.yahoo.com",space:"/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=0",adid:"5857129",prop:"ymb",protocol:"http",host:"messages.yahoo.com",url:"http%3a%2f%2fmessages.yahoo.com%2fyahoo%2fBusiness_%2526_Finance%2fInvestments%2fStocks_%2528A_to_Z%2529%2fStocks_Y%2findex.html",spaceid:"389132707"};YAHOO.one.uh.translate=function(str){var set={yahoo_homepage:"http://www.yahoo.com/", hp_detect_script:"http://www.yahoo.com/includes/hdhpdetect.php", set_hp_script:"http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=1/SIG=10uacnjgh/*http://www.yahoo.com/bin/set", set_hp_firefox_instructions:["Drag the \"Y!\" and drop it onto the \"Home\" icon.", "Select \"Yes\" from the pop up window.", "Nothing, you're done."], close_this_window:"Close this window", set_hp_alternative_instructions1:"If this didn't work for you see ", detailed_set_hp_instructions:"detailed instructions", set_hp_alternative_instructions2:""};return set[str];};YAHOO.one.uh.Search=['ygmasearchInput', 'sat'];};if("undefined" !==typeof YAHOO && "undefined" !==typeof YAHOO.one && "undefined" !==typeof YAHOO.one.uh){setUp();}else{setTimeout(arguments.callee, 500);}})();</script><div id="ygma"><div id="ygmaheader"><div class="bd sp"><div id="ymenu" class="ygmaclr"><div id="mepanel"><ul id="mepanel-nav"><li class="me1"><em>New User? 
<a class="ygmasignup" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=2/SIG=1395kefdv/*https://edit.yahoo.com/config/eval_register?.done=http://messages.finance.yahoo.com&.src=quote&.intl=us">Register</a></em></li><li class="me2"><a href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=3/SIG=132l1q4g4/*https://login.yahoo.com/config/login?.done=http://messages.finance.yahoo.com&.src=quote&.intl=us"><em>Sign In</em></a></li><li class="me3"><a href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=4/SIG=11bv9h578/*http://help.yahoo.com/l/us/yahoo/finance/" target="_top">Help</a></li></ul></div><div id="ygmapromo"><div id="ygmapromo-i"></div>
<style>
#ygma-s{display:none;}
#ygma #ygmapromo-i, #ygmabt #ygmapromo-i {display:inline;}
</style>
<script>
(function(){
window.YAHOO = window.YAHOO || {};
YAHOO.one = window.YAHOO.one || {};
YAHOO.one.uh = window.YAHOO.one.uh || {};
YAHOO.one.uh.popularSearches = function(data) {
var chop = function(s, n) {
if (s.length > n) {
return s.substring(0,n)+"...";
}
else return s;
}
var e = document.getElementById("ygmapromo") || document.getElementById("ygmabtpromo");
var e2 = document.getElementById("ygmapromo-i");
if (!data.query.results || data.query.results == "0" || !data.query.results.links) {
return;
}
var i = data.query.results.links;
var hd = i.text;
var fr = "&fr=ush-tts&fr2=ps";
e2.innerHTML = '<a href="http://global.ard.yahoo.com/SIG=15p310019/M=650008.13959238.14033549.12832737/D=ymb/S=389132707:HPRM2/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=OFPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=6076890/R=0/*'+i.href+fr+'" id="ygma-s-h">Trending: '+chop(hd, 20)+'</a>';
};
var ie;
if (navigator.userAgent.match(/MSIE\s6/)) {
var D = new Date();
var yr = D.getFullYear();
var mt = D.getMonth()+1;
var dy = D.getDate();
var hr = D.getHours();
ie = '&cache=' + yr + mt + dy + hr;
}
else {
ie = "";
}
var y = document.getElementById("ygma") || document.getElementById("ygmabt");
var feed;
if (y.offsetWidth < 850){
feed = "http://query.yahooapis.com/v1/public/yql/uhTrending/cokeTrending3?format=json&callback=YAHOO.one.uh.popularSearches&_maxage=1800&diagnostics=false&limit=1";
}
else {
feed = "http://query.yahooapis.com/v1/public/yql/uhTrending/cokeTrending2?format=json&callback=YAHOO.one.uh.popularSearches&_maxage=1800&diagnostics=false&limit=1";
}
var h = document.getElementsByTagName("head").item(0);
var s = document.createElement("script");
s.setAttribute("type", "text/javascript");
s.setAttribute("charset", "utf-8");
s.setAttribute("src", feed + ie);
h.appendChild(s);
})();
</script>
<style>
#ygma ol.searches h4, #ygmabt ol.searches h4 {
font-size:100%;
font-weight:bold;
color:#333333;
text-align:left;
padding-bottom:3px;
margin:0;
}
#ygma ol.searches, #ygmabt ol.searches {
position:absolute;
z-index:10002;
width:155px;
_width:165px;
padding:7px 7px 3px 7px;
background-color:#ffffff;
border:1px solid #cacaca;
margin:0;
}
#ygma ol.searches li, #ygmabt ol.searches li {
text-align:left;
list-style-type:none;
color:#163780;
margin:0;
padding: 0 0 2px 0;
}
#ygma #ygmapromo ol.searches a, #ygmabt #ygmabtpromo ol.searches a {
font-weight:normal;
color:#163780;
}
#ygma #ygmapromo ol.searches a:hover, #ygmabt #ygmabtpromo ol.searches a:hover {
width:100%;
}
#ygma #ygmapromo-i, #ygmabt #ygmapromo-i {
position:relative;
z-index:10002;
zoom:1;
}
</style><script language=javascript>
if(window.yzq_d==null)window.yzq_d=new Object();
window.yzq_d['OFPLD0oGYzg-']='&U=13h2rtkt7%2fN%3dOFPLD0oGYzg-%2fC%3d650008.13959238.14033549.12832737%2fD%3dHPRM2%2fB%3d6076890%2fV%3d1';
</script><noscript><img width=1 height=1 alt="" src="http://us.bc.yahoo.com/b?P=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc&T=17t8jqecg%2fX%3d1342682750%2fE%3d389132707%2fR%3dymb%2fK%3d5%2fV%3d2.1%2fW%3dH%2fY%3dYAHOO%2fF%3d2676527448%2fH%3dc2VydmVJZD0iNHkwbC5XS0pMR0dYWWFLSTZnOXpXUTVod1BveEFWQUh0bjRBQnRYYyIgc2l0ZUlkPSI0NDY1NTUxIiB0U3RtcD0iMTM0MjY4Mjc1MDQ2NTkyNSIg%2fQ%3d-1%2fS%3d1%2fJ%3dFA208962&U=13h2rtkt7%2fN%3dOFPLD0oGYzg-%2fC%3d650008.13959238.14033549.12832737%2fD%3dHPRM2%2fB%3d6076890%2fV%3d1"></noscript></div><div id="pa"><div id="pa-wrapper"><ul id="pa2-nav" class="sp"><li class="pa1 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=5/SIG=10np9vmbm/*http://www.yahoo.com/" target="_top">Yahoo!</a></li><li class="pa2 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=6/SIG=1107gluf6/*http://mail.yahoo.com?.intl=us" target="_top">Mail</a></li></ul><div id="pa-left" class="sp"></div><ul id="pa-nav" class="sp"><li class="pa3 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=7/SIG=10l2nj3k8/*http://my.yahoo.com" title="My Yahoo!" 
target="_top">My Yahoo!</a></li><li class="pa4 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=8/SIG=10niob72s/*http://news.yahoo.com" title="Yahoo! News" target="_top">News</a></li><li class="pa5 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=9/SIG=10q40gpus/*http://finance.yahoo.com" title="Yahoo! Finance" target="_top">Finance</a></li><li class="pa6 sp"><a class="sp" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=10/SIG=10pcalhda/*http://sports.yahoo.com" title="Yahoo! Sports" target="_top">Sports</a></li></ul><div id="pa-right" class="sp"></div></div></div></div><div id="yahoo" class="ygmaclr"><div id="ygmabot"> <a id="ygmalogo" href="http://global.ard.yahoo.com/SIG=15of4v3ir/M=650008.13179474.13810954.12691855/D=ymb/S=389132707:HEAD/Y=YAHOO/EXP=1342689950/L=4y0l.WKJLGGXYaKI6g9zWQ5hwPoxAVAHtn4ABtXc/B=NlPLD0oGYzg-/J=1342682750498023/K=DHARfvXptNXfBjvsJrQw.g/A=5857129/R=11/SIG=10q40gpus/*http://finance.yahoo.com" target="_top"><img id="ygmalogoimg" width="246" h...
Below is my code to read a web URL:
/**
 * Fetches the resource at {@code urlstr} and returns its body as text.
 *
 * <p>The request is sent with a fixed browser-like {@code User-Agent} and the
 * current value of the {@code cookie} field as the {@code Cookie} header. After
 * the connection is opened, any {@code Set-Cookie} response headers are handed
 * to {@link #extractCookie(URLConnection)}.
 *
 * <p>The body is decoded with the charset announced in the response's
 * {@code Content-Type} header, falling back to UTF-8, and is read line by line;
 * every line is terminated with {@code "\n"} in the returned string.
 *
 * @param urlstr the URL to read
 * @return the response body, or an empty string if any exception occurs
 *         (the stack trace is printed and partial content is discarded)
 */
public String getHtml(String urlstr)
{
    try {
        URL url = new URL(urlstr);
        URLConnection conn = url.openConnection();
        conn.addRequestProperty("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)");
        System.out.println("now to set cookie: " + cookie);
        conn.addRequestProperty("Cookie", cookie);
        // try-with-resources: the reader (and the underlying stream) is closed
        // even when readLine() throws part-way through the response.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                conn.getInputStream(), resolveCharset(conn)))) {
            extractCookie(conn);
            StringBuilder sb = new StringBuilder();
            String inputLine;
            while ((inputLine = in.readLine()) != null)
            {
                sb.append(inputLine).append("\n");
            }
            return sb.toString();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return "";
}

/**
 * Picks the charset used to decode the response body.
 *
 * @param conn the connection whose {@code Content-Type} header is inspected
 * @return the charset named by a {@code charset=} parameter of the header, or
 *         UTF-8 when the header is absent, has no such parameter, or names a
 *         charset this JVM does not know
 */
private java.nio.charset.Charset resolveCharset(URLConnection conn)
{
    String contentType = conn.getContentType();
    if (contentType != null) {
        for (String param : contentType.split(";")) {
            String trimmed = param.trim();
            if (trimmed.regionMatches(true, 0, "charset=", 0, 8)) {
                // The value may be quoted, e.g. charset="utf-8".
                String name = trimmed.substring(8).trim().replace("\"", "");
                try {
                    return java.nio.charset.Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Malformed or unsupported charset label: use the default below.
                }
            }
        }
    }
    return java.nio.charset.StandardCharsets.UTF_8;
}
/**
 * Collects the cookies announced by the response of {@code conn}.
 *
 * <p>Every value of every header whose name matches {@code Set-Cookie}
 * (case-insensitively) is passed through {@code Util.getCookie}; the result is
 * added to {@code cookieSet} and printed. The number of values of each matching
 * header is printed before its values are processed.
 *
 * @param conn the connection whose response headers are scanned
 */
private void extractCookie(URLConnection conn) {
    System.out.println("now try to get cookie:");
    for (Map.Entry<String, List<String>> header : conn.getHeaderFields().entrySet()) {
        // Skip everything that is not a Set-Cookie header (a null header name also lands here).
        if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
            continue;
        }
        List<String> rawCookies = header.getValue();
        System.out.println(rawCookies.size());
        for (String rawCookie : rawCookies) {
            String parsed = Util.getCookie(rawCookie);
            this.cookieSet.add(parsed);
            System.out.println(parsed);
        }
    }
}