私が直面している問題は、ブラウザでそのURLを開くとスクレイピングに必要な情報がすべて表示されるのに、コードから同じリンクをスクレイピングすると、2つの(重要な)部分が欠落してしまうことです。欠落しているのは、レビュー数と評価、および「価格と売り手の情報」の下にある部分です。これがC#の内部Webクライアントからのスクリーンショットです:http://gyazo.com/908a37c7f70712fba1f82ec90a604d4d.png?1338822369
以下が、コンテンツの取得に使用しているコードです:
/// <summary>
/// Performs an HTTP GET against <paramref name="inURL"/> and returns the response body as text.
/// Records the requested URL, the final response URI and the request address on this instance,
/// merges cookies returned by the server into <paramref name="inCookieContainer"/>, and passes
/// the body to <c>setViewState</c> before returning it.
/// </summary>
/// <param name="inURL">Absolute URL to request.</param>
/// <param name="inCookieContainer">Cookie container attached to the request; may be null.</param>
/// <param name="GZip">When true, advertises "gzip, deflate" in the Accept-Encoding header.</param>
/// <param name="proxyAddress">Proxy host, or "0" to make the request without an explicit proxy.</param>
/// <param name="proxyPort">Proxy port, or 0 to make the request without an explicit proxy.</param>
/// <param name="proxyUserName">User name for proxy authentication.</param>
/// <param name="proxyPassword">Password for proxy authentication.</param>
/// <returns>
/// The response body on success. On any failure the exception message is returned instead
/// (no exception escapes this method), so callers cannot distinguish an error message from
/// page content by type alone.
/// </returns>
public string navGet(string inURL, CookieContainer inCookieContainer, bool GZip, string proxyAddress, int proxyPort, string proxyUserName, string proxyPassword)
{
    try
    {
        this.currentUrl = inURL;

        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(inURL);
        webRequest.Timeout = this.TimeOutSetting;
        webRequest.CookieContainer = inCookieContainer;

        // "0" as the address or 0 as the port means "no explicit proxy".
        if (proxyAddress != "0" && proxyPort != 0)
        {
            webRequest.Proxy = new WebProxy(proxyAddress, proxyPort);
            // Use login credentials to access proxy
            webRequest.Proxy.Credentials = new NetworkCredential(proxyUserName, proxyPassword);
        }

        Uri destination = webRequest.Address;

        webRequest.KeepAlive = true;
        webRequest.Method = "GET";
        webRequest.Accept = "*/*";
        webRequest.Headers.Add("Accept-Language", "en-us");
        if (GZip)
        {
            webRequest.Headers.Add("Accept-Encoding", "gzip, deflate");
        }
        webRequest.AllowAutoRedirect = true;
        webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
        // NOTE(review): a GET carries no body, so this Content-Type is presumably unnecessary;
        // kept to leave the outgoing request unchanged.
        webRequest.ContentType = "text/xml";

        // Forward the PHP session cookie explicitly when one is known for this destination.
        // Missing container / missing cookie is an expected case, so check instead of catching.
        if (inCookieContainer != null)
        {
            Cookie sessionCookie = inCookieContainer.GetCookies(destination)["PHPSESSID"];
            if (sessionCookie != null)
            {
                try
                {
                    webRequest.Headers.Add("Cookie", "USER_OK=1;PHPSESSID=" + sessionCookie.Value);
                }
                catch (ArgumentException)
                {
                    // Best-effort: a cookie value that is not a legal header value must not
                    // abort the request; continue without the explicit Cookie header.
                }
            }
        }

        // Dispose the response so the underlying connection is released even on failure.
        using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
        {
            if (!webRequest.HaveResponse)
            {
                return "No response received from host.";
            }

            // First handle cookies: update same-named cookies in place, add the new ones.
            if (inCookieContainer != null)
            {
                foreach (Cookie retCookie in webResponse.Cookies)
                {
                    bool cookieFound = false;
                    foreach (Cookie oldCookie in inCookieContainer.GetCookies(destination))
                    {
                        if (retCookie.Name.Equals(oldCookie.Name))
                        {
                            oldCookie.Value = retCookie.Value;
                            cookieFound = true;
                        }
                    }
                    if (!cookieFound)
                    {
                        inCookieContainer.Add(retCookie);
                    }
                }
            }

            // Read response, undoing any content encoding the server applied.
            // Ordinal, case-insensitive matching avoids culture-dependent lower-casing.
            string contentEncoding = webResponse.ContentEncoding ?? string.Empty;
            Stream responseStream = webResponse.GetResponseStream();
            if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                responseStream = new GZipStream(responseStream, CompressionMode.Decompress);
            }
            else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                responseStream = new DeflateStream(responseStream, CompressionMode.Decompress);
            }

            string responseString;
            // NOTE(review): decodes with the system default code page rather than the charset
            // declared by the response; confirm this is intended for the target pages.
            using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.Default))
            {
                responseString = reader.ReadToEnd();
            }

            // ResponseUri reflects the final URI after any automatic redirects.
            this.currentUrl = webResponse.ResponseUri.ToString();
            this.currentAddress = webRequest.Address.ToString();
            setViewState(responseString);
            return responseString;
        }
    }
    catch (Exception ex)
    {
        // Existing contract: failures are reported to the caller as the message text.
        return ex.Message;
    }
}