Here is the code:
catch (WebException ex)
{
    // A failed request is counted and swallowed; crawling continues.
    failed = true;
    wccfg.failedUrls++;
    return csFiles;
}
catch (Exception ex)
{
    // Any other exception is counted, then rethrown to the caller.
    failed = true;
    wccfg.failedUrls++;
    throw;
}
Despite these handlers, I still get an exception. The message is: NullReferenceException : Object reference not set to an instance of an object
System.NullReferenceException was unhandled by user code
  HResult=-2147467261
  Message=Object reference not set to an instance of an object.
  Source=GatherLinks
  StackTrace:
       at GatherLinks.TimeOut.getHtmlDocumentWebClient(String url, Boolean useProxy, String proxyIp, Int32 proxyPort, String usename, String password) in d:\C-Sharp\GatherLinks\GatherLinks-2\GatherLinks\GatherLinks\TimeOut.cs:line 55
       at GatherLinks.WebCrawler.webCrawler(String mainUrl, Int32 levels) in d:\C-Sharp\GatherLinks\GatherLinks-2\GatherLinks\GatherLinks\WebCrawler.cs:line 151
       at GatherLinks.WebCrawler.webCrawler(String mainUrl, Int32 levels) in d:\C-Sharp\GatherLinks\GatherLinks-2\GatherLinks\GatherLinks\WebCrawler.cs:line 151
       at GatherLinks.WebCrawler.webCrawler(String mainUrl, Int32 levels) in d:\C-Sharp\GatherLinks\GatherLinks-2\GatherLinks\GatherLinks\WebCrawler.cs:line 151
       at GatherLinks.BackgroundWebCrawling.secondryBackGroundWorker_DoWork(Object sender, DoWorkEventArgs e) in d:\C-Sharp\GatherLinks\GatherLinks-2\GatherLinks\GatherLinks\BackgroundWebCrawling.cs:line 82
       at System.ComponentModel.BackgroundWorker.OnDoWork(DoWorkEventArgs e)
       at System.ComponentModel.BackgroundWorker.WorkerThreadStart(Object argument)
  InnerException:
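Because the catch (Exception) block rethrows with throw;, the NullReferenceException keeps its original stack (TimeOut.cs line 55), bubbles up through the three recursive webCrawler frames, and escapes secondryBackGroundWorker_DoWork; BackgroundWorker then surfaces it through RunWorkerCompletedEventArgs.Error. A minimal sketch for inspecting it there, assuming the worker field is named secondryBackGroundWorker as the handler name in the trace suggests:

secondryBackGroundWorker.RunWorkerCompleted += (sender, e) =>
{
    if (e.Error != null)
    {
        // e.Error is the exception that escaped DoWork, e.g. the
        // NullReferenceException rethrown by webCrawler's catch block.
        Console.WriteLine(e.Error);
    }
};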
Here is the try code inside the webCrawler function:
public List<string> webCrawler(string mainUrl, int levels)
{
    busy.WaitOne();
    HtmlWeb hw = new HtmlWeb();
    List<string> webSites;
    List<string> csFiles = new List<string>();

    csFiles.Add("temp string to know that something is happening in level = " + levels.ToString());
    csFiles.Add("current site name in this level is : " + mainUrl);
    try
    {
        HtmlAgilityPack.HtmlDocument doc = TimeOut.getHtmlDocumentWebClient(mainUrl, false, "", 0, "", "");
        done = true;

        // Report progress to any listeners.
        Object[] temp_arr = new Object[8];
        temp_arr[0] = csFiles;
        temp_arr[1] = mainUrl;
        temp_arr[2] = levels;
        temp_arr[3] = currentCrawlingSite;
        temp_arr[4] = sitesToCrawl;
        temp_arr[5] = done;
        temp_arr[6] = wccfg.failedUrls;
        temp_arr[7] = failed;
        OnProgressEvent(temp_arr);

        currentCrawlingSite.Add(mainUrl);
        webSites = getLinks(doc);
        removeDupes(webSites);
        removeDuplicates(webSites, currentCrawlingSite);
        removeDuplicates(webSites, sitesToCrawl);
        if (wccfg.removeext)
        {
            // Drop external links when configured to do so.
            for (int i = 0; i < webSites.Count; i++)
            {
                webSites.Remove(removeExternals(webSites, mainUrl, wccfg.localy));
            }
        }
        if (wccfg.downloadcontent)
        {
            retwebcontent.retrieveImages(mainUrl);
        }
        if (levels > 0)
            sitesToCrawl.AddRange(webSites);

        if (levels == 0)
        {
            return csFiles;
        }
        else
        {
            // Recurse into each discovered link, one level shallower.
            for (int i = 0; i < webSites.Count; i++)
            {
                if (wccfg.toCancel)
                {
                    return new List<string>();
                }
                string t = webSites[i];
                if (t.StartsWith("http://") || t.StartsWith("https://"))
                {
                    csFiles.AddRange(webCrawler(t, levels - 1));
                }
            }
            return csFiles;
        }
    }
    catch (WebException ex)
    {
        // Count the failure and keep whatever was collected so far.
        failed = true;
        wccfg.failedUrls++;
        return csFiles;
    }
    catch (Exception ex)
    {
        // Count the failure, then rethrow; this is what propagates the
        // NullReferenceException out of the BackgroundWorker.
        failed = true;
        wccfg.failedUrls++;
        throw;
    }
}
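The stack trace places the null dereference inside TimeOut.getHtmlDocumentWebClient (TimeOut.cs line 55), which is not shown above, so the real fix belongs in that method. As a hedged defensive step at the call site, assuming the helper can also return null instead of throwing, the returned document could be guarded the same way the WebException path is handled:

HtmlAgilityPack.HtmlDocument doc = TimeOut.getHtmlDocumentWebClient(mainUrl, false, "", 0, "", "");
if (doc == null)  // assumption: the helper may return null on a failed download
{
    failed = true;
    wccfg.failedUrls++;
    return csFiles;  // mirror the WebException handler
}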
This is how wccfg is used at the top of the class:
private System.Threading.ManualResetEvent busy;
WebcrawlerConfiguration wccfg;
List<string> currentCrawlingSite;
List<string> sitesToCrawl;
RetrieveWebContent retwebcontent;
public event EventHandler<WebCrawlerProgressEventHandler> ProgressEvent;
public bool done;
public bool failed;
public WebCrawler(WebcrawlerConfiguration webcralwercfg)
{
    failed = false;
    done = false;
    currentCrawlingSite = new List<string>();
    sitesToCrawl = new List<string>();
    busy = new System.Threading.ManualResetEvent(true);
    wccfg = webcralwercfg;
}
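For context, a minimal sketch of how this class is presumably constructed and driven from the worker thread seen in the stack trace; the WebcrawlerConfiguration constructor and the example URL and depth are assumptions, not code from the project:

var cfg = new WebcrawlerConfiguration();  // assumed parameterless constructor
var worker = new System.ComponentModel.BackgroundWorker();
worker.DoWork += (s, e) =>
{
    var crawler = new WebCrawler(cfg);
    e.Result = crawler.webCrawler("http://example.com", 2);  // crawl two levels deep
};
worker.RunWorkerAsync();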