私はWebスクレイパーを作成しており、最終的にWebページをmhtファイルとして保存してから処理することにしました(アクセス権限の問題と私のプログラミングスキル不足のため、オンラインでページに直接アクセスして抽出することができませんでした笑)。
HTML Agility Packを使用して表からテキストを抽出していますが、ページ上の表には多数の画像も含まれています。
さまざまな方法を試しましたが、このファイルから画像を抽出できません。どなたか助けていただけますか?
どうもありがとう
''' <summary>
''' Loads the saved page from disk and copies the rows of the element with
''' id "HomeMyStudents" into <c>DGV</c>, one grid row per table row.
''' </summary>
''' <remarks>
''' Grid column 0 receives the placeholder text "test"; grid columns 1-10
''' receive the text of the 2nd-11th <c>td</c> of the row. The text of the
''' first <c>td</c> of each row is appended to <c>ListBox1</c>.
''' Table rows without any <c>td</c> child are skipped.
''' NOTE(review): an .mht file is a MIME archive, so the HTML part may be
''' quoted-printable encoded and images are stored as separate encoded parts;
''' loading it directly as HTML may not expose them - confirm against the file.
''' </remarks>
''' <param name="sender">The control that raised the event (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub Button1_Click(ByVal sender As Object, ByVal e As EventArgs) Handles Button1.Click
    ' Number of values handed to the grid for every row.
    Const ColumnCount As Integer = 11

    Dim Doc As New HtmlAgilityPack.HtmlDocument
    Doc.Load("J:\table1.mht")

    ' SelectNodes returns Nothing (not an empty collection) when nothing matches,
    ' so it must be checked before enumerating.
    Dim tableRows As HtmlAgilityPack.HtmlNodeCollection = Doc.DocumentNode.SelectNodes("//*[@id='HomeMyStudents']/tbody/tr")
    If tableRows Is Nothing Then
        Return
    End If

    For Each tableRow As HtmlAgilityPack.HtmlNode In tableRows
        ' Direct <td> children only, matching the original td[n] steps.
        Dim cells As HtmlAgilityPack.HtmlNodeCollection = tableRow.SelectNodes("td")
        If cells Is Nothing Then
            Continue For
        End If

        ' Build the whole grid row first and add it in one call; this avoids
        ' indexing DGV.Rows by a hand-maintained counter (the previous code
        ' started writing at row 1 and left row 0 empty).
        Dim values(ColumnCount - 1) As Object
        values(0) = "test"
        For columnIndex As Integer = 1 To Math.Min(cells.Count, ColumnCount) - 1
            values(columnIndex) = cells(columnIndex).InnerText
        Next
        DGV.Rows.Add(values)

        ListBox1.Items.Add(cells(0).InnerText)
    Next
End Sub