如何通过VB下载网页及附件形成本地浏览

https://www.ww.gov.cn/openness/public/6603481/37163631.html

如何通过VB下载上页面,形成本地浏览,包括图片、附件等,尤其是附件。
最好有详细的源代码,谢谢。

麻烦附代码的小主们,自己检测下代码是否运行啊,,不要在网上DOWN一个,就说可以啊。我是弱鸡,我要实现的是这个网页能够完全下载下来,包括样式表,和附件文件,确保能够在本地完美运行。

该回答引用ChatGPT

您可以使用VB编写一个简单的程序,通过HTTP请求下载网页并保存到本地,然后将所有的图片和附件也下载下来,以便可以离线浏览。

以下是一个示例程序,演示如何使用VB下载一个网页及其相关的图片和附件:
运行有问题的话可以找我

Sub DownloadPage()
    Dim url As String, localPath As String, html As String
    Dim httpRequest As Object, stream As Object, fileSystem As Object
    Dim doc As Object, img As Object, a As Object
    
    ' 要下载的页面的URL和本地保存路径
    url = "https://www.ww.gov.cn/openness/public/6603481/37163631.html"
    localPath = "C:\Downloads\page.html"
    
    ' 发送HTTP请求获取页面内容
    Set httpRequest = CreateObject("MSXML2.XMLHTTP")
    httpRequest.Open "GET", url, False
    httpRequest.send
    html = httpRequest.responseText
    
    ' 保存页面到本地文件
    Set fileSystem = CreateObject("Scripting.FileSystemObject")
    Set stream = fileSystem.CreateTextFile(localPath, True)
    stream.Write html
    stream.Close
    
    ' 加载HTML文档
    Set doc = CreateObject("HTMLFile")
    doc.body.innerHTML = html
    
    ' 下载所有图片
    For Each img In doc.getElementsByTagName("img")
        DownloadFile img.src, "C:\Downloads\" & GetFilenameFromUrl(img.src)
    Next
    
    ' 下载所有附件
    For Each a In doc.getElementsByTagName("a")
        If a.href Like "*.doc" Or a.href Like "*.pdf" Then
            DownloadFile a.href, "C:\Downloads\" & GetFilenameFromUrl(a.href)
        End If
    Next
End Sub

' 下载文件并保存到本地
Sub DownloadFile(url As String, localPath As String)
    Dim httpRequest As Object, stream As Object, fileSystem As Object
    Dim content As String
    
    ' 发送HTTP请求获取文件内容
    Set httpRequest = CreateObject("MSXML2.XMLHTTP")
    httpRequest.Open "GET", url, False
    httpRequest.send
    content = httpRequest.responseBody
    
    ' 保存文件到本地
    Set fileSystem = CreateObject("Scripting.FileSystemObject")
    Set stream = fileSystem.CreateBinaryFile(localPath, True)
    stream.Write content
    stream.Close
End Sub

' 从URL中获取文件名
Function GetFilenameFromUrl(url As String) As String
    Dim pos As Integer
    pos = InStrRev(url, "/")
    If pos > 0 Then
        GetFilenameFromUrl = Mid(url, pos + 1)
    Else
        GetFilenameFromUrl = url
    End If
End Function


以下答案基于ChatGPT与GISer Liu编写:

以下是使用VB下载静态网页内容并在本地浏览器中显示的示例代码。该代码可以处理包括图片和附件在内的所有静态内容。

vbnet

' 创建一个新的WebClient对象
Dim wc As New System.Net.WebClient()

' 设置用户代理(User-Agent)头以模拟浏览器请求
wc.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36")

' 下载网页内容
Dim url As String = "http://example.com"
Dim html As String = wc.DownloadString(url)

' 保存网页内容到本地文件
Dim filePath As String = "C:\example.html"
System.IO.File.WriteAllText(filePath, html)

' 在浏览器中打开本地文件
System.Diagnostics.Process.Start(filePath)

要下载网页中的图片和附件,您可以使用以下代码:

vbnet

' 获取网页中的所有链接
Dim matches As MatchCollection = Regex.Matches(html, "<a\s+(?:[^>]*?\s+)?href=""([^""]*)""", RegexOptions.IgnoreCase)

' 遍历链接并下载对应的文件
For Each match As Match In matches
    Dim link As String = match.Groups(1).Value

    ' 如果链接指向一个图片或附件,下载并保存到本地文件
    If link.EndsWith(".jpg") Or link.EndsWith(".png") Or link.EndsWith(".gif") Or link.EndsWith(".pdf") Or link.EndsWith(".doc") Or link.EndsWith(".xls") Then
        Dim fileName As String = link.Substring(link.LastIndexOf("/") + 1)
        wc.DownloadFile(link, "C:\" & fileName)
    End If
Next

以上代码使用正则表达式获取网页中的所有链接,并检查链接是否指向图片或附件。如果是,则使用WebClient对象的DownloadFile方法下载文件,并将其保存到本地硬盘。

注意:此代码仅适用于静态网页。如果要下载动态网页中的内容,您可能需要使用WebBrowser控件或类似的工具来模拟完整的浏览器环境。


Imports System.IO
Imports System.Net

Public Class Form1
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        '下载网页
        Dim url As String = "https://www.ww.gov.cn/openness/public/6603481/37163631.html"
        Dim html As String = DownloadHtml(url)
        Dim htmlFile As String = Path.Combine(Application.StartupPath, "index.html")
        File.WriteAllText(htmlFile, html)

        '下载附件
        Dim links As List(Of String) = ExtractLinks(html)
        For Each link In links
            DownloadFile(link)
        Next

        MessageBox.Show("下载完成!")
    End Sub

    '下载网页内容
    Private Function DownloadHtml(url As String) As String
        Using wc As New WebClient()
            wc.Encoding = System.Text.Encoding.UTF8
            Return wc.DownloadString(url)
        End Using
    End Function

    '提取页面中的链接
    Private Function ExtractLinks(html As String) As List(Of String)
        Dim links As New List(Of String)()
        Dim start As Integer = 0
        Do
            start = html.IndexOf("<a href=""", start)
            If start < 0 Then Exit Do
            start += "<a href=""".Length
            Dim endpos As Integer = html.IndexOf("""", start)
            If endpos < 0 Then Exit Do
            Dim link As String = html.Substring(start, endpos - start)
            If Not link.StartsWith("http") Then
                '相对链接,需要转换成绝对链接
                Dim url As New Uri("https://www.ww.gov.cn")
                link = url.GetLeftPart(UriPartial.Authority) & link
            End If
            links.Add(link)
            start = endpos + 1
        Loop
        Return links
    End Function

    '下载文件
    Private Sub DownloadFile(url As String)
        Dim fileName As String = Path.GetFileName(url)
        Dim filePath As String = Path.Combine(Application.StartupPath, fileName)
        Using wc As New WebClient()
            wc.DownloadFile(url, filePath)
        End Using
    End Sub
End Class

Sub DownloadWebPage()
    Dim URL As String
    Dim HTMLDoc As Object
    Dim objFSO As Object
    Dim objFile As Object
    
    ' 设置网页URL
    URL = "https://www.ww.gov.cn/openness/public/6603481/37163631.html"
    
    ' 创建HTMLDoc对象并打开URL
    Set HTMLDoc = CreateObject("htmlfile")
    HTMLDoc.body.innerHTML = GetHTTPResponse(URL)
    
    ' 获取文件名并保存HTML文件
    Set objFSO = CreateObject("Scripting.FileSystemObject")
    Set objFile = objFSO.CreateTextFile("C:\Temp\webpage.html")
    objFile.WriteLine HTMLDoc.documentElement.outerHTML
    objFile.Close
    
    ' 打开保存的HTML文件
    Shell "C:\Temp\webpage.html"
End Sub

Function GetHTTPResponse(URL As String) As String
    Dim HttpReq As Object
    
    ' 创建HttpReq对象并发送GET请求
    Set HttpReq = CreateObject("MSXML2.XMLHTTP")
    HttpReq.Open "GET", URL, False
    HttpReq.send
    
    ' 返回响应内容
    GetHTTPResponse = HttpReq.responseText
End Function


该代码使用MSXML2.XMLHTTP对象发送HTTP GET请求,然后将响应内容保存为本地HTML文件,并用默认浏览器打开该文件。

要下载页面中的图片和附件,您可以使用相同的方法获取它们的URL,并使用相应的VB方法将它们下载到本地文件夹中。

Private Declare Function URLDownloadToFile Lib "urlmon" _
    Alias "URLDownloadToFileA" ( _
    ByVal pCaller As Long, _
    ByVal szURL As String, _
    ByVal szFileName As String, _
    ByVal dwReserved As Long, _
    ByVal lpfnCB As Long) As Long

Sub DownloadWebpageAndResources()
    Dim strURL As String
    Dim strFilename As String
    Dim strFolder As String
    Dim objXMLHTTP As Object
    Dim objHTML As Object
    Dim objLinks As Object
    Dim objLink As Object
    Dim intCount As Integer
    Dim strFilepath As String
    
    strURL = "https://www.ww.gov.cn/openness/public/6603481/37163631.html"
    strFolder = "C:\DownloadedWebpage\"
    strFilename = strFolder & "webpage.html"
    
    ' Download the webpage HTML
    Set objXMLHTTP = CreateObject("MSXML2.XMLHTTP")
    objXMLHTTP.Open "GET", strURL, False
    objXMLHTTP.Send
    Do While objXMLHTTP.readyState <> 4 Or objXMLHTTP.Status <> 200
        DoEvents
    Loop
    Set objHTML = CreateObject("htmlfile")
    objHTML.body.innerHTML = objXMLHTTP.responseText
    Set objXMLHTTP = Nothing
    
    ' Save the HTML to a file
    Open strFilename For Output As #1
    Print #1, objHTML.documentElement.outerHTML
    Close #1
    
    ' Download all images and attachments
    Set objLinks = objHTML.getElementsByTagName("a")
    For Each objLink In objLinks
        If Right(objLink.href, 4) = ".pdf" Or Right(objLink.href, 4) = ".doc" Or Right(objLink.href, 4) = ".xls" Or Right(objLink.href, 4) = ".jpg" Or Right(objLink.href, 4) = ".png" Then
            intCount = intCount + 1
            strFilepath = strFolder & "resource" & intCount & Right(objLink.href, 4)
            URLDownloadToFile 0&, objLink.href, strFilepath, 0&, 0&
        End If
    Next objLink
    
    MsgBox "Download completed successfully."
End Sub

这个示例代码使用了VB的URLDownloadToFile函数来下载网页中的图片和附件,将其保存在本地文件夹中。可以根据需要修改下载的文件类型和存储路径等参数。

方案来自 梦想橡皮擦 狂飙组基于 GPT 编写的 “程秘”


以下是一个基于 VB.NET 的示例程序,可以下载指定 URL 的页面并保存到本地。该程序会自动下载页面中的所有图片和链接的文件。你可以根据自己的需求对程序进行修改。

Imports System.Net
Imports System.IO

Module Module1

    Sub Main()
        Dim url As String = "https://www.example.com/"
        Dim outputDirectory As String = "C:\output\"

        DownloadWebPage(url, outputDirectory)
    End Sub

    Private Sub DownloadWebPage(ByVal url As String, ByVal outputDirectory As String)
        Dim client As New WebClient()
        Dim html As String = client.DownloadString(url)

        ' Create the output directory if it doesn't exist
        If Not Directory.Exists(outputDirectory) Then
            Directory.CreateDirectory(outputDirectory)
        End If

        ' Save the HTML file
        Dim htmlFileName As String = Path.Combine(outputDirectory, "index.html")
        File.WriteAllText(htmlFileName, html)

        ' Download all linked resources (images, attachments, etc.)
        Dim links As List(Of String) = GetLinks(html)
        For Each link In links
            Dim linkUri As New Uri(link)
            If linkUri.Scheme = "http" Or linkUri.Scheme = "https" Then
                DownloadFile(link, outputDirectory)
            End If
        Next
    End Sub

    Private Function GetLinks(ByVal html As String) As List(Of String)
        Dim links As New List(Of String)()

        ' Search for links in the HTML file
        Dim regex As New System.Text.RegularExpressions.Regex("<a.*?href=[""'](?<url>.*?)[""'].*?>(?<name>.*?)</a>", RegexOptions.IgnoreCase)
        Dim matches As System.Text.RegularExpressions.MatchCollection = regex.Matches(html)

        ' Add each link to the list
        For Each match As System.Text.RegularExpressions.Match In matches
            links.Add(match.Groups("url").Value)
        Next

        Return links
    End Function

    Private Sub DownloadFile(ByVal url As String, ByVal outputDirectory As String)
        Dim client As New WebClient()
        Dim uri As New Uri(url)

        ' Create the output directory if it doesn't exist
        If Not Directory.Exists(outputDirectory) Then
            Directory.CreateDirectory(outputDirectory)
        End If

        ' Determine the local filename
        Dim localFileName As String = Path.Combine(outputDirectory, Path.GetFileName(uri.LocalPath))

        ' Download the file
        client.DownloadFile(uri, localFileName)
    End Sub

End Module

这个程序会下载指定 URL 的页面,并将页面保存到本地。程序会自动下载页面中的所有图片和链接的文件,并将这些文件保存到同一目录中。你可以将程序中的 url 和 outputDirectory 变量修改为你想要的 URL 和输出目录。

需要注意的是,如果下载的页面包含动态内容(例如 JavaScript 生成的内容),这些内容可能无法被保存到本地。此外,该程序没有处理页面中的样式表(CSS),因此可能无法正确显示页面的样式。

https://www.baidu.com/link?url=6s1wPLUIsAqV2hq-SdWxQsNzubzqz-freaKucandtTyQ4UFSpWLy6zCWOcm7B7Jehm2T3VzYIAJwt0t87l4ie_&wd=&eqid=d99425e10005e84e0000000263f4145b

Imports System.IO
Imports System.Net

Public Class Form1
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        '输入URL
        Dim url As String = InputBox("请输入URL", "下载网页")
        '下载网页及其附件
        DownloadPage(url, "C:\DownloadedPages")
        MsgBox("下载完成")
    End Sub

    Private Sub DownloadPage(url As String, localFolder As String)
        '创建Web请求
        Dim request As WebRequest = WebRequest.Create(url)
        '发送请求并获取响应
        Dim response As WebResponse = request.GetResponse()
        '获取响应流
        Dim stream As Stream = response.GetResponseStream()
        '创建本地文件
        Dim fileName As String = Path.Combine(localFolder, Path.GetFileName(url))
        Dim fileStream As New FileStream(fileName, FileMode.Create)
        '从响应流中读取数据并写入本地文件
        Dim buffer(4096) As Byte
        Dim bytesRead As Integer
        Do
            bytesRead = stream.Read(buffer, 0, buffer.Length)
            If bytesRead > 0 Then
                fileStream.Write(buffer, 0, bytesRead)
            End If
        Loop While bytesRead > 0
        '关闭本地文件流和响应流
        fileStream.Close()
        stream.Close()
        '如果下载的是HTML文件,则下载其所有资源文件
        If Path.GetExtension(url).ToLower() = ".html" OrElse Path.GetExtension(url).ToLower() = ".htm" Then
            DownloadResources(fileName, localFolder)
        End If
    End Sub

    Private Sub DownloadResources(htmlFile As String, localFolder As String)
        '读取HTML文件中的内容
        Dim html As String = File.ReadAllText(htmlFile)
        '使用正则表达式匹配所有的资源文件URL
        Dim regex As New System.Text.RegularExpressions.Regex("<(img|link|a).*?href=[\""']?(?<url>[^\""'>]+)[\""']?.*?>", System.Text.RegularExpressions.RegexOptions.IgnoreCase)
        Dim matches As System.Text.RegularExpressions.MatchCollection = regex.Matches(html)
        '下载所有资源文件
        For Each match As System.Text.RegularExpressions.Match In matches
            Dim url As String = match.Groups("url").Value
            '判断URL是否为相对路径,如果是,则转换为绝对路径
            If Not url.ToLower().StartsWith("http") AndAlso Not url.ToLower().StartsWith("https") Then
                url = Path.Combine(Path.GetDirectoryName(htmlFile), url)
            End If
            DownloadFile(url, localFolder)
        Next
    End Sub

    Private Sub DownloadFile(url As String, localFolder As String)
        '创建Web请求
        Dim request As WebRequest = WebRequest.Create(url)
        '发送请求并获取响应
        Dim response As WebResponse = request.GetResponse()
        '获取响应流
        Dim stream As

该回答引用ChatGPT

以下是一段用VB实现的下载html文件的源代码:
打开Microsoft Visual Basic 6.0新建一个标准EXE的项目:
Dim strHtml As String
 
Private Sub Form_Load()
strHtml = DownloadHtml("https://www.ww.gov.cn/openness/public/6603481/37163631.html")
MsgBox strHtml
End Sub
 
Function DownloadHtml(ByVal strUrl As String) As String
 
Dim objXMLHTTP As Object
Dim strTemp As String
 
Set objXMLHTTP = CreateObject("MSXML2.XMLHttp.6.0")
 
With objXMLHTTP
.Open "GET", strUrl, False
.Send
 
Do Until .ReadyState = 4
DoEvents
Loop
 
strTemp = .responseText
End With
 
Set objXMLHTTP = Nothing
DownloadHtml = strTemp
 
End Function


提供参考实例方法:https://www.likecs.com/show-939532.html

参考例子:https://blog.csdn.net/rztyfx/article/details/7605624

提供参考实例方法:https://www.likecs.com/show-939532.html