C# HttpWebRequest 越来越慢是什么原因?

比如我有 1 万个不同的网址需要访问。在 50 个线程的情况下,刚启动时速度很快,运行几分钟后越来越慢。以下是代码片段。

class HttpRequest
    {
        // Backing fields. Get() reads these directly, so their names are part of the class's internals.

        // Absolute URL of the resource to fetch.
        private string url;

        // Value sent as the User-Agent header.
        private string userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36";

        // Value sent as the Accept header.
        private string accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";

        // Timeout value, applied by Get() to the request's Timeout, ReadWriteTimeout and ContinueTimeout.
        private int timeOut = 8000;

        // Whether the request asks for a persistent connection.
        private bool keepAlive = false;

        /// <summary>Gets or sets the User-Agent header value.</summary>
        public string UserAgent
        {
            get { return userAgent; }
            set { userAgent = value; }
        }

        /// <summary>Gets or sets the Accept header value.</summary>
        public string Accept
        {
            get { return accept; }
            set { accept = value; }
        }

        /// <summary>Gets or sets the timeout value used for the request.</summary>
        public int TimeOut
        {
            get { return timeOut; }
            set { timeOut = value; }
        }

        /// <summary>Gets or sets whether the request uses keep-alive.</summary>
        public bool KeepAlive
        {
            get { return keepAlive; }
            set { keepAlive = value; }
        }

        /// <summary>Gets or sets the URL to request.</summary>
        public string Url
        {
            get { return url; }
            set { url = value; }
        }

        /// <summary>
        /// Sends a synchronous GET request to <see cref="Url"/> and returns the outcome.
        /// </summary>
        /// <returns>
        /// An <c>HttpResponse</c> whose <c>StatusCode</c> is the server's status (when a
        /// response was received) and whose <c>Html</c> is either the body produced by
        /// <c>GetResponseBody</c> or, on failure, the exception message.
        /// </returns>
        /// <remarks>
        /// Failures of the request itself are reported through the returned object.
        /// Exceptions raised while the request is being built (for example a URL that has
        /// no host segment) are not caught here and propagate to the caller.
        /// </remarks>
        public HttpResponse Get()
        {
            HttpResponse resp = new HttpResponse();

            // For "scheme://host[:port]/path" the third '/'-separated segment is the host part.
            // Computed once and reused for both the Host and Referer headers.
            string host = this.url.Split('/')[2];

            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(this.url);
            request.Method = "GET";
            request.Accept = accept;
            request.UserAgent = userAgent;
            request.Timeout = timeOut;
            request.ReadWriteTimeout = timeOut;
            request.ContinueTimeout = timeOut;
            request.AllowAutoRedirect = true;
            request.MaximumAutomaticRedirections = 3;
            request.KeepAlive = keepAlive;
            request.Proxy = null;
            request.ServicePoint.Expect100Continue = false;
            request.ServicePoint.UseNagleAlgorithm = false;
            request.ServicePoint.ConnectionLimit = 65500;
            request.AllowWriteStreamBuffering = false;
            request.Host = host;
            request.Referer = "http://" + host;
            request.Headers.Add("Accept-Encoding", "gzip");

            HttpWebResponse response = null;

            try
            {
                response = (HttpWebResponse)request.GetResponse(); // send the request
                resp.StatusCode = response.StatusCode;
                resp.Html = GetResponseBody(response);
            }
            catch (WebException ex)
            {
                resp.Html = ex.Message;

                // For HTTP error statuses GetResponse() throws before "response" is assigned;
                // the server's reply is only reachable through ex.Response. It must be read
                // for the status code and closed, otherwise its connection is never released.
                using (HttpWebResponse errorResponse = ex.Response as HttpWebResponse)
                {
                    if (errorResponse != null)
                    {
                        resp.StatusCode = errorResponse.StatusCode;
                    }
                }
            }
            catch (Exception ex)
            {
                // Any status code obtained before the failure has already been stored above.
                resp.Html = ex.Message;
            }
            finally
            {
                // Single cleanup path for success and failure alike.
                if (response != null)
                {
                    response.Close();
                }

                request.Abort();
            }

            return resp;
        }

以下是调用代码:


//list 里面有1万个链接
            // Start 50 long-running workers; each one keeps requesting randomly chosen links.
            for (int i = 0; i < 50; i++)
            {
                Task.Factory.StartNew(() =>
                {
                    // Seeding from DateTime.Now.Ticks gives workers that start within the same
                    // clock tick the same seed, and therefore the same sequence of links.
                    // A fresh Guid hash gives each worker its own (practically distinct) seed.
                    Random r = new Random(Guid.NewGuid().GetHashCode());

                    while (true)
                    {
                        // Pick a random link. The upper bound of Random.Next is exclusive,
                        // so passing list.Count makes every index, including the last, eligible.
                        string links = list[r.Next(list.Count)];

                        HttpRequest http = new HttpRequest();
                        http.Url = links;
                        http.Get();

                        System.Threading.Thread.Sleep(1);
                    }
                }, TaskCreationOptions.LongRunning);
            }

以下是线程耗时:

img

尝试关掉代理,在配置文件中(.config)中添加配置节:

<?xml version="1.0"?>  
<!-- Application configuration file: declares the supported runtime and turns off the default proxy for System.Net. -->
<configuration>  
  <startup>  
    <!-- Runtime the application declares support for. -->
    <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.0"/>  
  </startup>  
  <system.net>  
    <!-- enabled="false" disables the default web proxy; the child elements are left empty. -->
    <defaultProxy  
        enabled="false"  
        useDefaultCredentials="false" >  
      <proxy/>  
      <bypasslist/>  
      <module/>  
    </defaultProxy>  
  </system.net>  
</configuration>  

感觉好像死循环了,while循环那里无法退出

可能和访问完资源没有及时释放有关系,增加一些释放资源的语句试一下

不一定是代码的问题,有可能对方网站加了反爬机制,同一个 IP 频繁访问会被限制访问时间。题主可以使用多个代理 IP 来轮流访问,试试是否还会出现此类情况。