Why does the code run, but the exported CSV file end up blank?

Could someone help me work out why this code runs without crashing, yet the exported CSV file is blank — it contains only the header row? Stranger still, the file keeps growing in size as more data is scraped.


```python
# encoding=utf-8
# -*- coding:utf-8 -*- 
import logging
logging.getLogger("bs4.dammit").setLevel(logging.ERROR)
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool
import urllib2
import requests
import json
import re
import csv
import unicodecsv as ucsv
from itertools import islice
import gc

import sys
reload(sys)
sys.setdefaultencoding('utf-8')

house_urls = []
region = '台山市'

with open('./基础信息/所有单元链接.csv')as f:
    f_csv = csv.reader(f)
    for row in islice(f_csv, 1, None):
        if row[0] == region:
            house_urls.append({'url':row[1]})
        

count_house = len(house_urls)

head = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en-GB;q=0.8,en-US;q=0.7,en;q=0.6',
    'Connection': 'keep-alive',
    'Host': 'jmzjj.jiangmen.cn:8085',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36}'
    }


count_fail = 0
fail_house = []

def gethtml(url):
    global count_house
    global count_fail
    global fail_house
    fails = 0
    while True:
        try:
            if fails >= 10:
                count_fail += 1
                fail_house.append({'url':url})
                break
            
            head['Referer'] = url['url']
            req = urllib2.Request(url['url'],headers=head)
            response = urllib2.urlopen(req,None,15)
            html = response.read()
            soup_house_detail(html)
            response.close()
            count_house -= 1 
            print('待执行:'+str(count_house)+',失败:'+str(count_fail))
        except:
            fails += 1
            print('正在尝试再次请求: '+str(fails))
        else:
            break

House_Detail_dic = {}
House_Detail_list = []

csv_headers = ['登记号','开发商名称','开发商地址','开发商电话','项目地址','预售证号','房屋结构','销售情况','项目名称','栋号','房屋坐落','房屋号','套内面积','建筑面积','房屋用途','申报单价','申报总价','房屋朝向','签约日期','备注','物业管理公司','物业管理费']
with open('./结果/'+region+'.csv','wb')as f:
    f_csv = csv.writer(f)
    f_csv.writerow(csv_headers)
    f.close()

def soup_house_detail(html):
    House_Detail_list = []
    soup = BeautifulSoup(html,"html.parser")
    while True:
        try:
            for link in soup.find_all('div',style="max-width:1200px;width:100%; margin:0 auto;"):
                PresellName = link.find_all('td',id="PresellName")
                House_Detail_list=[
                    link.find('td',id="djhtd").get_text().strip(),
                    link.find('td',id["CorpName"]).get_text().strip(),
                    link.find('td',id["CorpAddress"]).get_text().strip(),
                    link.find('td',id["CorpPhone"]).get_text().strip(),
                    link.find('td',id["HouseRepose"]).get_text(),
                    link.find('td',id["PresellBookID"]).get_text(),
                    link.find('td',id["HouseFrame"]).get_text(),
                    link.find('td',id["statusfont1"]).get_text()+link.find('td',id["statusfont2"]).get_text()+link.find('td',id["statusfont3"]).get_text(),
                    PresellName[0].string,
                    link.find('td',id["DongNo"]).get_text(),
                    PresellName[1].string,
                    link.find('td',id["HouseNO"]).get_text(),
                    link.find('td',id["HouseArea"]).get_text().split('m')[0],
                    link.find('td',id["SumBuildArea1"]).get_text().split('m')[0],
                    link.find('td',id["HouseUse"]).get_text(),
                    link.find('td',id["sbdj"]).get_text().split('元')[0],
                    link.find('td',id["sbzj"]).get_text().split('元')[0],
                    link.find('td',id["CHX"]).get_text(),
                    link.find('td',id["VisaDate"]).get_text(),
                    link.find('td',id["BZGS"]).get_text(),
                    link.find('td',id["ManagerCom"]).get_text(),
                    link.find('td',id["ManagerCharge"]).get_text().split('元')[0]
                ]
                print(House_Detail_list)
            with open('./结果/'+region+'.csv','a+')as f:
                f_csv = csv.writer(f)
                f_csv.writerow(House_Detail_list)
                f.close()
        except Exception as e:
            print '内容解析失败' ,e
        else:
            break
    soup.decompose()
    del soup
    gc.collect()



pool = ThreadPool(100) 
pool.map(gethtml, house_urls)
pool.close() 
pool.join()

csv_headers = ['url']
with open('./fail/'+region+'.csv','wb')as f:
    f_csv = ucsv.DictWriter(f,csv_headers)
    f_csv.writeheader()
    f_csv.writerows(fail_house)
    f.close()


```

In case you haven't solved this yet, a few pointers:
  • You can look at the answers to this question: https://ask.csdn.net/questions/7626675
  • You can also refer to this article: merging multiple CSV files while keeping only one header row (a minimal sketch of that idea appears after the blog excerpt below).
  • You could also review the 读写CSV文件 (reading and writing CSV files) section of 李宁's 数据分析"薪"法修炼-面试篇 course to consolidate the basics.
  • Beyond that, the relevant part of this blog post — counting one column of a CSV file by grouping its values and tallying each group — may help; it is excerpted below:
The blog targets Python 2.7. Its code follows, with comments translated, an unused re-read of the intermediate file dropped, and the `read_csv` calls fixed to pass the encoding by keyword (passed positionally, pandas would treat it as the separator):

```python
# Count the values of one column of a CSV file:
# group the column and tally how many rows fall into each group.
import numpy as np
import pandas as pd

path = 'C:\\Users\\hg\\Desktop\\a\\a.csv'   # the path must not contain Chinese characters, or pandas errors out

df = pd.read_csv(path, encoding='utf-8', engine='python')
print(df)
df.describe()

# Extract the wanted column (0 is the column index) and write it out
address = pd.read_csv(path, usecols=[0])
address.to_csv('C:\\Users\\hg\\Desktop\\b\\d.csv')

print(np.unique(address))                   # the distinct group names in that column

# Count occurrences per group; 'driving' is the header of the extracted column
ts = pd.Series(address['driving'].values, index=address['driving'])
counts = ts.value_counts()

# The blog converts the Series to a DataFrame before writing it out;
# the result is one row per group name together with its count
pd.DataFrame(counts).to_csv('C:\\Users\\hg\\Desktop\\b\\e.csv')
```
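For what it's worth, the same tally can be produced in one step; a sketch under the same assumptions (the extracted column is named 'driving', paths shortened for illustration):

```python
import pandas as pd

# value_counts on the column itself replaces the Series round-trip above
counts = pd.read_csv('a.csv', usecols=[0])['driving'].value_counts()
pd.DataFrame(counts).to_csv('e.csv')
```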

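As for the merge idea referenced in the second pointer above, here is a minimal Python 2 sketch (the glob pattern and output file name are hypothetical):

```python
# Merge several CSV files into one, keeping only a single header row.
import csv
import glob

with open('merged.csv', 'wb') as out:        # binary mode for the csv module on Python 2
    writer = csv.writer(out)
    header_written = False
    for path in sorted(glob.glob('./结果/*.csv')):
        with open(path, 'rb') as f:
            reader = csv.reader(f)
            header = next(reader, None)      # every file is assumed to start with the same header
            if header is None:
                continue                     # skip empty files
            if not header_written:
                writer.writerow(header)      # keep the header from the first file only
                header_written = True
            writer.writerows(reader)         # append the remaining data rows
```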

Solution, for reference:
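There are two small bugs in `soup_house_detail`, and together they explain both symptoms:

1. Every lookup after the first is written `link.find('td', id["CorpName"])` instead of `link.find('td', id="CorpName")`. `id[...]` tries to index Python's built-in `id` function and raises a `TypeError`; the broad `except` swallows it, and because the `while True` loop only breaks on success, the thread then retries the same doomed parse forever.
2. When `find_all` matches nothing (an exact string match on the `style` attribute is brittle), the `for` body never runs, `House_Detail_list` is still `[]`, and `f_csv.writerow([])` appends an empty line. The file therefore grows by one blank row per page while showing nothing but the header — exactly what you are seeing.

On top of that, 100 pool threads append to one file with no lock, so even valid rows can interleave. Below is a minimal corrected sketch of the parser (Python 2, matching your script; only the first few fields are shown, the rest follow the same `id="..."` keyword pattern, and `region` is the global from your script):

```python
import csv
import threading
from bs4 import BeautifulSoup

csv_lock = threading.Lock()   # serialise appends across the pool's worker threads

def soup_house_detail(html):
    soup = BeautifulSoup(html, 'html.parser')
    rows = []
    for link in soup.find_all('div', style="max-width:1200px;width:100%; margin:0 auto;"):
        try:
            rows.append([
                link.find('td', id="djhtd").get_text().strip(),
                # id must be a keyword argument: id="CorpName", not id["CorpName"]
                link.find('td', id="CorpName").get_text().strip(),
                link.find('td', id="CorpAddress").get_text().strip(),
                # ... the remaining fields, all written as id="..." ...
            ])
        except Exception as e:
            print '内容解析失败', e     # log and skip this block; never retry forever
    if not rows:
        return                          # nothing matched: write nothing, not a blank row
    with csv_lock:                      # only one thread may append at a time
        with open('./结果/' + region + '.csv', 'ab') as f:
            csv.writer(f).writerows(rows)
```

With this in place, a row is only written when a parse actually succeeds, so the file can no longer grow while staying blank, and the lock keeps threads from interleaving partial lines. Two further suggestions: replace the bare `except:` in `gethtml` with `except Exception as e: print e`, so errors like the `TypeError` above surface instead of being retried silently, and drop the explicit `f.close()` calls — `with open(...)` already closes the file. If the CSV still stays empty after the fix, print `len(soup.find_all('div', style=...))` for one page first to confirm the selector matches at all.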