使用 Node 爬取一个网站当中所有图片的链接,不用下载,该怎么样写代码?
给你一个案例:
const http = require("http");
const fs = require('fs');
const cheerio = require('cheerio');
const request = require('request');
let iconv = require("iconv-lite");
// Page number currently being crawled. Declared with `let` because the
// crawler increments it after every page it finishes.
let index = 13;
// First listing page to fetch, derived from `index` so the two stay in sync.
// (The site root is http://www.netbian.com; numbered listing pages follow
// the pattern /index_<n>.htm.)
const url = `http://www.netbian.com/index_${index}.htm`;
/**
 * Resolves an <img> `src` attribute against the page it was found on, so
 * relative and protocol-relative paths become absolute URLs.
 *
 * @param {string} src - Raw value of the `src` attribute.
 * @param {string} pageUrl - URL of the page that contained the image.
 * @returns {string|null} The absolute URL, or null when `src` cannot be parsed.
 */
const resolveImageUrl = (src, pageUrl) => {
  try {
    return new URL(src, pageUrl).href;
  } catch (err) {
    return null;
  }
};

/**
 * Fetches one listing page, decodes it as GBK, logs every image matched by
 * `.list img`, saves each image to ./images/<page>_<n>.jpg, and then
 * requests the next numbered page.
 *
 * Reads and increments the module-level `index` counter; the file names and
 * the URL of the next page are both built from it.
 *
 * @param {string} url - Address of the listing page to fetch.
 * @returns {void}
 */
const getData = (url) => {
  const pageRequest = http.get(url, (res) => {
    const chunks = [];

    res.on("data", (chunk) => {
      chunks.push(chunk);
    });

    // Without a listener an 'error' event would be thrown and kill the process.
    res.on("error", (err) => {
      console.log('err', err);
    });

    res.on("end", () => {
      // Join the raw chunks first, then decode once: decoding chunk by chunk
      // could split a multi-byte GBK character across two chunks.
      const html = iconv.decode(Buffer.concat(chunks), "GBK");
      const $ = cheerio.load(html);
      // Capture the page number so every file from this page shares it.
      const page = index;

      $('.list img').each((i, el) => {
        const src = $(el).attr('src');
        // Images without a usable src are skipped; returning undefined
        // (not false) keeps the cheerio iteration going.
        const imgUrl = src ? resolveImageUrl(src, url) : null;
        if (!imgUrl) {
          return;
        }

        // `alt` may be absent; fall back to an empty label instead of throwing.
        const text = ($(el).attr('alt') || '').replace(/\s*/g, "");
        const label = `${page}_${i + 1}`;
        console.log(text, '=====', label);
        console.log(imgUrl);

        // Stream the image into the local images directory.
        const file = fs.createWriteStream(`./images/${label}.jpg`);
        file.on('error', (err) => {
          console.log('err', err);
        });
        request(imgUrl)
          .on('error', () => {
            console.log('失败了');
            // Close the half-written file so the descriptor is not leaked.
            file.end();
          })
          .pipe(file);
      });

      // Hard-coded last-page limit: crawling stops once `index` exceeds 1228,
      // which means page 1229 is still requested.
      // NOTE(review): if 1228 is meant to be the final page, this should be `>=`.
      if (index > 1228) {
        return;
      }
      index++;
      getData(`http://www.netbian.com/index_${index}.htm`);
    });
  });

  // Connection-level failures (DNS, refused, reset) end the crawl with a log
  // line instead of an uncaught exception.
  pageRequest.on('error', (err) => {
    console.log('err', err);
  });
};
// Entry point: make sure the ./images output directory exists, and only then
// start crawling. Starting the crawl before the asynchronous stat/mkdir has
// finished would let the first image writes fail on a missing directory.
fs.stat('images', (error) => {
  if (!error) {
    // The path already exists; nothing to create.
    getData(url);
    return;
  }
  fs.mkdir("images", (err) => {
    if (err) {
      // Do not report success or start crawling when the directory could
      // not be created.
      console.log('err', err);
      return;
    }
    console.log('创建目录成功');
    getData(url);
  });
});
```javascript
```