目录
1. 爬取网站中的数据
2. 格式化数据并写入文件中
3. 完整代码
1. 爬取网站中的数据
const http
= require("http");
// Address of the news index page to scrape.
const webUrl = "http://news.ifeng.com/";

// Download the page, accumulate the full body, then pass it to formatData().
http
  .get(webUrl, (res) => {
    // Only a 200 response is expected to contain the news list markup.
    // For anything else, drain the response so the socket is released and
    // report the status instead of parsing an unrelated body.
    if (res.statusCode !== 200) {
      res.resume();
      console.error(`Request to ${webUrl} failed with status ${res.statusCode}`);
      return;
    }

    // Let the stream decode UTF-8 itself: concatenating raw Buffer chunks
    // converts each chunk separately and corrupts any multi-byte character
    // that happens to be split across two chunks.
    res.setEncoding("utf8");

    let str = "";
    res.on("data", (chunk) => {
      str += chunk;
    });
    res.on("end", () => {
      formatData(str);
    });
    res.on("error", (err) => {
      console.error(`Reading the response from ${webUrl} failed: ${err.message}`);
    });
  })
  // Without this listener a DNS/connection failure is emitted as an
  // unhandled "error" event and terminates the process.
  .on("error", (err) => {
    console.error(`Request to ${webUrl} failed: ${err.message}`);
  });
2. 格式化数据并写入文件中
cheerio 是 jQuery 核心功能的一个快速、灵活而又简洁的实现,主要用于在服务器端需要对 DOM 进行操作的场景。
const cheerio
= require("cheerio");
const fs
= require("fs");
/**
 * Extract the news entries from the page HTML and save them as a JSON array
 * in "spider/data.json" (the path is relative to the current working directory).
 *
 * Each entry has: id (1-based position in the list), title, imgUrl, from and
 * newsTime, all read from the matching <li> element.
 *
 * @param {string} html - HTML source of the news list page.
 */
function formatData(html) {
  const $ = cheerio.load(html);

  const arr = $(".news-stream-basic-news-list li")
    .map((k, v) => {
      const item = $(v);

      // Only protocol-relative sources ("//host/path") need the scheme added.
      // A missing image becomes "" instead of the bogus "http:undefined", and
      // a source that already has a scheme is kept as is.
      const src = item.find("img").attr("src");
      let imgUrl = "";
      if (src) {
        imgUrl = src.startsWith("//") ? "http:" + src : src;
      }

      return {
        id: k + 1,
        title: item.find("a").attr("title"),
        imgUrl,
        from: item.find(".news-stream-newsStream-mr10").text(),
        newsTime: item.find("time").text(),
      };
    })
    .get(); // unwrap the cheerio collection into a plain array

  // writeFileSync does not create missing directories, so make sure the
  // target directory exists first (no-op when it is already there).
  fs.mkdirSync("spider", { recursive: true });
  fs.writeFileSync("spider/data.json", JSON.stringify(arr));
}
3. 完整代码
const http
= require("http");
// Address of the news index page to scrape.
const webUrl = "http://news.ifeng.com/";

// Download the page, accumulate the full body, then pass it to formatData().
http
  .get(webUrl, (res) => {
    // Only a 200 response is expected to contain the news list markup.
    // For anything else, drain the response so the socket is released and
    // report the status instead of parsing an unrelated body.
    if (res.statusCode !== 200) {
      res.resume();
      console.error(`Request to ${webUrl} failed with status ${res.statusCode}`);
      return;
    }

    // Let the stream decode UTF-8 itself: concatenating raw Buffer chunks
    // converts each chunk separately and corrupts any multi-byte character
    // that happens to be split across two chunks.
    res.setEncoding("utf8");

    let str = "";
    res.on("data", (chunk) => {
      str += chunk;
    });
    res.on("end", () => {
      formatData(str);
    });
    res.on("error", (err) => {
      console.error(`Reading the response from ${webUrl} failed: ${err.message}`);
    });
  })
  // Without this listener a DNS/connection failure is emitted as an
  // unhandled "error" event and terminates the process.
  .on("error", (err) => {
    console.error(`Request to ${webUrl} failed: ${err.message}`);
  });
const cheerio
= require("cheerio");
const fs
= require("fs");
/**
 * Extract the news entries from the page HTML and save them as a JSON array
 * in "spider/data.json" (the path is relative to the current working directory).
 *
 * Each entry has: id (1-based position in the list), title, imgUrl, from and
 * newsTime, all read from the matching <li> element.
 *
 * @param {string} html - HTML source of the news list page.
 */
function formatData(html) {
  const $ = cheerio.load(html);

  const arr = $(".news-stream-basic-news-list li")
    .map((k, v) => {
      const item = $(v);

      // Only protocol-relative sources ("//host/path") need the scheme added.
      // A missing image becomes "" instead of the bogus "http:undefined", and
      // a source that already has a scheme is kept as is.
      const src = item.find("img").attr("src");
      let imgUrl = "";
      if (src) {
        imgUrl = src.startsWith("//") ? "http:" + src : src;
      }

      return {
        id: k + 1,
        title: item.find("a").attr("title"),
        imgUrl,
        from: item.find(".news-stream-newsStream-mr10").text(),
        newsTime: item.find("time").text(),
      };
    })
    .get(); // unwrap the cheerio collection into a plain array

  // writeFileSync does not create missing directories, so make sure the
  // target directory exists first (no-op when it is already there).
  fs.mkdirSync("spider", { recursive: true });
  fs.writeFileSync("spider/data.json", JSON.stringify(arr));
}
转载请注明原文地址:https://blackberry.8miu.com/read-8869.html