05-node+cheerio实现爬虫获取数据

    科技2022-07-15  110

    目录

    1. 爬取网站中的数据
    2. 格式化数据并写入文件中
    3. 完整代码

    1. 爬取网站中的数据

    const http = require("http");

    // Page whose HTML is downloaded and handed to formatData.
    const webUrl = "http://news.ifeng.com/";

    // Download the page, accumulate the whole body as text, then pass it to
    // formatData once the response has ended.
    http.get(webUrl, res => {
        // Anything other than 200 (redirect, error page, ...) is not the page we
        // want to parse; report it and drain the body so the socket is released.
        if (res.statusCode !== 200) {
            console.error(`Request to ${webUrl} failed with status ${res.statusCode}`);
            res.resume();
            return;
        }

        // Decode at the stream level: concatenating raw Buffer chunks onto a string
        // decodes each chunk separately and corrupts multi-byte UTF-8 characters
        // that happen to be split across two chunks.
        res.setEncoding("utf8");

        let str = "";
        res.on("data", chunk => {
            str += chunk;
        });
        res.on("end", () => {
            formatData(str);
        });
    }).on("error", err => {
        // Without a listener, a connection failure is thrown as an unhandled
        // 'error' event and terminates the process.
        console.error(`Request to ${webUrl} failed: ${err.message}`);
    });

    2. 格式化数据并写入文件中

    // cheerio is a fast, flexible and lean implementation of core jQuery, mainly
    // intended for server-side code that needs to manipulate the DOM.
    const cheerio = require("cheerio");
    const fs = require("fs");
    const path = require("path");

    /**
     * Extracts the news items from the downloaded page and writes them to
     * "spider/data.json" as a JSON array.
     *
     * Every `li` under `.news-stream-basic-news-list` becomes one record with a
     * 1-based `id`, the `title` attribute of its link, its image URL, the text of
     * its `.news-stream-newsStream-mr10` element (`from`) and the text of its
     * `time` element (`newsTime`).
     *
     * @param {string} html - HTML source of the page to parse.
     * @returns {void}
     */
    function formatData(html) {
        const $ = cheerio.load(html);
        const arr = [];

        $(".news-stream-basic-news-list li").each((k, v) => {
            const item = $(v);

            // Items without an image get null rather than the string "http:undefined".
            // Only protocol-relative sources ("//host/...") receive the "http:" prefix;
            // a source that already carries a scheme is kept unchanged.
            const src = item.find("img").attr("src");
            let imgUrl = null;
            if (src) {
                imgUrl = src.startsWith("//") ? `http:${src}` : src;
            }

            arr.push({
                id: k + 1,
                title: item.find("a").attr("title"),
                imgUrl,
                from: item.find(".news-stream-newsStream-mr10").text(),
                newsTime: item.find("time").text(),
            });
        });

        // writeFileSync does not create missing directories, so make sure the
        // target directory exists before writing.
        const outputFile = "spider/data.json";
        fs.mkdirSync(path.dirname(outputFile), { recursive: true });
        fs.writeFileSync(outputFile, JSON.stringify(arr));
    }

    3. 完整代码

    const http = require("http");
    const fs = require("fs");
    const path = require("path");
    const cheerio = require("cheerio");

    // Page whose HTML is downloaded and handed to formatData.
    const webUrl = "http://news.ifeng.com/";

    // Download the page, accumulate the whole body as text, then pass it to
    // formatData once the response has ended.
    http.get(webUrl, res => {
        // Anything other than 200 (redirect, error page, ...) is not the page we
        // want to parse; report it and drain the body so the socket is released.
        if (res.statusCode !== 200) {
            console.error(`Request to ${webUrl} failed with status ${res.statusCode}`);
            res.resume();
            return;
        }

        // Decode at the stream level: concatenating raw Buffer chunks onto a string
        // decodes each chunk separately and corrupts multi-byte UTF-8 characters
        // that happen to be split across two chunks.
        res.setEncoding("utf8");

        let str = "";
        res.on("data", chunk => {
            str += chunk;
        });
        res.on("end", () => {
            formatData(str);
        });
    }).on("error", err => {
        // Without a listener, a connection failure is thrown as an unhandled
        // 'error' event and terminates the process.
        console.error(`Request to ${webUrl} failed: ${err.message}`);
    });

    /**
     * Extracts the news items from the downloaded page and writes them to
     * "spider/data.json" as a JSON array.
     *
     * Every `li` under `.news-stream-basic-news-list` becomes one record with a
     * 1-based `id`, the `title` attribute of its link, its image URL, the text of
     * its `.news-stream-newsStream-mr10` element (`from`) and the text of its
     * `time` element (`newsTime`).
     *
     * @param {string} html - HTML source of the page to parse.
     * @returns {void}
     */
    function formatData(html) {
        const $ = cheerio.load(html);
        const arr = [];

        $(".news-stream-basic-news-list li").each((k, v) => {
            const item = $(v);

            // Items without an image get null rather than the string "http:undefined".
            // Only protocol-relative sources ("//host/...") receive the "http:" prefix;
            // a source that already carries a scheme is kept unchanged.
            const src = item.find("img").attr("src");
            let imgUrl = null;
            if (src) {
                imgUrl = src.startsWith("//") ? `http:${src}` : src;
            }

            arr.push({
                id: k + 1,
                title: item.find("a").attr("title"),
                imgUrl,
                from: item.find(".news-stream-newsStream-mr10").text(),
                newsTime: item.find("time").text(),
            });
        });

        // writeFileSync does not create missing directories, so make sure the
        // target directory exists before writing.
        const outputFile = "spider/data.json";
        fs.mkdirSync(path.dirname(outputFile), { recursive: true });
        fs.writeFileSync(outputFile, JSON.stringify(arr));
    }
    Processed: 0.010, SQL: 8