纵有疾风起
人生不言弃

记录使用node写一个简易爬虫

准备工作-使用到的模块
//全局安装自动重启工具nodemon
cnpm install -g nodemon
//安装项目依赖
cnpm i --save koa koa-router mysql cheerio superagent-charset superagent
// ---- app.js ----
// Application entry point: mounts the router and listens on port 3000.
const Koa = require('koa');
const index = require('./routes/index');

const app = new Koa();

// Koa's `use` accepts a single middleware, so the router middleware and the
// allowed-methods middleware must be registered in separate calls.
app.use(index.routes());
app.use(index.allowedMethods());

app.listen(3000);

// Route definitions
// ---- /routes/index ----
const router = require('koa-router')();
// `escape` comes from the mysql driver itself; it quotes and escapes a value
// so that scraped text can never break out of the SQL string literal.
const { escape } = require('mysql');
const mysql = require('../db/mysql');
const superagent = require('../caiji/superagent');

/**
 * GET /caiji/:page
 *
 * Scrapes one listing page of cnodejs.org and stores every topic found in the
 * `nodeData` table. Responds with `{ code: 1, message }` on success and
 * `{ code: 0, message }` when nothing was scraped or an insert failed.
 */
router.get('/caiji/:page', async (ctx) => {
  // The page segment is user-controlled, so encode it before placing it in the URL.
  const page = encodeURIComponent(ctx.params.page);

  try {
    const topics = await superagent.get(`https://cnodejs.org/?tab=all&page=${page}`);

    // Always answer the request; otherwise Koa falls back to a bare 404.
    if (!Array.isArray(topics) || topics.length === 0) {
      ctx.body = {
        code: 0,
        message: '采集失败:未获取到任何数据',
      };
      return;
    }

    let count = 0;
    // Rows are inserted one at a time so that `count` reflects what was
    // actually written before any failure.
    for (const { title, userName, time } of topics) {
      await mysql.query(
        `insert into nodeData(title,userName,time) values(${escape(title)},${escape(userName)},${escape(time)})`
      );
      count += 1;
    }

    ctx.body = {
      code: 1,
      message: `该页采集完成,共采集【${count}】条`,
    };
  } catch (error) {
    ctx.body = {
      code: 0,
      message: `采集失败:${error}`,
    };
  }
});

// app.js calls `index.routes()`, so the router instance must be exported.
module.exports = router;
//"数据库配置"----/db/config----module.exports = {   DATABASE:'test',        USERNAME:'root',        PASSWORD:'zhy123456',        PORT:'3306',        HOST:'localhost'}----/db/mysql----const mysql = require('mysql');const config = require('./config');let pool = mysql.createPool({    host:config.HOST,    user:config.USERNAME,    password:config.PASSWORD,    database:config.DATABASE})class Mysql{    constructor(){    }    query(sql){        console.log(sql)        return new Promise((resolve,resject)=>{            pool.query(sql,(err,res,fields)=>{                if (err) {                    throw err;                }                resolve(res)            })        })    }}module.exports = new Mysql()
//采集模块----/db/caiji----const cheerio = require('cheerio'),    superagent = require('superagent'),    charset = require('superagent-charset');charset(superagent);module.exports = {    get(url) {        return new Promise((resolve, reject) => {            superagent.get(url)                .charset('utf-8')                .end((err, res) => {                    if (err) {                        resolve([])                    }                    if (res) {                        let $ = cheerio.load(res.text, {                            decodeEntities: false                        })                        let arr = [];                        for (let i in $('.cell')) {                            let title = $('.cell').eq(i).find('.topic_title').eq(0).html(),                                userName = $('.cell').eq(i).find('.user_avatar').eq(0).find('img').eq(0).attr('title'),                                time = $('.cell').eq(i).find('.last_active_time').eq(0).html(),                                views = $('.cell').eq(i).find('.count_of_visits').eq(0).text();                            if (title) {                                arr.push({                                    title: title.trim(),                                    userName,                                    time,                                    views: views.trim()                                })                            }                        }                        resolve(arr)                    } else {                        resolve([])                    }                })        })    }}
//启动
nodemon app.js
//浏览器地址
[http://127.0.0.1:3000/caiji/1](http://127.0.0.1:3000/caiji/1)

第一页采集完成

![image.png](https://upload-images.jianshu.io/upload_images/5814981-6d1c2143f10bc9ca.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

文章转载于:https://www.jianshu.com/p/5a7eb9f35073

原著是一个有趣的人,若有侵权,请通知删除

未经允许不得转载:起风网 » 记录使用node写一个简易爬虫
分享到: 生成海报

评论 抢沙发

评论前必须登录!

立即登录