Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
menu search
person
Welcome To Ask or Share your Answers For Others

Categories

写了一个简单的基于chreeio的数据爬取代码,node http的模块获取html 然后用chreeio爬取。
想使用最新的async await 来异步操作.代码如下:

clipboard.png

let request = require('request');
let cheerio = require('cheerio');
let fs = require('fs');
let http = require('http');
let https = require('https');

let newCollection = [];
let newCollection_item = {
    title: '',
    href: ''
};
const hrefPrefix = 'xxxxxxxxx'

class Crawler {
    static test() {
            // await getList();
            //想在这里直接获取newCollection
            console.log(this.getList());
        }
        //这里是一个异步获取list
    static async getList() {
        return new Promise((resolve, reject) => {
            http.get("http://xxxx/xxxxxx.com", (res) => {
                res.setEncoding('utf-8');
                let html = "";
                res.on('data', (chunk) => {
                    html += chunk;
                });
                res.on('end', () => {
                    let $ = cheerio.load(html);
                    //爬取数据
                    $('#ajaxtable tbody tr').each(function(index, item) {
                        if (index > 4) {
                            let title = $(this).children().eq(1).find('h3').find('a').text();
                            let href = `${hrefPrefix}` + $(this).children().eq(1).find('h3').find('a').attr('href');
                            newCollection_item = { title, href };
                            newCollection.push(newCollection_item);
                        }
                    }, this);
                    resolve(res);
                    // console.log("=============================================list=================================
", newCollection)
                })
            })
        })
        return newCollection;
    }
}
Crawler.test();

不知道哪里写错了,目的就是想在test()里面能直接拿到getList返回的newCollection;


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
1.4k views
Welcome To Ask or Share your Answers For Others

1 Answer

来,我来回答你这问题
首先我觉得,在非必须情况下,没必要费用es6的class,有的时候模块直接导出一个对象提好的。如果是一个class,实例方法你还要new一个对象才能调用,如若不然,还的申明为类(静态)方法。反而不如js的{xx:xxx}来的方便。

再说你的问题,主要是:如果你调用的一个方法返回值是一个promise。那么你有两种办法可获得。拿你的代码做示例

//第一种办法:用es6的链式回调
Crawler.test().then(function (newCollection) {
    console.log(newCollection);
});

//第二种办法:用es7的awiat
(async function () {//如果代码有异步操作,必须声明该代码块为异步代码,es7规定用方法将其包含起来,async表明此处为异步执行的代码块
   let newCollection=await Crawler.test();//await意为需要等待我执行完,此代码块内才能继续向下执行。但是并不影响其它地方。也就是说只有标识为async内的代码块才受await影响成为同步
   console.log(newCollection);
})()

下边贴上我的爬虫

const http=require('http');
const cheerio=require('cheerio');

function  myHttp(methods,url){
    let promise=new Promise(function (resolve, rejecte) {
        let req=http.get(url)
         req.on("response",function (res) {
             let finalData='';
             res.on("data",function (data) {
                 finalData+=data;
             });
             res.on('end', function(date){
                 resolve(finalData.toString())
             })
         });
    })
    return promise;
}


//调用
(async function () {
        let html=await myHttp('post','http://www.cnblogs.com/flyings/default.html?page=2')
        console.log(html)
})()

效果图
图片描述


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
...