I would like to abandon the url:
1 item list request
1 request for each result for information
Here is what I have:
var request = require('request')
, cheerio = require('cheerio')
, async = require('async')
, format = require('util').format;
var baseurl = 'http://magiccards.info';
async.waterfall([
function (callback) {
request(baseurl + '/sitemap.html', function (err, response, body) {
var sets = [];
var $ = cheerio.load(body);
$('a[href$="/en.html"]').each(function () {
sets.push({"name": $(this).text(), "code":$(this).attr('href').match(/\/([^)]+)\//)[1], "path": $(this).attr('href'), "translations":[]});
});
callback(null, sets);
});
},
function (sets, callback) {
console.log(sets);
async.eachSeries(sets, function (set, callback) {
console.log('SET ' + set.code.toUpperCase());
request(baseurl + set.path, function (err, response, body) {
var $ = cheerio.load(body);
$('body > a[href^="/' + set.code + '/"]').each(function () {
console.log(' %s (%s)', $(this).text(), $(this).attr('href'));
});
});
});
}
], function (err, result) {
console.log('ERR');
});
The problem is that the function of the 2nd waterfall runs only once, if I replace eachSeries with each, the loop executes X times (but I need to wait for the result).
White am I missing?
source
share