I am trying to get multiple items from a website with multiple pages. I am currently using PhantomJS to do this work, and my code is almost working, but the problem is that my code resets the first page twice, even if (according to the log) it seems like I have already moved to the second.
Here is the code:
var page = require('webpage').create();
page.viewportSize = { width: 1061, height: 1000 };
page.open("website", function () {
function fetch_names(){
var name = page.evaluate(function () {
return [].map.call(document.querySelectorAll('div.pepitesteasermain h2 a'), function(name){
return name.getAttribute('href');
});
});
console.log(name.join('\n'));
page.render('1.png');
window.setTimeout(function (){
goto_next_page();
}, 5000);
}
function goto_next_page(){
page.evaluate(function () {
var a = document.querySelector('#block-system-main .next a');
var e = document.createEvent('MouseEvents');
e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
a.dispatchEvent(e);
waitforload = true;
});
fetch_names();
}
fetch_names();
});
You can try it yourself to understand how it all works.
source
share