Real-time scraping (YouTube) with CasperJS: trouble selecting Polymer elements

I am trying to scrape text from YouTube live-chat feeds using CasperJS. I'm having trouble choosing the right selector. There are many nested elements, and new elements are generated dynamically for each message that is pushed. How can I continuously extract the nested

<span id="message">some message</span>

elements as they appear? Currently, I can't seem to grab even one! Here is my test code. Note: you can substitute any YouTube URL that has a live chat.

 // Probe a YouTube watch page for a live-chat message node, then take a screenshot.
 const ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0';
 const url = "https://www.youtube.com/watch?v=NksKCLsMUsI";
 const casper = require("casper").create({
   viewportSize: { width: 1080, height: 724 }
 });

 // Runs after the 3-second wait: report whether a chat message node is present
 // in the document, then save a screenshot for inspection.
 function reportMessagePresence() {
   const found = this.exists("span#message");
   this.echo(found ? "found the a message!" : "can't find a message");
   casper.capture("test.png");
 }

 casper.start();
 casper.userAgent(ua);
 casper.thenOpen(url, function () {
   // Give the page a few seconds to render before checking for the selector.
   this.wait(3000, reportMessagePresence);
 });
 casper.run();

So my questions are: 1. How do I select the right message elements? 2. How can I continuously listen for new ones?

UPDATE: I have been playing with Nightmare (an Electron-based testing suite) and it looks promising, but I still can't select the chat items. I know I am missing something simple.

EDIT / UPDATE (using Cadabra's fine example)

 // Poll the standalone live-chat page once per second and echo the newest
 // message whenever its text differs from the previously echoed one.
 var casper = require("casper").create({
   viewportSize: { width: 1024, height: 768 }
 });
 // Declared with `var` so they are not leaked as implicit globals.
 var url = 'https://www.youtube.com/live_chat?continuation=0ofMyAMkGiBDZzhLRFFvTFJVRTFVVlkwZEV4MFRFVWdBUSUzRCUzRDAB';
 var ua = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0';

 casper.start(url);
 casper.userAgent(ua);

 // Text of the last message that was echoed; used to suppress repeats.
 var currentMessage = '';

 (function getPosts() {
   // Filled in by the wait step, read by the following step.
   var post = null;

   casper.wait(1000, function () {
     casper.capture('test.png');
     post = this.evaluate(function () {
       var nodes = document.querySelectorAll('yt-live-chat-text-message-renderer');
       // The chat may be empty (or not rendered yet): report "nothing" instead
       // of throwing on nodes[-1].
       if (nodes.length === 0) {
         return null;
       }
       var last = nodes[nodes.length - 1];
       var author = last.querySelector('#author-name');
       var message = last.querySelector('#message');
       if (!author || !message) {
         return null;
       }
       return {
         author: author.textContent,
         message: message.textContent
       };
     });
   });

   casper.then(function () {
     // `post` is null when no complete message was found on this poll.
     if (post && currentMessage !== post.message) {
       currentMessage = post.message;
       this.echo(post.author + ' - ' + post.message);
     }
   });

   // Queue the next polling round once this one has finished.
   casper.then(function () {
     getPosts();
   });
 })();

 casper.run();
+5
source share
1 answer

It is much more complicated than you think ... Look at what I tried, without success:

1. Use the ignore-ssl-errors option

YouTube uses https. This is a real problem for us, because PhantomJS really dislikes SSL / TLS ... Here we need to use ignore-ssl-errors . The option can be passed on the command line:

 casperjs --ignore-ssl-errors=true script.js 

2. Open the chat page instead of iframe

The comments we are trying to scrape are not on the main page. They come from an external page loaded in an iframe. In CasperJS we could use the withFrame() method, but that is needless complexity when we can open the chat page directly ...

Main page | Chat Page

3. Test with PhantomJS (WebKit) and SlimerJS (Gecko)

Due to YouTube limitations, both browsers give the same result:

Oh no!
It looks like you are using an older version of your browser. Update it to use chat.

If you want to test yourself, here is the script:

 // Minimal reproduction: open the standalone chat page, wait five seconds,
 // then save a screenshot of whatever the page rendered.
 var CHAT_URL = 'https://www.youtube.com/live_chat?continuation=0ofMyAMkGiBDZzhLRFFvTFRtdHpTME5NYzAxVmMwa2dBUSUzRCUzRDAB';
 var casper = require("casper").create({
   viewportSize: { width: 1080, height: 724 }
 });

 casper.start(CHAT_URL);
 casper.wait(5000, function takeScreenshot() {
   this.capture('chat.png');
 });
 casper.run();

PhantomJS: casperjs --ignore-ssl-errors=true script.js

SlimerJS: casperjs --engine=slimerjs script.js

Conclusion: For this, you may need a real web browser, such as Firefox or Chromium. Automation frameworks such as Nightwatch.js can help ...


EDIT 1

OK, so ... Using your user-agent string, this works:

 // Open the standalone chat page and print every message currently displayed.
 var casper = require("casper").create({
   viewportSize: { width: 1080, height: 724 }
 });

 // Runs inside the page: build one "AUTHOR - message" string per chat entry
 // (author upper-cased, message lower-cased).
 function collectPosts() {
   var entries = document.querySelectorAll('yt-live-chat-text-message-renderer');
   return Array.prototype.map.call(entries, function (entry) {
     var author = entry.querySelector('#author-name').textContent.toUpperCase();
     var message = entry.querySelector('#message').textContent.toLowerCase();
     return author + ' - ' + message;
   });
 }

 // A desktop Firefox user agent is what makes the chat page render here.
 casper.userAgent('Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0');
 casper.start('https://www.youtube.com/live_chat?continuation=0ofMyAMkGiBDZzhLRFFvTFRtdHpTME5NYzAxVmMwa2dBUSUzRCUzRDAB');

 casper.wait(5000, function () {
   var posts = this.evaluate(collectPosts);
   this.each(posts, function (self, post) {
     self.echo(post);
   });
 });

 casper.run();

With this script, you should see the latest messages from the conversation in your terminal. :)


EDIT 2

Since the video is back online, I spent some time modifying my previous code to implement real-time polling with a recursive IIFE. With the following script, I can get the latest comment in the chat stream. The page is checked every second for updated content, and messages are filtered to avoid duplicates.

 // Poll the standalone live-chat page once per second and echo the newest
 // message whenever its text differs from the previously echoed one.
 var casper = require("casper").create({
   viewportSize: { width: 1080, height: 724 }
 });

 casper.userAgent('Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0');
 casper.start('https://www.youtube.com/live_chat?continuation=0ofMyAMkGiBDZzhLRFFvTFRtdHpTME5NYzAxVmMwa2dBUSUzRCUzRDAB');

 // Text of the last message that was echoed; used to suppress repeats.
 var currentMessage = '';

 (function getPosts() {
   // Filled in by the wait step, read by the following step.
   var post = null;

   casper.wait(1000, function () {
     post = this.evaluate(function () {
       var nodes = document.querySelectorAll('yt-live-chat-text-message-renderer');
       // The chat may be empty (or not rendered yet): report "nothing" instead
       // of throwing on nodes[-1].
       if (nodes.length === 0) {
         return null;
       }
       var last = nodes[nodes.length - 1];
       var author = last.querySelector('#author-name');
       var message = last.querySelector('#message');
       if (!author || !message) {
         return null;
       }
       return {
         author: author.textContent,
         message: message.textContent
       };
     });
   });

   casper.then(function () {
     // `post` is null when no complete message was found on this poll.
     if (post && currentMessage !== post.message) {
       currentMessage = post.message;
       this.echo(post.author + ' - ' + post.message);
     }
   });

   // Queue the next polling round once this one has finished.
   casper.then(function () {
     getPosts();
   });
 })();

 casper.run();

It works PERFECTLY on my computer.

+2
source

Source: https://habr.com/ru/post/1268191/


All Articles