How to get a web page as plain text, without any HTML, using JavaScript?

I am trying to find a way to use JavaScript or jQuery to write a function that removes all HTML tags from a page and gives me just the plain text of that page.

How can I do that? Any ideas?

+3
source share
6 answers

IE and WebKit

document.body.innerText

Others:

document.body.textContent

(as suggested by Amr El-Garhi)

Most JS frameworks implement a cross-browser method for this. It is usually done as follows:

// Prefer the standard textContent; fall back to innerText where textContent
// is missing (or empty). Declared with `var` so the snippet does not create an
// implicit global, which would throw in strict mode.
var text = document.body.textContent || document.body.innerText;

WebKit seems to preserve some formatting with textContent, whereas it strips everything with innerText.

+6
source

Alternatively, if you are using jQuery:

jQuery(document.body).text();
+3

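The only trouble with textContent or innerText is that they can jam the text from adjacent nodes together, without any white space between them.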

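If that matters, you can recurse through the container, collect the text of each text node in an array, and join the array with a space or a newline.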

/**
 * Collects the text of every text node beneath a node, in document order.
 *
 * Text nodes (nodeType 3) whose data contains at least one non-whitespace
 * character are included verbatim; whitespace-only text nodes are skipped.
 * Every other kind of node is descended into through firstChild/nextSibling.
 *
 * @param {Node} hoo - Container whose descendants are scanned. A falsy value
 *     yields an empty array.
 * @returns {string[]} One entry per non-blank text node.
 */
document.deepText = function (hoo) {
    // A single accumulator shared by the whole walk, so nested levels append
    // in place instead of re-copying the partial result with concat().
    var texts = [];

    // Appends the text found under `parent` to `texts`, depth first.
    function collect(parent) {
        var node = parent.firstChild;
        // `!= null` deliberately matches both null and undefined.
        while (node != null) {
            if (node.nodeType === 3) {
                var data = node.data || '';
                if (/\S/.test(data)) {
                    texts.push(data);
                }
            } else {
                collect(node);
            }
            node = node.nextSibling;
        }
    }

    if (hoo) {
        collect(hoo);
    }
    return texts;
};

// Demo: show the page text with the pieces separated by single spaces.
// Use .join('\n') instead to put each text node on its own line.
alert(document.deepText(document.body).join(' '));
+2

HTML . IE (obj - jQuery):

/**
 * Returns the text of every text node under the first element of a
 * jQuery-style collection, each followed by a newline.
 *
 * @param {Object} obj - Array-like collection (e.g. a jQuery object); only
 *     obj[0] is used as the root of the walk.
 * @returns {string} The concatenated node values, one per line, or "" when
 *     obj is missing or empty.
 */
function getTextFromHTML(obj) {
    // An empty selection has no root to iterate; passing undefined to
    // createNodeIterator would throw, so return the empty result instead.
    if (!obj || !obj[0]) {
        return "";
    }

    // SHOW_TEXT restricts the iterator to text nodes. The trailing
    // `null, false` arguments (filter, entity-reference expansion) are kept
    // as in the original call.
    var ni = document.createNodeIterator(obj[0], NodeFilter.SHOW_TEXT, null, false);
    var plainText = "";
    var nodeLine;

    // nextNode() returns null once the iterator is exhausted.
    while ((nodeLine = ni.nextNode())) {
        plainText += nodeLine.nodeValue + "\n";
    }

    return plainText;
}
+1
0

Try this:

<!-- Loads jQuery 1.4.2 (over https so the request is not blocked as mixed
     content on secure pages), defines a stripTags plugin, applies it to
     <head> immediately, and removes all images once the DOM is ready. -->
<script type="text/javascript" src="https://code.jquery.com/jquery-1.4.2.js"></script>
<script type="text/javascript">
    /**
     * Replaces each matched element with its own inner HTML after removing
     * everything that looks like a tag.
     *
     * Each element is processed on its own: .html() only reads the first
     * element of a set, so calling it on the whole set would replace every
     * matched element with the first element's content (and fail on an
     * empty set).
     *
     * The regex is a textual strip, not an HTML parser: a ">" inside an
     * attribute value ends the match early.
     *
     * @returns {jQuery} The set the plugin was called on.
     */
    jQuery.fn.stripTags = function() {
        return this.each(function() {
            var element = jQuery(this);
            element.replaceWith( element.html().replace(/<\/?[^>]+>/gi, '') );
        });
    };

    // Runs as soon as this script executes, without waiting for DOM ready.
    jQuery('head').stripTags();

    jQuery(document).ready(function() {
        // .remove() already applies to every matched element.
        jQuery('img').remove();
    });
</script>

, .

, ?

[EDIT] , [/EDIT]

0
source

Source: https://habr.com/ru/post/1748359/


All Articles