Replace a string with its HTML-escaped equivalent, except for <a> tags

How do I convert this string:

Hello my name is <a href='/max'>max</a>! 
<script>alert("DANGEROUS SCRIPT INJECTION");</script>

to

Hello my name is <a href='/max'>max</a>! 
&lt;script&gt;alert("DANGEROUS SCRIPT INJECTION");&lt;/script&gt;

I can easily replace every < and > with &lt; and &gt; using:

// Escape every "<" and ">" in `string` as an HTML entity, so no tag (including <a>) survives.
string = string.replace(/</g, "&lt;").replace(/>/g, "&gt;");

but I still want to keep the <a> links.


I also studied preventing script injection with:

// string.bold() wraps the text in <b>...</b>; passing that markup to $ (presumably jQuery — confirm)
// gives a single root element to search within.
var html = $(string.bold()); 
// Drop every <script> element found under that root.
html.find('script').remove();

But I want the script tags to stay readable as text rather than be deleted.

+4
source share
2 answers

One approach to this problem is to use a regular expression with a strict negative lookahead pattern, which lets you whitelist only anchors that follow a very specific format.

For example, suppose the only anchors you want to allow have one of these two forms:

<a href="http://host.domain/path?query#anchor">text</a>

<a href="https://host.domain/path?query#anchor">text</a>

Then match every "<" that does not begin such an anchor (using a negative lookahead):

<(?!a href="https?:\/\/\w[\w.-\/\?#]+">\w+<\/a>)

Note, however, that this also matches the "<" of the closing anchor tag (</a>), so it too would be replaced with &lt;.

To leave </a> untouched, add it as a second alternative inside the lookahead:

<(?!a href="https?:\/\/\w[\w.-\/\?#]+">\w+<\/a>|\/a>)

, - -.

Use this pattern with .replace:

// Replaces each "<" with "&lt;" unless it begins a whitelisted http(s) anchor or a closing </a>.
// replace() returns a new string; as written the result is discarded, so assign it to use it.
string.replace(/<(?!a href="https?:\/\/\w[\w.-\/\?#]+">\w+<\/a>|\/a>)/g, '&lt;');

. , , , , .

0

Another approach is to scan the string yourself and escape every HTML tag whose name is not listed in allowedTagNames.

/**
 * Escapes every tag whose name is not whitelisted, leaving whitelisted tags intact.
 *
 * The input is scanned left to right. A tag is the text from a "<" to the first
 * following ">". Its name is the text after the "<" (and an optional "/") up to
 * the first space or the closing ">". The name comparison is exact and
 * case-sensitive.
 *
 * - Whitelisted tags are copied verbatim, attributes included.
 *   NOTE(security): attributes are NOT inspected, so an allowed tag can still
 *   carry e.g. an inline event handler or a javascript: URL. Filter attributes
 *   separately before using this on untrusted input.
 * - Any other tag has its delimiters rewritten to &lt; / &gt;. A "<" nested
 *   inside such a tag is escaped too, so it cannot open a real tag once the
 *   outer delimiters are neutralised.
 * - A "<" with no ">" anywhere after it is escaped rather than copied raw.
 *
 * Text outside tags is otherwise copied unchanged (a stray ">" is left as is).
 *
 * @param {string} input - HTML text to process.
 * @param {string[]} allowedNames - Tag names that may pass through unescaped.
 * @returns {string} The processed HTML.
 */
function escapeDisallowedTags(input, allowedNames) {
    var output = "";
    var cursor = 0;

    while (true) {
        var tagStart = input.indexOf("<", cursor);
        if (tagStart === -1) { break; }

        // The first ">" ends the tag. This is deliberately not quote-aware:
        // a naive quote-aware scan would treat `<a "><script>...">` as a single
        // allowed tag and let the nested script through.
        var tagEnd = input.indexOf(">", tagStart);
        if (tagEnd === -1) { break; }

        var nameStart = tagStart + 1;
        if (input[nameStart] === "/") {
            // closing tag: the name starts after the slash
            ++nameStart;
        }
        // the name ends at the first space, or at ">" when the tag has no space
        var nameEnd = input.indexOf(" ", nameStart);
        if (nameEnd === -1 || nameEnd > tagEnd) {
            nameEnd = tagEnd;
        }
        var tagName = input.slice(nameStart, nameEnd);

        // text between the previous tag and this one
        output += input.slice(cursor, tagStart);

        if (allowedNames.indexOf(tagName) === -1) {
            // Escape nested "<" as well; otherwise `<x <img onerror=...>` would
            // leave a live `<img` behind in the output.
            var inner = input.slice(tagStart + 1, tagEnd).replace(/</g, "&lt;");
            output += "&lt;" + inner + "&gt;";
        } else {
            output += input.slice(tagStart, tagEnd + 1);
        }
        cursor = tagEnd + 1;
    }

    // The remainder holds no complete tag. Any "<" still in it is unterminated
    // and must not reach the output raw.
    return output + input.slice(cursor).replace(/</g, "&lt;");
}

// input
var html = "Hello my name is <a href='/max'>max</a>! <script>alert('DANGEROUS SCRIPT INJECTION');</script>";

var allowedTagNames = ["a"];
// output
var processedHTML = escapeDisallowedTags(html, allowedTagNames);

Caveat: this assumes that attribute values never contain < or >. A counterexample: <a href=">">

0

Source: https://habr.com/ru/post/1629548/


All Articles