I would suggest that you might want a solution that produces the widely used base64 URI. Please visit data:text/plain;charset=utf-8;base64,4pi44pi54pi64pi74pi84pi+4pi/ to see a demo (copy the data uri, open a new tab, paste the data URI in the address bar, then press enter to go to the page). Despite the fact that this URI is encoded in base64, the browser can still recognize the upper code points and decode them correctly. The minimum encoder + decoder is 1058 bytes (+ Gzip → 589 bytes)
!function(e){"use strict";function h(b){var a=b.charCodeAt(0);if(55296<=a&&56319>=a)if(b=b.charCodeAt(1),b===b&&56320<=b&&57343>=b){if(a=1024*(a-55296)+b-56320+65536,65535<a)return d(240|a>>>18,128|a>>>12&63,128|a>>>6&63,128|a&63)}else return d(239,191,189);return 127>=a?inputString:2047>=a?d(192|a>>>6,128|a&63):d(224|a>>>12,128|a>>>6&63,128|a&63)}function k(b){var a=b.charCodeAt(0)<<24,f=l(~a),c=0,e=b.length,g="";if(5>f&&e>=f){a=a<<f>>>24+f;for(c=1;c<f;++c)a=a<<6|b.charCodeAt(c)&63;65535>=a?g+=d(a):1114111>=a?(a-=65536,g+=d((a>>10)+55296,(a&1023)+56320)):c=0}for(;c<e;++c)g+="\ufffd";return g}var m=Math.log,n=Math.LN2,l=Math.clz32||function(b){return 31-m(b>>>0)/n|0},d=String.fromCharCode,p=atob,q=btoa;e.btoaUTF8=function(b,a){return q((a?"\u00ef\u00bb\u00bf":"")+b.replace(/[\x80-\uD7ff\uDC00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g,h))};e.atobUTF8=function(b,a){a||"\u00ef\u00bb\u00bf"!==b.substring(0,3)||(b=b.substring(3));return p(b).replace(/[\xc0-\xff][\x80-\xbf]*/g,k)}}(""+void 0==typeof global?""+void 0==typeof self?this:self:global)
Below is the source code used to generate it.
var fromCharCode = String.fromCharCode; var btoaUTF8 = (function(btoa, replacer){"use strict"; return function(inputString, BOMit){ return btoa((BOMit ? "\xEF\xBB\xBF" : "") + inputString.replace( /[\x80-\uD7ff\uDC00-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g, replacer )); } })(btoa, function(nonAsciiChars){"use strict"; // make the UTF string into a binary UTF-8 encoded string var point = nonAsciiChars.charCodeAt(0); if (point >= 0xD800 && point <= 0xDBFF) { var nextcode = nonAsciiChars.charCodeAt(1); if (nextcode !== nextcode) // NaN because string is 1 code point long return fromCharCode(0xef/*11101111*/, 0xbf/*10111111*/, 0xbd/*10111101*/); // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae if (nextcode >= 0xDC00 && nextcode <= 0xDFFF) { point = (point - 0xD800) * 0x400 + nextcode - 0xDC00 + 0x10000; if (point > 0xffff) return fromCharCode( (0x1e/*0b11110*/<<3) | (point>>>18), (0x2/*0b10*/<<6) | ((point>>>12)&0x3f/*0b00111111*/), (0x2/*0b10*/<<6) | ((point>>>6)&0x3f/*0b00111111*/), (0x2/*0b10*/<<6) | (point&0x3f/*0b00111111*/) ); } else return fromCharCode(0xef, 0xbf, 0xbd); } if (point <= 0x007f) return nonAsciiChars; else if (point <= 0x07ff) { return fromCharCode((0x6<<5)|(point>>>6), (0x2<<6)|(point&0x3f)); } else return fromCharCode( (0xe/*0b1110*/<<4) | (point>>>12), (0x2/*0b10*/<<6) | ((point>>>6)&0x3f/*0b00111111*/), (0x2/*0b10*/<<6) | (point&0x3f/*0b00111111*/) ); });
Then, to decode the data, base64 either HTTP receives the data as a data URI, or uses the function below.
var clz32 = Math.clz32 || (function(log, LN2){"use strict"; return function(x) {return 31 - log(x >>> 0) / LN2 | 0}; })(Math.log, Math.LN2); var fromCharCode = String.fromCharCode; var atobUTF8 = (function(atob, replacer){"use strict"; return function(inputString, keepBOM){ inputString = atob(inputString); if (!keepBOM && inputString.substring(0,3) === "\xEF\xBB\xBF") inputString = inputString.substring(3);
The advantage of greater standardization is that this encoder and this decoder are more widely used since they can be used as a valid URL that displays correctly. Note.
(function(window){ "use strict"; var sourceEle = document.getElementById("source"); var urlBarEle = document.getElementById("urlBar"); var mainFrameEle = document.getElementById("mainframe"); var gotoButton = document.getElementById("gotoButton"); var parseInt = window.parseInt; var fromCodePoint = String.fromCodePoint; var parse = JSON.parse; function unescape(str){ return str.replace(/\\u[\da-f]{0,4}|\\x[\da-f]{0,2}|\\u{[^}]*}|\\[bfnrtv"'\\]|\\0[0-7]{1,3}|\\\d{1,3}/g, function(match){ try{ if (match.startsWith("\\u{")) return fromCodePoint(parseInt(match.slice(2,-1),16)); if (match.startsWith("\\u") || match.startsWith("\\x")) return fromCodePoint(parseInt(match.substring(2),16)); if (match.startsWith("\\0") && match.length > 2) return fromCodePoint(parseInt(match.substring(2),8)); if (/^\\\d/.test(match)) return fromCodePoint(+match.slice(1)); }catch(e){return "\ufffd".repeat(match.length)} return parse('"' + match + '"'); }); } function whenChange(){ try{ urlBarEle.value = "data:text/plain;charset=UTF-8;base64," + btoaUTF8(unescape(sourceEle.value), true); } finally{ gotoURL(); } } sourceEle.addEventListener("change",whenChange,{passive:1}); sourceEle.addEventListener("input",whenChange,{passive:1});
img:active{opacity:0.8}
<center> <textarea id="source" style="width:66.7vw">Hello \u1234 W\186\0256ld! Enter text into the top box. Then the URL will update automatically. </textarea><br /> <div style="width:66.7vw;display:inline-block;height:calc(25vw + 1em + 6px);border:2px solid;text-align:left;line-height:1em"> <input id="urlBar" style="width:calc(100% - 1em - 13px)" /><img id="gotoButton" src="" style="width:calc(1em + 4px);line-height:1em;vertical-align:-40%;cursor:pointer" /> <iframe id="mainframe" style="width:66.7vw;height:25vw" frameBorder="0"></iframe> </div> </center> + C9zIDHwCGnH5IvZAOAAABmUlEQVQoz7WS25acIBBFkRLkIgKKtOCttbv // xdDmTGZzHv2S63ltuBQQP4rdRiRUP8UK4wh6nVddQwj / NtDQTvac8577zTQb72zj65 / 876qqt7wykU6 / 1U6vFEgjE1mt / 5LRqrpu7oVsn0sjZejMfxR3W / yLikqAFcUx93YxLmZGOtElmEu6Ufd9xV3ZDTGcEvGLbMk0mHHlUSvS5svCwS + hVL8loQQyfpI1Ay8RF / xlNxcsTchGjGDIuBG3Ik7TMyNxn8m0TSnBAK6Z8UZfp3IbAonmJvmsEACum6aNv7B0CnvpezDcNhw9XWsuAr7qnRg6dABmeM4dTgn / DZdXWs3LMspZ1KDMt1kcPJ6S1icWNp2qaEmjq6myx7jbQK3VKItLJaW5FR + cuYlRhYNKzGa9vF4vM5roLW3OSVjkmiGJrPhUq301 / 16pVKZRGFYWjTP50spTxBN5Z4EKnSonruk + n4tUokv1aJSEl / MLZU90S3L6 / U6o0J142iQVp3HcZxKSo8LfkNRCtJaKYFSRX7iaoAAUDty8wvWYR6HJEepdwAAAABJRU5ErkJggg == "style =" width: calc (1em + <center> <textarea id="source" style="width:66.7vw">Hello \u1234 W\186\0256ld! Enter text into the top box. Then the URL will update automatically. </textarea><br /> <div style="width:66.7vw;display:inline-block;height:calc(25vw + 1em + 6px);border:2px solid;text-align:left;line-height:1em"> <input id="urlBar" style="width:calc(100% - 1em - 13px)" /><img id="gotoButton" src="" style="width:calc(1em + 4px);line-height:1em;vertical-align:-40%;cursor:pointer" /> <iframe id="mainframe" style="width:66.7vw;height:25vw" frameBorder="0"></iframe> </div> </center>
Besides the fact that the above code snippets are very standardized, they are also very fast. Instead of an indirect succession chain, when data needs to be transformed several times between different forms (for example, in Riccardo Galli's answer), the above code snippet is as straightforward as possible. It uses only one simple quick call to String.prototype.replace to process data during encoding and only one to decode data during decoding. Another plus is that (especially for large strings), String.prototype.replace allows the browser to automatically handle basic memory management when changing the size of the string, which leads to a significant increase in performance, especially in evergreen browsers like Chrome and Firefox, which greatly optimize String.prototype.replace . Finally, the highlight is that for you the Latin script excludes users, lines that do not contain code points above 0x7f are processed very quickly because the line remains unchanged by the replacement algorithm.
I created a github repository for this solution at https://github.com/anonyco/BestBase64EncoderDecoder/