JavaScript encoding with special characters

I wanted to write a method that escapes special characters, such as "ä", to their Unicode escape sequence (e.g. \u00e4).

For some reason, JS doesn't even seem to store 'ä' internally, but uses 'Ã¶Ã–' or some other garbled text instead, so when I convert it, it spits out '\u00c3\u00b6\u00c3\u002013', because it converts these garbled characters instead of "ä".

I tried setting the encoding of the HTML file to utf-8 and tried to load scripts with charset = "UTF-8" to no avail. The code really does nothing special, but here it is:

/**
 * Returns a copy of the string in which every UTF-16 code unit above 126 is
 * replaced by its six-character `\uXXXX` escape sequence. All other
 * characters are copied through unchanged.
 *
 * Each escape that is produced is also logged to the console together with
 * the character it replaces.
 *
 * @returns {string} The escaped string.
 */
String.prototype.replaceWithUtf8 = function() {
  var str_newString = '';
  var str_procString = this;
  for (var i = 0; i < str_procString.length; i++) {
    var int_charCode = str_procString.charCodeAt(i);
    if (int_charCode > 126) {
      // Left-pad to exactly four hex digits. A fixed '\\u00' prefix is only
      // correct for code units up to 0xFF; with it, U+2013 would come out as
      // the malformed sequence \u002013.
      var hex_uniCode = '\\u' + ('000' + int_charCode.toString(16)).slice(-4);
      console.log(hex_uniCode + " (" + str_procString.charAt(i) + ")");
      str_newString += hex_uniCode;
    } else {
      str_newString += str_procString.charAt(i);
    }
  }
  return str_newString;
};

var str_item = "Lärm, Lichter, Lücken, Löcher.";
console.log(str_item);
// Lärm, Lichter, Lücken, Löcher.
console.log(str_item.replaceWithUtf8());
// Output reported by the author. \u00c3\u00a4 is what the two UTF-8 bytes of
// "ä" look like when they are decoded as a single-byte charset:
//L\u00c3\u00a4rm, Lichter, L\u00c3\u00bccken, L\u00c3\u00b6cher.
+4
source share
3 answers

Use '\\u' + ('000' + str_procString.charCodeAt(i).toString(16) ).substr(-4); instead, to get the correct escape sequences - yours always start with 00. Also, instead of looping over your string character by character, using the string's .replace() method may be faster.

As for your question:

 // Snippet quoted from the question; the trailing comment is the console output reported there.
 console.log("Lärm, Lichter, Lücken, Löcher."); // Lärm, Lichter, Lücken, Löcher. 

doesn’t sound like you really served the file with the correct encoding. Perhaps this is a problem with the server, if the file itself is already saved correctly.

+2
source

I have no idea how or why, but I just restarted the server and now it displays correctly. As a follow-up, here's the code for anyone interested:

 /**
  * Builds an escaped copy of the string.
  *
  * - "/" and '"' are prefixed with a backslash.
  * - Every UTF-16 code unit above 126 is replaced by its `\uXXXX` escape.
  * - Everything else is copied unchanged.
  *
  * Each replacement is also written to the console.
  *
  * @returns {string} The escaped string.
  */
 String.prototype.replaceWithUtf8 = function() {
   var source = this;
   var pieces = [];
   // Characters that receive a fixed backslash escape.
   var fixedEscapes = { '/': '\\/', '"': '\\"' };

   for (var pos = 0; pos < source.length; pos++) {
     var ch = source.charAt(pos);

     if (Object.prototype.hasOwnProperty.call(fixedEscapes, ch)) {
       console.log(fixedEscapes[ch]);
       pieces.push(fixedEscapes[ch]);
       continue;
     }

     var code = source.charCodeAt(pos);
     // charCodeAt never returns more than 65535, so one bound is enough.
     if (code <= 126) {
       pieces.push(ch);
       continue;
     }

     var escaped = '\\u' + ("000" + code.toString(16)).slice(-4);
     console.log(escaped + " (" + ch + ")");
     pieces.push(escaped);
   }

   return pieces.join('');
 };
+2
source

 /**
  * Builds an escaped copy of the string using the shortest escape form for
  * each character.
  *
  * - "/" and '"' are prefixed with a backslash.
  * - Code units 127-255 become a four-character `\xXX` escape.
  * - Code units 256-65535 become a six-character `\uXXXX` escape.
  * - Everything else is copied unchanged.
  *
  * @returns {string} The escaped string.
  */
 String.prototype.replaceWithUtf8 = function() {
   var specials = [ "/", '"' ];
   var specialEscapes = [ "\\/", '\\"' ];

   // Returns the shortest hexadecimal escape for a single UTF-16 code unit.
   function escapeCodeUnit(code) {
     var hex = code.toString(16);
     if (code < 256) {
       return "\\x" + ("0" + hex).slice(-2);
     }
     return "\\u" + ("0000" + hex).slice(-4);
   }

   var result = "";
   for (var i = 0; i < this.length; i++) {
     var ch = this.charAt(i);
     var specialIndex = specials.indexOf(ch);
     if (specialIndex > -1) {
       result += specialEscapes[specialIndex];
     } else if (this.charCodeAt(i) > 126) {
       result += escapeCodeUnit(this.charCodeAt(i));
     } else {
       result += ch;
     }
   }
   return result;
 };

 // Demo: prompt/alert only exist in browsers, so skip them elsewhere instead
 // of throwing a ReferenceError.
 if (typeof prompt === "function") {
   prompt("Your escaped string:", "Lärm, Lichter, Lücken, Löcher.".replaceWithUtf8());
 }
 if (typeof alert === "function") {
   alert("L\xe4rm, Lichter, L\xfccken, L\xf6cher.");
 }

A \uXXXX Unicode escape always takes 6 characters per escaped character. But for characters in the range 127-255 we can use a \xXX hexadecimal escape instead, which takes only 4 characters per escaped character.

0
source

Source: https://habr.com/ru/post/1444298/


All Articles