Cannot remove diacritics from file name

I stumbled upon strange behavior while trying to replace the diacritics and special characters in a file name.

The function works correctly with a string literal, but when I run the same sequence on the file name, it does not.

// Whenever a file input changes, show the ASCII conversion of a hard-coded
// sample string next to the conversion of each selected file's name.
// Each iteration overwrites the previous result, so the last file wins.
$(document).on('change', 'input[type=file]', function() {
    var files = this.files;
    for (var i = 0; i < files.length; i++) {
        // Assuming the file name is áñǽŦõş
        var sample = 'áñǽŦõş.jpg';
        var fileName = files[i].name;
        // .text() instead of .html(): characters such as '<' or '&' in a file
        // name would otherwise be parsed as markup instead of being displayed.
        $('.string .result').text(convertAscii(sample));
        $('.filename .result').text(convertAscii(fileName));
    }
});
    
// Transliteration table: every character of the first string is replaced by
// the second string.
var ASCII_TRANSLITERATIONS = [
    // Latin letters with diacritics, ligatures and ordinal indicators.
    ['Ä', 'Ae'],
    ['æǽä', 'ae'],
    ['ÀÁÂÃÅǺĀĂĄǍ', 'A'],
    ['àáâãåǻāăąǎª', 'a'],
    ['ÇĆĈĊČ', 'C'],
    ['çćĉċč', 'c'],
    ['ÐĎĐ', 'Dj'],
    ['ðďđ', 'dj'],
    ['ÈÉÊËĒĔĖĘĚ', 'E'],
    ['èéêëēĕėęě', 'e'],
    ['ƒ', 'f'],
    ['ĜĞĠĢ', 'G'],
    ['ĝğġģ', 'g'],
    ['ĤĦ', 'H'],
    ['ĥħ', 'h'],
    ['ÌÍÎÏĨĪĬǏĮİ', 'I'],
    ['ìíîïĩīĭǐįı', 'i'],
    ['Ĵ', 'J'],
    ['ĵ', 'j'],
    ['Ķ', 'K'],
    ['ķ', 'k'],
    ['ĹĻĽĿŁ', 'L'],
    ['ĺļľŀł', 'l'],
    ['ÑŃŅŇ', 'N'],
    ['ñńņň\u0149', 'n'],
    ['Ö', 'Oe'],
    ['œö', 'oe'],
    ['ÒÓÔÕŌŎǑŐƠØǾ', 'O'],
    ['òóôõōŏǒőơøǿº', 'o'],
    ['ŔŖŘ', 'R'],
    ['ŕŗř', 'r'],
    ['ŚŜŞȘŠ', 'S'],
    ['śŝşșšſ', 's'],
    ['ŢȚŤŦ', 'T'],
    ['ţțťŧ', 't'],
    ['Ü', 'Ue'],
    ['ü', 'ue'],
    ['ÙÚÛŨŪŬŮŰŲƯǓǕǗǙǛ', 'U'],
    ['ùúûũūŭůűųưǔǖǘǚǜ', 'u'],
    ['ÝŸŶ', 'Y'],
    ['ýÿŷ', 'y'],
    ['Ŵ', 'W'],
    ['ŵ', 'w'],
    ['ŹŻŽ', 'Z'],
    ['źżž', 'z'],
    ['ÆǼ', 'AE'],
    ['ß', 'ss'],
    ['\u0132', 'IJ'], // Latin capital ligature IJ
    ['\u0133', 'ij'], // Latin small ligature ij
    ['Œ', 'OE'],

    // NOTE(review): the entries below (and the three escapes above) were
    // restored. The previous patterns were empty (`//g`, which is a line
    // comment) or ended in an empty alternative; the replacement values
    // ('Shch', 'Zh', 'Ju', 'Ja', ...) indicate that Cyrillic letters were
    // stripped from the source. Confirm against the intended mapping.
    ['\u0410', 'A'], ['\u0430', 'a'],                       // А а
    ['\u0411', 'B'], ['\u0431', 'b'],                       // Б б
    ['\u0426', 'C'], ['\u0446', 'c'],                       // Ц ц
    ['\u0414', 'D'], ['\u0434', 'd'],                       // Д д
    ['\u0415\u0401\u042d', 'E'], ['\u0435\u0451\u044d', 'e'], // Е Ё Э / е ё э
    ['\u0424', 'F'], ['\u0444', 'f'],                       // Ф ф
    ['\u0413', 'G'], ['\u0433', 'g'],                       // Г г
    ['\u0425', 'H'], ['\u0445', 'h'],                       // Х х
    ['\u0418', 'I'], ['\u0438', 'i'],                       // И и
    ['\u0419', 'J'], ['\u0439', 'j'],                       // Й й
    ['\u041a', 'K'], ['\u043a', 'k'],                       // К к
    ['\u041b', 'L'], ['\u043b', 'l'],                       // Л л
    ['\u041c', 'M'], ['\u043c', 'm'],                       // М м
    ['\u041d', 'N'], ['\u043d', 'n'],                       // Н н
    ['\u041e', 'O'], ['\u043e', 'o'],                       // О о
    ['\u041f', 'P'], ['\u043f', 'p'],                       // П п
    ['\u0420', 'R'], ['\u0440', 'r'],                       // Р р
    ['\u0421', 'S'], ['\u0441', 's'],                       // С с
    ['\u0422', 'T'], ['\u0442', 't'],                       // Т т
    ['\u0423', 'U'], ['\u0443', 'u'],                       // У у
    ['\u0412', 'V'], ['\u0432', 'v'],                       // В в
    ['\u042b', 'Y'], ['\u044b', 'y'],                       // Ы ы
    ['\u0417', 'Z'], ['\u0437', 'z'],                       // З з
    ['\u0427', 'Ch'], ['\u0447', 'ch'],                     // Ч ч
    ['\u042e', 'Ju'], ['\u044e', 'ju'],                     // Ю ю
    ['\u042f', 'Ja'], ['\u044f', 'ja'],                     // Я я
    ['\u0428', 'Sh'], ['\u0448', 'sh'],                     // Ш ш
    ['\u0429', 'Shch'], ['\u0449', 'shch'],                 // Щ щ
    ['\u0416', 'Zh'], ['\u0436', 'zh']                      // Ж ж
];

// Single-character lookup built once from the table above. The keys are
// NFC-normalized so the table still works if this file is ever saved with
// decomposed characters.
var ASCII_LOOKUP = (function() {
    var lookup = Object.create(null);
    ASCII_TRANSLITERATIONS.forEach(function(entry) {
        var chars = entry[0].normalize('NFC');
        for (var i = 0; i < chars.length; i++) {
            lookup[chars.charAt(i)] = entry[1];
        }
    });
    return lookup;
})();

/**
 * Replace accented Latin letters, ligatures and Cyrillic letters with an
 * ASCII transliteration. Characters without a table entry are kept as-is.
 *
 * The input is first normalized to NFC. Some sources (file names in
 * particular) deliver letters in decomposed form, i.e. a base letter followed
 * by combining marks, which never matches the precomposed table entries.
 *
 * @param {string} str - Text to transliterate.
 * @returns {string} The transliterated text.
 */
function convertAscii(str) {
    var composed = str.normalize('NFC');
    var result = '';
    for (var i = 0; i < composed.length; i++) {
        var ch = composed.charAt(i);
        var replacement = ASCII_LOOKUP[ch];
        result += replacement === undefined ? ch : replacement;
    }
    return result;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<input type="file" name="pic" accept="image/*">

<div>
  <div class="string">Converted string : <span class="result"></span></div>
  <div class="filename">Converted filename : <span class="result"></span></div>
</div>
Run codeHide result

I also made a fiddle showing the problem; you will see what I mean when you load a JPG named áñaéTõş.

I get the file name from input. The strange behavior is this:

console.log(convertAscii(_string)); // Correct => anaeTos
console.log(convertAscii(_filename)); // Wrong => áñaéTõş

I believe this happens because the diacritics are interpreted as separate characters. Does anyone have an idea how to fix this? Thanks.

+4
source share
1

, - . , , , , . , .

As @ClasG pointed out, this is a Unicode normalization issue, which JavaScript can handle with String.prototype.normalize().

, .

fiddle. , .

PS: unorm.js . , jsfiddle.

<input type="file" name="pic" accept="image/*">
<script type="text/javascript">
  document.write("\<script src='https://raw.githubusercontent.com/walling/unorm/master/lib/unorm.js' type='text/javascript'>\<\/script>");
</script>
<div>
  <div class="string">Converted string : <span class="result"></span></div>
  <div class="filename">Converted filename : <span class="result"></span></div>
</div>

    // Whenever a file input changes, show the ASCII conversion of a sample
    // string next to the conversion of each selected file's name. Both values
    // are normalized to NFC before conversion. Each iteration overwrites the
    // previous result, so the last file wins.
    $(document).on('change', 'input[type=file]', function() {
        var files = this.files;

        for (var i = 0; i < files.length; i++) {
            var sample = 'äöüß';
            var fileName = files[i].name;

            // .text() instead of .html(): characters such as '<' or '&' in a
            // file name would otherwise be parsed as markup.
            $('.string .result').text(convertAscii(sample.normalize('NFC')));
            $('.filename .result').text(convertAscii(fileName.normalize('NFC')));
        }
    });

/**
 * Convert German umlauts and sharp s to their ASCII transliterations.
 *
 * The input is normalized to NFC (canonical decomposition followed by
 * canonical composition) so that decomposed umlauts, i.e. a base letter plus
 * a combining diaeresis, match the precomposed code points in the table.
 *
 * @param {string} str - Text to transliterate.
 * @returns {string} The text with the mapped characters replaced.
 */
function convertAscii(str) {
    // Declared with var: assigning to an undeclared name creates a global in
    // sloppy mode and throws in strict mode.
    var tr = {"\u00e4": "ae", "\u00fc": "ue", "\u00f6": "oe", "\u00df": "ss"};

    // No '|' inside the character class: there it is a literal pipe, which
    // has no table entry and would be replaced by the text "undefined".
    // To cover more characters, add them to both `tr` and the class.
    return str.normalize('NFC').replace(/[\u00e4\u00fc\u00f6\u00df]/g, function(ch) {
        return tr[ch];
    });
}
0

Source: https://habr.com/ru/post/1684386/


All Articles