List of trigrams in JavaScript

I am trying to list all trigrams (3-character sequences) in a given string that match the user's input. The user can enter 1, 2, or 3 characters. I wrote the following code:

// Turns the user's 1-3 character input (`trigram`) into a regex source that
// describes a three-character window, then collects the matches from
// `givenString`.
// NOTE(review): the bare `return` below implies this fragment lives inside a
// function that is not shown here.
// NOTE(review): `trigram` is inserted into the pattern unescaped, so any regex
// metacharacters typed by the user are interpreted as pattern syntax.
if (3 == trigram.length) {
    // Three characters: the input already is the whole trigram.
    var re = new RegExp(trigram);
} else if (2 == trigram.length) {
    // Two characters: allow one non-whitespace character after or before them.
    trigram = trigram + '\\S|\\S' + trigram;
    var re = new RegExp(trigram);
} else if (1 == trigram.length) {
    // One character: it may be the first, last, or middle character of the
    // window; the other two positions are any non-whitespace characters.
    trigram = trigram + '\\S\\S|\\S\\S' + trigram + '|\\S' + trigram + '\\S';
    var re = new RegExp(trigram);
} else {
    alert("Trigram search pattern can be either one, two or three characters!");
    return null;
}

// Recompiles the final pattern with the case-insensitive and global flags,
// replacing whatever `re` was assigned in the branches above, and shows it.
var re = new RegExp(trigram, "ig"); alert(re);

// With a global regex, match() returns every match, but each match consumes
// its characters, so trigrams that overlap an earlier match are not reported.
// NOTE(review): no declaration of `trigramList` is visible in this fragment.
trigramList = givenString.match(re);

This works fine, except that if I have the following character sequence in my string "KDSGKHAGSKH" and I search for trigrams consisting of "A", my code returns only "KHA", where I expect it to return { KHA, HAG, AGS}

+3
source share
2 answers

Here are two simple functions that seem to do what you are looking for.

/**
 * Returns every overlapping substring of length n, in order of position.
 *
 * "abcd".ngrams(3) gives ["abc", "bcd"]. A string shorter than n gives [].
 *
 * @param {number} n - Window length; expected to be a positive integer.
 * @returns {string[]} The n-grams, or an empty array when n is not at least 1.
 */
String.prototype.ngrams = function(n) {
  var r = [];
  // Without this guard a zero n produces a run of empty strings and a
  // negative n makes substring() swap its arguments and return unrelated slices.
  if (!(n >= 1))
    return r;
  for(var i = 0; i <= this.length - n; i++)
     r.push(this.substring(i, i + n));
  return r;
};

/**
 * Returns the elements of this array that match the regular expression.
 *
 * @param {RegExp} re - Pattern to test each element against. If it has the
 *   global or sticky flag, its lastIndex is reset before each test.
 * @returns {Array} A new array with the matching elements, in original order.
 */
Array.prototype.grep = function(re) {
   var r = [];
   for(var i = 0; i < this.length; i++) {
       // test() on a /g or /y regex resumes from lastIndex left by the previous
       // call, which would make later elements fail to match; start from 0.
       if(re.global || re.sticky)
          re.lastIndex = 0;
       if(re.test(this[i]))
          r.push(this[i]);
   }
   return r;
};

// Demo: list the 3-grams of the sample string that contain "d".
// `s` is declared explicitly so the snippet does not create an implicit global.
var s = "abcdefghjkl";
alert(s.ngrams(3).grep(/d/));

prints "bcd", "cde", "def". Not the most efficient, but simple.

+1
source

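A regular-expression match consumes the characters it covers, so successive matches can never overlap. A lookahead, however, matches without consuming anything. So consume only the first character of each trigram and capture the remaining two characters inside a lookahead; joining the match with its captured group then gives the whole trigram.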

// Collect every trigram of `str` that contains "A", including overlapping ones.
// Each alternative consumes only the first character of a trigram and captures
// the other two inside a lookahead, so the next search resumes one character on.
var ngrams = [];
var str = 'KDSGKHAGSKH';
var onegram = /A(?=(\S\S))|\S(?=(\SA))|\S(?=(A\S))/ig;
var match = onegram.exec(str);
while (match !== null) {
    // Groups that did not participate join as empty strings, leaving the
    // consumed character followed by the captured pair.
    ngrams[ngrams.length] = match.join('');
    match = onegram.exec(str);
}

The RE can also be generated programmatically (this assumes a repeat method on String):

/**
 * Returns this string concatenated with itself n times.
 *
 * Assigned unconditionally, so it replaces any existing String.prototype.repeat.
 * Walks the bits of n from the least significant end: the working chunk is
 * doubled on every step and appended to the result whenever the current bit
 * is set.
 *
 * @param {number} n - Number of copies; any value below 1 gives ''.
 * @returns {string} The repeated string.
 */
String.prototype.repeat = function (n) {
    if (n < 1) {
        return '';
    }
    var chunk = this;
    var result = '';
    while (n) {
        if (n & 1) {
            result += chunk;
        }
        chunk += chunk;
        n >>= 1;
    }
    return result;
};

/**
 * Builds a global, case-insensitive regular expression that finds every
 * n-character window containing the literal text `kgram`, with the remaining
 * positions filled by non-whitespace characters. Overlapping windows are found
 * because each alternative consumes only the window's first character and
 * captures the rest inside a lookahead; `match.join('')` on an exec() result
 * therefore yields the whole window.
 *
 * @param {string} kgram - Literal characters the window must contain. Regex
 *   metacharacters are escaped, so "." means a dot, not "any character".
 * @param {number} n - Window length; fractional values are rounded down.
 * @returns {RegExp} The search pattern. When `kgram` is longer than `n`, or
 *   `n` is below 1, a pattern that never matches is returned, so an exec()
 *   loop over it terminates immediately.
 */
function ngrammer(kgram, n) {
    var size = Math.floor(n);
    var m = size - kgram.length;
    if (!(size >= 1) || !(m >= 0)) {
        // An empty alternation would match the empty string at every position
        // and never advance lastIndex; an empty negative lookahead always fails.
        return new RegExp('(?!)', 'ig');
    }

    // One pattern token per kgram character, escaped where necessary.
    var literal = [];
    for (var k = 0; k < kgram.length; ++k) {
        var ch = kgram.charAt(k);
        literal.push(/[\\^$.*+?()[\]{}|]/.test(ch) ? '\\' + ch : ch);
    }

    var branches = [];
    for (var i = 0; i <= m; ++i) {
        // Window layout: i wildcards, the kgram, then m - i wildcards.
        var tokens = [];
        var j;
        for (j = 0; j < i; ++j) {
            tokens.push('\\S');
        }
        tokens = tokens.concat(literal);
        for (j = i; j < m; ++j) {
            tokens.push('\\S');
        }
        // Consume the first token; look ahead for (and capture) the others.
        branches.push(tokens[0] + '(?=(' + tokens.slice(1).join('') + '))');
    }
    return new RegExp(branches.join('|'), 'ig');
}

// Build the trigram pattern for the single character 'A' programmatically
// instead of writing the alternation by hand.
var onegram = ngrammer('A', 3);
...
+1

Source: https://habr.com/ru/post/1779369/


All Articles