Here is a solution that uses regular-expression capture groups to split text into several types of tokens (words, punctuation, and whitespace).
You can test the code here: https://jsfiddle.net/u3mvca6q/5/
/*
 * Basic regex explanation:
 *
 *   /        regex start
 *   (\w+)    first group, words: \w matches one ASCII word character
 *            (letter, digit or underscore); + means one or more of them
 *   |        or
 *   (,|!)    second group, punctuation
 *   |        or
 *   (\s)     third group, a single whitespace character
 *   /        regex end
 *   g        "global" flag: lets exec() walk the string, capturing one
 *            element per call
 *
 * Regex result:
 *   result[0] : default group : any match
 *   result[1] : group 1 : words
 *   result[2] : group 2 : punctuation , !
 *   result[3] : group 3 : whitespace
 */
var basicRegex = /(\w+)|(,|!)|(\s)/g;

/*
 * Advanced regex explanation:
 *
 *   [a-zA-Z\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF] instead of \w
 *     Supports the letters of the Latin-1 Supplement block (U+0080-U+00FF)
 *     in addition to ASCII letters. The whole block must not be used as a
 *     single range: it also holds control characters, the no-break space
 *     (which has to be left for the \s group), symbols such as the
 *     guillemets and the inverted question mark, and the multiplication
 *     (U+00D7) and division (U+00F7) signs. None of those belong in a word.
 *     Find Unicode ranges here https://apps.timwhitlock.info/js/regex
 *
 *   (\.\.\.|\.|,|!|\?)
 *     Identifies an ellipsis (...) and a single point as separate entities.
 *     The ellipsis alternative comes first so it wins over the single point.
 *
 * You can improve it by adding ranges for special punctuation and so on.
 */
var advancedRegex = /([a-zA-Z\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]+)|(\.\.\.|\.|,|!|\?)|(\s)/g;

var basicString = "Hello, this is a random message!";
var advancedString = "Et en français ? Avec des caractères spéciaux ... With one point at the end.";

/**
 * Logs every match array that `regex` produces on `text`, followed by the
 * `null` that exec() returns once no further match is found.
 *
 * @param {RegExp} regex - Tokenizer pattern; must carry the "g" flag.
 * @param {string} text - Text to tokenize.
 * @throws {TypeError} If `regex` is not global: exec() would then return the
 *   first match on every call and the loop would never end.
 */
function logMatches(regex, text) {
  if (!regex.global) {
    throw new TypeError("logMatches needs a regex with the g flag");
  }

  var match;

  // A global regex remembers where its last search stopped; start from 0 so
  // the outcome does not depend on earlier uses of the same regex object.
  regex.lastIndex = 0;

  do {
    match = regex.exec(text);
    console.log(match);

    // An empty match leaves lastIndex where it was; step over it so a
    // pattern that can match nothing still makes progress.
    if (match !== null && match[0] === "") {
      regex.lastIndex += 1;
    }
  } while (match !== null);
}

console.log("------------------");
logMatches(basicRegex, basicString);

console.log("------------------");
logMatches(advancedRegex, advancedString);

/*
 * Output of the basic example (separator lines omitted):
 *
 *   Array [ "Hello", "Hello", undefined, undefined ]
 *   Array [ ",", undefined, ",", undefined ]
 *   Array [ " ", undefined, undefined, " " ]
 *   Array [ "this", "this", undefined, undefined ]
 *   Array [ " ", undefined, undefined, " " ]
 *   Array [ "is", "is", undefined, undefined ]
 *   Array [ " ", undefined, undefined, " " ]
 *   Array [ "a", "a", undefined, undefined ]
 *   Array [ " ", undefined, undefined, " " ]
 *   Array [ "random", "random", undefined, undefined ]
 *   Array [ " ", undefined, undefined, " " ]
 *   Array [ "message", "message", undefined, undefined ]
 *   Array [ "!", undefined, "!", undefined ]
 *   null
 */