Calculate common character frequency

How can I write a function that compares two lines and returns an object containing the characters that occur on both lines, and how many times do they occur?

Here is an example of input and output:

"Leopard", "Lion" // { "L": 2, "o": 2 } "Hello", "World" // { "l": 3, "o": 2 } "Flower", "Power" // { "o": 2, "w": 2, "e": 2, "r": 2 } "phone", "Memphis" // { "p": 2, "h": 2, "e": 2 } 
+6
source share
6 answers

If you work with small lines, there is no need to complicate the situation. The following method is simple, due to the performance when working with very large strings.

First we need to define an object for storing counters for our repeating characters.

Then we need to iterate each row. There are many ways to iterate over a string, but in the following examples, I also:

Both of these methods work because the string is an example of an inline iterative object.

During line iteration, we need to first check to see if the character exists in the repeating object.

  • If so, we do not need to check it again, just increase the number.
  • If it is not, look at another line to see if the character exists there.
    • If so, increase the repeating counter for this character.
    • If it is not, do nothing.

Using this method, you do not count characters that are not duplicates, and you are not trying to verify characters that you have already checked. This should provide some significant speed improvements with longer strings.

Once you finish iterating over both lines, just return the duplicate holder object.

ES6 example

 const countLetters = (v1, v2) => { const out = {}; for(let char of v1) (out[char] || v2.indexOf(char) !== -1) && (out[char] = (out[char] || 0) + 1); for(let char of v2) (out[char] || v1.indexOf(char) !== -1) && (out[char] = (out[char] || 0) + 1); return out; } const test = (v1,v2) => (console.log(JSON.stringify(countLetters(v1, v2))),test); test("Leopard", "Lion") // { "L": 2, "o": 2 } ("Hello", "World") // { "l": 3, "o": 2 } ("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 } ("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 } ("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 } ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 } 

ES5 example

 var countLetters = function(v1, v2) { var out = {}; Array.prototype.forEach.call(v1, function(char) { if(out[char] || v2.indexOf(char) !== -1) out[char] = (out[char] || 0) + 1; }); Array.prototype.forEach.call(v2, function(char) { if(out[char] || v1.indexOf(char) !== -1) out[char] = (out[char] || 0) + 1; }); return out; } var test = function(v1,v2){ return (console.log(JSON.stringify(countLetters(v1, v2))),test) }; test("Leopard", "Lion") // { "L": 2, "o": 2 } ("Hello", "World") // { "l": 3, "o": 2 } ("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 } ("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 } ("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 } ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 } 

Performance becomes a problem when you start working with very large lines. The following method uses some algorithmic magic to improve performance.

First find the frequency of each letter in both lines. Working with a sorted array is faster than working with an unsorted array , and saves time, because we can divide the string into groups of common characters, rather than counting each character.

I use the Map object to use the Map#has method, which is much faster than Array#indexOf in the following function.

 // Calculate the frequency of each character in a string const calculateCharacterFrequency = string => { // Split the string into individual characters const array = [...string]; // Sort the split string const sorted = array.sort(); // Join the split string const joined = sorted.join(""); // Split the string into groups of common characters const matches = joined.match(/(.)\1*/g); // Iterate the groups const frequency = matches.reduce( (frequency, character) => { // Insert char and frequency into map frequency.set(character[0], character.length); // return frequency map for use in next iteration return frequency; }, // This is the map that will be passed as `frequency` new Map() ); // Return the map return frequency; }; 

Then find the common characters between each line and create a map containing the frequency of each common character.

The biggest performance increase here is to create a unique set of all characters that appear on any line. I use the unique nature of the Set object to remove duplicates from an array, there are other ways to remove duplicates from an array, but this was the fastest I tested. Thus, we execute only one set of unqiue and verify that the symbol is found on both cards.

 // Calculate the frequency of each character two strings have in common const calculateCommonCharacterFrequency = (v1, v2) => { // Get frequency map of first string v1 = calculateCharacterFrequency(v1); // Get frequency map of second string v2 = calculateCharacterFrequency(v2); // Create an array containing a list of all characters occuring in either string const combinedCharacterArray = [...v1.keys(), ...v2.keys()]; // Remove duplicate characters const combinedUniqueCharacterSet = new Set(combinedCharacters); // Convert set back to array const combinedUniqueCharacterArray = Array.from(combinedUniqueSet); // Iterate the unique array of common characters const commonFrequency = combinedUniqueCharacterArray.reduce( (commonFrequency, character) => { // Check if both strings have the character const isCommon = v1.has(character) && v2.has(character); if(isCommon) { // Find the sum of the frequency of the character in both strings const combinedFrequency = v1.get(character) + v2.get(character); // Insert character and frequency into map commonFrequency.set(character, combinedFrequency); } // return frequency map for use in next iteration return commonFrequency; }, // This is the map that will be passed as `commonFrequency` new Map() ); // Return the map return commonFrequency; }; 

Condensed Example

The above example has been extended for readability and explanation. The following example has been compressed to save space.

 const calcCharFreq = string => [...string].sort().join("").match(/(.)\1*/g).reduce((freq, char) => (freq.set(char[0], char.length), freq), new Map()); const calcCommonCharFreq = (v1, v2) => { v1 = calcCharFreq(v1); v2 = calcCharFreq(v2); return Array.from(new Set([...v1.keys(), ...v2.keys()])).reduce((dup, char) => ((v1.has(char) && v2.has(char)) && dup.set(char, v1.get(char) + v2.get(char)), dup), new Map()); }; const test = (v1,v2) => (console.log('{ '+Array.from(calcCommonCharFreq(v1, v2)).reduce((pairs, value) => (pairs.push('"'+value[0]+'": '+value[1]), pairs), []).join(", ")+' }'), test); test("Leopard", "Lion") // { "L": 2, "o": 2 } ("Hello", "World") // { "l": 3, "o": 2 } ("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 } ("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 } ("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 } ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 } 
+5
source

I would start with a simple function called countChars to count occurrences of characters on a single line. You can find this elsewhere on SO, but I am providing a simple version. Output is an object typed by a symbol, with values ​​as values.

Then we can solve the problem of finding matching characters in two lines as a kind of merging of two objects that we implement in a function called combineObjectsByKey .

 function countChars(str) { return str.split('').reduce((result, chr) => { if (!(chr in result)) result[chr] = 0; result[chr]++; return result; }, {}); } function combineObjectsByKey(o1, o2, fn) { const result = {}; Object.keys(o1).forEach(key => { if (key in o2) result[key] = fn(o1[key], o2[key]); }); return result; } function add(a, b) { return a + b; } function matchingCounts(str1, str2) { return combineObjectsByKey(countChars(str1), countChars(str2), add); } console.log(matchingCounts("Leopard", "Lion")); 

Alternative approach

You can also just concatenate strings, count the characters in a concatenated string, and then delete characters not found on both strings:

 function matchingCounts(str1, str2) { const counts = countChars(str1 + str2); Object.keys(counts).forEach(chr => { if (!str1.includes(chr) || !str2.includes(chr)) delete counts[chr]; }); return counts; } 
+1
source

Here is an option that should be FAST (mainly because it does nothing at all). This will take into account case sensitivity, all az characters in lowercase, all uppercase AZ, numeric characters 0-9 and blank spaces.

here I just use parallel arrays for ASCII char character values ​​for the first and second words.

 var firstString = "hello world"; var secondString = "FOLLOW me"; var firstChars = []; var secondChars = []; var arrLength = 122; // 0 - 122 covers ASCII values for all chars az (lowers), AZ (uppers), numbers, and empty spaces. //initialize array to arrLength number of elements, all set to 0. for (var i = 0; i < arrLength; i++) { firstChars.push(0); secondChars.push(0); } //count first word chars for (var i = 0; i < firstString.length; i++) firstChars[firstString.charCodeAt(i)]++; //count second word chars for (var i = 0; i < secondString.length; i++) secondChars[secondString.charCodeAt(i)]++; //output matching chars and the count of occurences. for (var i = 0; i < arrLength; i++) { if (firstChars[i] > 0 && secondChars[i] > 0) { var letter = i == 32? "Empty Space" : String.fromCharCode(i); var count = firstChars[i] + secondChars[i]; console.log(letter + " : " + count); } } 

Here is the jsFiddle of this code for playback and testing.

0
source

Although this example may be more elegant, I think this approach is simpler and more readable.

Here's a pen to play with.

 function getRepeated(x, y) { let result = []; x.split('').forEach(value => { let array = testCharacterToString(value, y); if (array.length > 0) { result.push(array[0]); } }); y.split('').forEach(value => { let array = testCharacterToString(value, x); if (array.length > 0) { result.push(array[0]); } }); result = result.reduce(function(prev, cur) { prev[cur] = (prev[cur] || 0) + 1; return prev; }, {}); return JSON.stringify(result) // {"o":4,"k":3,"e":6,"p":2,"r":2} } function testCharacterToString(char, string) { return string.split('').filter(value => { return value === char; }); } var test = function(arg1,arg2){ return (console.log(getRepeated(arg1, arg2)),test) }; test("Leopard", "Lion") // { "L": 2, "o": 2 } ("Hello", "World") // { "l": 3, "o": 2 } ("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 } ("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 } ("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 } ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 } 
0
source

This is a very good question. If I understood correctly, you can easily do in O (2n) as follows:

 function relationIndex(s){ var a = s.split(/\s+/), hash = a[0].length < a[1].length ? Array.prototype.reduce.call(a[1], (p,c) => (p[c] && (p[c] = Math.abs(p[c])+1),p), Array.prototype.reduce.call(a[0], (p,c) => (p[c] = --p[c] || -1,p),{})) : Array.prototype.reduce.call(a[0], (p,c) => (p[c] && (p[c] = Math.abs(p[c])+1),p), Array.prototype.reduce.call(a[1], (p,c) => (p[c] = --p[c] || -1,p),{})); for (var k in hash) hash[k] <= 0 && delete hash[k]; return hash; } var str = "Lion Leopard"; console.log(relationIndex(str)); str = "Brontosaurus Butterfly"; console.log(relationIndex(str)); str = "London Paris"; console.log(relationIndex(str)); str = "phone Memphis"; console.log(relationIndex(str)); 
0
source

Here is a more readable version of Redu algo ...

 function count(v1, v2) { let map = {}; Array.prototype.forEach.call(v1, char => map[char] = map[char]++ || 1); Array.prototype.forEach.call(v2, char => { if(map[char]) map[char] += 1; }); for(let char in map) { if(map[char] < 2) { delete map[char]; } } console.log(map) } count('Lion', 'Leopard'); 
-one
source

Source: https://habr.com/ru/post/1014401/


All Articles