Compare strings and PHP affinity index

What is elegant code for finding common alphabets among two strings without including space in PHP?

We also return the similarity index, i.e. count the number of common characters and return it as a percentage of the total number of characters.

Suppose I have one line "LEGENDARY", while the other is "BARNEY STINSON", so I need to find the common letters b / w without using a space.

Also my code should return a similarity index, i.e. Count the number of common characters and return them as a percentage of the total number of characters.

The common characters for these two lines are β€œARNEY”, so the score is 5/22 ~ = 22%. Any help on this?

0
source share
2

. similar_text(). str_replace(' ', '', $string); .

echo similar_text ( 'LEGENDARY' , 'BARNEYSTINSON', $percent); // outputs 3
echo $percent; // outputs 27.272727272727

<?php
function unique_chars($string) {
   return count_chars(strtolower(str_replace(' ', '', $string)), 3);
}
function compare_strings($a, $b) {
    $index = similar_text(unique_chars($a), unique_chars($b), $percent);
    return array('index' => $index, 'percent' => $percent);
}
print_r( compare_strings('LEGENDARY', 'BARNEY STINSON') );

// outputs:
?>

Array
(
    [index] => 5
    [percent] => 55.555555555556
)
+1

, Levenshtein Jaro Winkler , Smith Waterman Gotoh , , "" . .

php , :

Levenshtein

echo levenshtein("LEGENDARY","BARNEY STINSON");

Jaro Winkler

class StringCompareJaroWinkler 
{
    public function compare($str1, $str2)
    {
        return $this->JaroWinkler($str1, $str2, $PREFIXSCALE = 0.1 );
    }

    private function getCommonCharacters( $string1, $string2, $allowedDistance ){

      $str1_len = mb_strlen($string1);
      $str2_len = mb_strlen($string2);
      $temp_string2 = $string2;

      $commonCharacters='';
      for( $i=0; $i < $str1_len; $i++){

        $noMatch = True;
        // compare if char does match inside given allowedDistance
        // and if it does add it to commonCharacters
        for( $j= max( 0, $i-$allowedDistance ); $noMatch && $j < min( $i + $allowedDistance + 1, $str2_len ); $j++){
          if( $temp_string2[$j] == $string1[$i] ){
            $noMatch = False;
        $commonCharacters .= $string1[$i];
        $temp_string2[$j] = '';
          }
        }
      }
      return $commonCharacters;
    }

    private function Jaro( $string1, $string2 ){

      $str1_len = mb_strlen( $string1 );
      $str2_len = mb_strlen( $string2 );

      // theoretical distance
      $distance = (int) floor(min( $str1_len, $str2_len ) / 2.0); 

      // get common characters
      $commons1 = $this->getCommonCharacters( $string1, $string2, $distance );
      $commons2 = $this->getCommonCharacters( $string2, $string1, $distance );

      if( ($commons1_len = mb_strlen( $commons1 )) == 0) return 0;
      if( ($commons2_len = mb_strlen( $commons2 )) == 0) return 0;
      // calculate transpositions
      $transpositions = 0;
      $upperBound = min( $commons1_len, $commons2_len );
      for( $i = 0; $i < $upperBound; $i++){
        if( $commons1[$i] != $commons2[$i] ) $transpositions++;
      }
      $transpositions /= 2.0;
      // return the Jaro distance
      return ($commons1_len/($str1_len) + $commons2_len/($str2_len) + ($commons1_len - $transpositions)/($commons1_len)) / 3.0;

    }

    private function getPrefixLength( $string1, $string2, $MINPREFIXLENGTH = 4 ){

      $n = min( array( $MINPREFIXLENGTH, mb_strlen($string1), mb_strlen($string2) ) );

      for($i = 0; $i < $n; $i++){
        if( $string1[$i] != $string2[$i] ){
          // return index of first occurrence of different characters 
          return $i;
        }
      }
      // first n characters are the same   
      return $n;
    }

    private function JaroWinkler($string1, $string2, $PREFIXSCALE = 0.1 ){

      $JaroDistance = $this->Jaro( $string1, $string2 );
      $prefixLength = $this->getPrefixLength( $string1, $string2 );
      return $JaroDistance + $prefixLength * $PREFIXSCALE * (1.0 - $JaroDistance);
    }
}

$jw = new StringCompareJaroWinkler();
echo $jw->compare("LEGENDARY","BARNEY STINSON");

class SmithWatermanGotoh 
{
    private $gapValue;
    private $substitution;

    /**
     * Constructs a new Smith Waterman metric.
     * 
     * @param gapValue
     *            a non-positive gap penalty
     * @param substitution
     *            a substitution function
     */
    public function __construct($gapValue=-0.5, 
                $substitution=null) 
    {
        if($gapValue > 0.0) throw new Exception("gapValue must be <= 0");
        //if(empty($substitution)) throw new Exception("substitution is required");
        if (empty($substitution)) $this->substitution = new SmithWatermanMatchMismatch(1.0, -2.0);
        else $this->substitution = $substitution;
        $this->gapValue = $gapValue;
    }

    public function compare($a, $b) 
    {
        if (empty($a) && empty($b)) {
            return 1.0;
        }

        if (empty($a) || empty($b)) {
            return 0.0;
        }

        $maxDistance = min(mb_strlen($a), mb_strlen($b))
                * max($this->substitution->max(), $this->gapValue);
        return $this->smithWatermanGotoh($a, $b) / $maxDistance;
    }

    private function smithWatermanGotoh($s, $t) 
    {   
        $v0 = [];
        $v1 = [];
        $t_len = mb_strlen($t);
        $max = $v0[0] = max(0, $this->gapValue, $this->substitution->compare($s, 0, $t, 0));

        for ($j = 1; $j < $t_len; $j++) {
            $v0[$j] = max(0, $v0[$j - 1] + $this->gapValue,
                    $this->substitution->compare($s, 0, $t, $j));

            $max = max($max, $v0[$j]);
        }

        // Find max
        for ($i = 1; $i < mb_strlen($s); $i++) {
            $v1[0] = max(0, $v0[0] + $this->gapValue, $this->substitution->compare($s, $i, $t, 0));

            $max = max($max, $v1[0]);

            for ($j = 1; $j < $t_len; $j++) {
                $v1[$j] = max(0, $v0[$j] + $this->gapValue, $v1[$j - 1] + $this->gapValue,
                        $v0[$j - 1] + $this->substitution->compare($s, $i, $t, $j));

                $max = max($max, $v1[$j]);
            }

            for ($j = 0; $j < $t_len; $j++) {
                $v0[$j] = $v1[$j];
            }
        }

        return $max;
    }
}

class SmithWatermanMatchMismatch
{
    private $matchValue;
    private $mismatchValue;

    /**
     * Constructs a new match-mismatch substitution function. When two
     * characters are equal a score of <code>matchValue</code> is assigned. In
     * case of a mismatch a score of <code>mismatchValue</code>. The
     * <code>matchValue</code> must be strictly greater then
     * <code>mismatchValue</code>
     * 
     * @param matchValue
     *            value when characters are equal
     * @param mismatchValue
     *            value when characters are not equal
     */
    public function __construct($matchValue, $mismatchValue) {
        if($matchValue <= $mismatchValue) throw new Exception("matchValue must be > matchValue");

        $this->matchValue = $matchValue;
        $this->mismatchValue = $mismatchValue;
    }

    public function compare($a, $aIndex, $b, $bIndex) {
        return ($a[$aIndex] === $b[$bIndex] ? $this->matchValue
                : $this->mismatchValue);
    }

    public function max() {
        return $this->matchValue;
    }

    public function min() {
        return $this->mismatchValue;
    }
}

$o = new SmithWatermanGotoh();
echo $o->compare("LEGENDARY","BARNEY STINSON");
+3

Source: https://habr.com/ru/post/1754407/


All Articles