Writing this as a second answer, because it is not based on my previous (bad) at all.
This code is based on http://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm and http://en.wikipedia.org/wiki/Approximate_string_matching#Problem_formulation_and_algorithms
It returns one (potentially several) substring of the minimum level levenshtein in $ haystack given by $ needle. Now levenshtein distance is just one measurement of the editing distance, and it may not meet your needs. "hte" is closer to this metric to "him" than to "the". Some of the examples I cited show the limitations of this technique. I believe this is much more reliable than the previous answer, but let me know how this works for you.
// utility function - returns the key of the array minimum function array_min_key($arr) { $min_key = null; $min = PHP_INT_MAX; foreach($arr as $k => $v) { if ($v < $min) { $min = $v; $min_key = $k; } } return $min_key; } // Calculate the edit distance between two strings function edit_distance($string1, $string2) { $m = strlen($string1); $n = strlen($string2); $d = array(); // the distance from '' to substr(string,$i) for($i=0;$i<=$m;$i++) $d[$i][0] = $i; for($i=0;$i<=$n;$i++) $d[0][$i] = $i; // fill-in the edit distance matrix for($j=1; $j<=$n; $j++) { for($i=1; $i<=$m; $i++) { // Using, for example, the levenshtein distance as edit distance list($p_i,$p_j,$cost) = levenshtein_weighting($i,$j,$d,$string1,$string2); $d[$i][$j] = $d[$p_i][$p_j]+$cost; } } return $d[$m][$n]; } // Helper function for edit_distance() function levenshtein_weighting($i,$j,$d,$string1,$string2) { // if the two letters are equal, cost is 0 if($string1[$i-1] === $string2[$j-1]) { return array($i-1,$j-1,0); } // cost we assign each operation $cost['delete'] = 1; $cost['insert'] = 1; $cost['substitute'] = 1; // cost of operation + cost to get to the substring we perform it on $total_cost['delete'] = $d[$i-1][$j] + $cost['delete']; $total_cost['insert'] = $d[$i][$j-1] + $cost['insert']; $total_cost['substitute'] = $d[$i-1][$j-1] + $cost['substitute']; // return the parent array keys of $d and the operation cost $min_key = array_min_key($total_cost); if ($min_key == 'delete') { return array($i-1,$j,$cost['delete']); } elseif($min_key == 'insert') { return array($i,$j-1,$cost['insert']); } else { return array($i-1,$j-1,$cost['substitute']); } } // attempt to find the substring of $haystack most closely matching $needle function shortest_edit_substring($needle, $haystack) { // initialize edit distance matrix $m = strlen($needle); $n = strlen($haystack); $d = array(); for($i=0;$i<=$m;$i++) { $d[$i][0] = $i; $backtrace[$i][0] = null; } // instead of strlen, we initialize the top row to all 0's for($i=0;$i<=$n;$i++) { $d[0][$i] = 0; $backtrace[0][$i] = null; } // same as the edit_distance calculation, but keep track of how we got there for($j=1; $j<=$n; $j++) { for($i=1; $i<=$m; $i++) { list($p_i,$p_j,$cost) = levenshtein_weighting($i,$j,$d,$needle,$haystack); $d[$i][$j] = $d[$p_i][$p_j]+$cost; $backtrace[$i][$j] = array($p_i,$p_j); } } // now find the minimum at the bottom row $min_key = array_min_key($d[$m]); $current = array($m,$min_key); $parent = $backtrace[$m][$min_key]; // trace up path to the top row while(! is_null($parent)) { $current = $parent; $parent = $backtrace[$current[0]][$current[1]]; } // and take a substring based on those results $start = $current[1]; $end = $min_key; return substr($haystack,$start,$end-$start); } // some testing $data = array( array('foo',' foo'), array('fat','far'), array('dat burn','rugburn')); $data[] = array('big yellow school bus','they rode the bigyellow schook bus that afternoon'); $data[] = array('bus','they rode the bigyellow schook bus that afternoon'); $data[] = array('big','they rode the bigyellow schook bus that afternoon'); $data[] = array('nook','they rode the bigyellow schook bus that afternoon'); $data[] = array('they','console, controller and games are all in very good condition, only played occasionally. includes power cable, controller charge cable and audio cable. smoke free house. pes 2011 super street fighter'); $data[] = array('controker','console, controller and games are all in very good condition, only played occasionally. includes power cable, controller charge cable and audio cable. smoke free house. pes 2011 super street fighter'); foreach($data as $dat) { $substring = shortest_edit_substring($dat[0],$dat[1]); $dist = edit_distance($dat[0],$substring); printf("Found |%s| in |%s|, matching |%s| with edit distance %d\n",$substring,$dat[1],$dat[0],$dist); }