Right, I created a web crawler. It looks through links, headings and meta descriptions. It scans links and saves them in $ link. It scans the link headers and saves them in the [title] in newArray. Now in this array, I want him to know that if the meta tag does not exist, he can scan the first tag and use it instead. The only problem is that it does not seem to save any information at all.
function getMetas($link) { $str1 = file_get_contents($link); if (strlen($str1)>0) { preg_match_all( '/<meta.*?name=("|\')description("|\').*?content=("|\')(.*?)("|\')/i', $str1, $description); if (count($description) > 1) { return $description[4]; } } return ''; if ($description == '') { $html = file_get_contents($link); preg_match('%(<p[^>]*>.*?</p>)%i', $html, $re); $res = get_custom_excerpt($re[1]); echo "\n"; echo $res; echo "\n"; } function get_custom_excerpt($return, $option = 30, $sentance = false) { $marks = Array(".","!","?"); $return = strip_tags($return); if($sentance == true) { $start = implode(" ", array_slice(preg_split("/\s+/", $return), 0, $option )); $start .= ' '; $end = implode(" ", array_slice(preg_split("/\s+/", $return), $option)); $cut = Array(); foreach($marks AS $m => $mark){ $mark = strpos($end, $mark); if($mark != false) $cut[$m] = $mark; } if($cut[0] != "") $chop = min($cut); else $chop = $option; $rest = substr($end, 0, $chop); $key = array_search($chop, $cut); $return = $start.$rest; }else{ $return = implode(" ", array_slice(preg_split("/\s+/", $return), 0, $option)); } $return .= $marks[$key]; return $return; } } $output = Array(); foreach ($links as $thisLink) { $output[] = array("link" => $thisLink, "title" => Titles($thisLink), "description" => getMetas($thisLink), getMetas($res)); } print_r($output);
source share