I saw this question, but it does not actually cover what I am looking for. The answers to it suggested one of two things: lifting the text from the meta description tag, or creating an excerpt for an article whose body you already have.
What I want to do is extract the first few sentences of the article itself, the way Readability does. What is the best way to do this? HTML parsing? Here is what I am using now, but it is not very reliable.
/**
 * Guess an excerpt for the page at $url from its <meta name="description"> tag.
 *
 * The page is downloaded with file_get_contents_curl(), parsed with DOMDocument,
 * and the `content` attribute of the description meta tag is returned. If the
 * page declares several description tags, the last one wins.
 *
 * @param string $url Address of the page to inspect.
 *
 * @return string|null The description text, or null when the page could not be
 *                     fetched or parsed, or declares no description meta tag.
 */
function guessExcerpt($url)
{
    $html = file_get_contents_curl($url);

    // curl_exec() yields false on failure, and DOMDocument::loadHTML() rejects an
    // empty source (ValueError on PHP 8), so bail out before parsing.
    if (!is_string($html) || trim($html) === '') {
        return null;
    }

    // Real-world markup is rarely valid; collect libxml warnings internally instead
    // of silencing everything with "@", then restore the caller's setting.
    $previousSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if ($loaded === false) {
        return null;
    }

    // Initialised up front so a page without a description tag returns null
    // rather than triggering an "undefined variable" warning.
    $description = null;

    foreach ($doc->getElementsByTagName('meta') as $meta) {
        // Attribute values such as "Description" are common, so compare
        // case-insensitively.
        if (strcasecmp($meta->getAttribute('name'), 'description') === 0) {
            $description = $meta->getAttribute('content');
        }
    }

    return $description;
}

/**
 * Download the body of $url with cURL.
 *
 * Redirects are followed, response headers are excluded from the result, and the
 * whole transfer is limited to five seconds.
 *
 * @param string $url Address to download.
 *
 * @return string|false The response body, or false when the transfer failed.
 */
function file_get_contents_curl($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
source share