// Download a page, collect the target and the text of every <a> element found
// under <body>, and print the de-duplicated list of targets.
//
// Results are left in two parallel arrays:
//   $list_urls   - link targets (relative ones are prefixed with $urlname)
//   $list_urlval - the text content of the corresponding <a> element
//
// Links are skipped when the href is empty, is exactly "/", or contains any of
// the substrings listed in $skip_fragments.
$page_url = "http://example.com";

// Base that relative links are prefixed with. This section used $urlname
// without assigning it; keep a value set earlier in the file if there is one,
// otherwise fall back to the page that is being fetched.
if (!isset($urlname)) {
    $urlname = $page_url;
}

// Substrings that disqualify a link (same set the original filtered on).
// They are compared literally, so the dot in "plus.google" only matches a dot.
$skip_fragments = array('#', 'javascript', 'mailto', 'plus.google');

$list_urls = array();
$list_urlval = array();

$html = file_get_contents($page_url);
$dom = new DOMDocument();

// loadHTML() refuses empty input (ValueError on PHP 8), so only parse when the
// download actually produced content. A failed fetch yields an empty result.
if ($html !== false && $html !== '') {
    // Real-world HTML is rarely valid; collect libxml's parse warnings
    // internally instead of hiding every error with the @ operator.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);
}

$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");

foreach ($hrefs as $nValue) {
    $href = trim($nValue->getAttribute('href'));
    $value = $nValue->nodeValue;

    if ($href === '' || $href === '/') {
        continue;
    }

    $skip = false;
    foreach ($skip_fragments as $fragment) {
        if (strpos($href, $fragment) !== false) {
            $skip = true;
            break;
        }
    }
    if ($skip) {
        continue;
    }

    // A link is absolute only when it STARTS with a scheme ("http:", "ftp:",
    // ...) or is protocol-relative ("//host/..."). Looking for "http" anywhere
    // in the string would misclassify paths such as "docs/http-guide.html".
    if (!preg_match('~^(?:[a-z][a-z0-9+.\-]*:|//)~i', $href)) {
        // Join with exactly one slash so "/about" does not become "//about".
        // Every relative link is resolved against $urlname itself, not against
        // the directory of the fetched page.
        $href = rtrim($urlname, '/') . '/' . ltrim($href, '/');
    }

    $list_urls[] = $href;
    $list_urlval[] = $value;
}

// Only the printed copy is de-duplicated; $list_urls and $list_urlval keep
// one entry per accepted link so they stay index-aligned.
print_r(array_unique($list_urls));
No comments:
Post a Comment