Pages

Friday, December 21, 2012

Find All Links on Web Page

// Fetch a web page, collect the href attribute of every <a> element inside
// <body>, and print the de-duplicated list inside a <pre> block.
$html = file_get_contents('http://www.TestDomain.com');

$list_urls = array();

// file_get_contents() returns false (and raises its own warning) when the
// fetch fails; skip parsing then, because loadHTML() rejects empty input.
if ($html !== false && $html !== '') {
    $dom = new DOMDocument();

    // Real-world markup is rarely valid. Have libxml collect its parse
    // warnings internally instead of hiding every error with @, then restore
    // the previous setting so other code is unaffected.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);

    // grab all the links on the page
    $xpath = new DOMXPath($dom);
    $hrefs = $xpath->evaluate("/html/body//a");

    foreach ($hrefs as $href) {
        // getAttribute() yields '' for anchors that have no href attribute.
        $list_urls[] = $href->getAttribute('href');
    }
}

echo '<pre>';
// The URLs come from a remote page, so escape them before writing them into
// our own HTML output.
echo htmlspecialchars(print_r(array_unique($list_urls), true), ENT_QUOTES, 'UTF-8');
echo '</pre>';

No comments: