使用DOMDocument获取标题标记值

I want to get the value of the <title> tag for every page of my website. I am trying to run the script only on my own domain, collect all the page links on the site, and get the title of each linked page.

This is my code:

// Download the page whose links should be listed.
// file_get_contents() returns false when the download fails.
$html = file_get_contents('http://xxxxxxxxx.com');

//Create a new DOM document
$dom = new DOMDocument;

//Parse the HTML only when there is something to parse: handing false or
//an empty string to loadHTML() emits a warning on PHP 7 and throws a
//ValueError on PHP 8, which the @ operator cannot suppress.
if (is_string($html) && $html !== '') {
    //Collect markup errors internally instead of printing them; real-world
    //HTML is rarely valid enough for libxml to parse without complaints.
    $previousSetting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
}

//Get all links. You could also use any other tag name here,
//like 'img' or 'table', to extract other tags.
//When nothing was parsed the document is empty and so is this list.
$links = $dom->getElementsByTagName('a');

//Iterate over the extracted links and display their text and URLs
foreach ($links as $link) {
    //Show the link text, then the "href" attribute.
    echo $link->nodeValue;
    echo $link->getAttribute('href'), '<br>';
}

What I get: for a link such as <a href="z1.html">z2</a>, I get z1.html and z2. However, my z1.html page has a <title> of z3, and I want to get z1.html and z3, not z2. Can anyone help me?

You need to write your own custom function and call it in the appropriate places. If you need to get several tags from the pages that the anchor tags link to, you just need to create a new custom function.

The code below will help you get started:

// Prints, for every <a> on the start page, the link text, its href and the
// <title> of the page the href points to; finally prints the start page's
// own <title>.

// Collect libxml parse errors internally instead of printing them; the
// previous setting is restored at the end of the script.
$previous_libxml_setting = libxml_use_internal_errors(true);

$html = my_curl_function('http://www.anchorartspace.org/');
$doc = new DOMDocument();

// my_curl_function() returns whatever curl_exec() gave back, which is false
// on failure. Only parse real content: loadHTML() warns on PHP 7 and throws
// a ValueError on PHP 8 when given false or an empty string.
if (is_string($html) && $html !== '') {
    $doc->loadHTML($html);
    libxml_clear_errors();
}

// A page without a <title> yields an empty list, so check before item(0).
$mytag = $doc->getElementsByTagName('title');
$title = $mytag->length > 0 ? $mytag->item(0)->nodeValue : '';

$links = $doc->getElementsByTagName('a');

//Iterate over the extracted links and display their URLs and page titles
foreach ($links as $link) {

    // getAttribute() returns '' when the anchor has no href.
    $href = $link->getAttribute('href');

    //Show the link text and the "href" attribute.
    echo $link->nodeValue;
    echo "<br/>".'MY ANCHOR LINK : - ' . $href . "---TITLE--->";

    // NOTE(review): the href is passed to my_curl_function() exactly as it
    // appears in the page; relative links (e.g. "z1.html") presumably need
    // to be resolved against the page URL first - confirm.
    $a_html_title = '';
    $a_html = $href !== '' ? my_curl_function($href) : false;

    if (is_string($a_html) && $a_html !== '') {
        $a_doc = new DOMDocument();
        $a_doc->loadHTML($a_html);
        libxml_clear_errors();

        $a_titles = $a_doc->getElementsByTagName('title');
        if ($a_titles->length > 0) {
            $a_html_title = $a_titles->item(0)->nodeValue;
        }
    }

    // Prints nothing when the page could not be fetched or has no <title>.
    echo $a_html_title;
    echo '<br/>';
}
echo "Title: $title" . '<br/><br/>';

libxml_use_internal_errors($previous_libxml_setting);

/**
 * Downloads the body of a URL with cURL.
 *
 * @param string $url Address to fetch.
 * @return string|false The response body, or false when the cURL handle
 *                      cannot be created or the transfer fails.
 */
function my_curl_function($url) {
    $curl_handle = curl_init();
    if ($curl_handle === false) {
        return false;
    }

    curl_setopt($curl_handle, CURLOPT_URL, $url);
    // Give up if the connection is not established within 2 seconds.
    curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl_handle, CURLOPT_USERAGENT, 'name');
    // Follow HTTP redirects so the final page is returned rather than the
    // redirect response; cap the chain to avoid redirect loops.
    curl_setopt($curl_handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl_handle, CURLOPT_MAXREDIRS, 5);

    $html = curl_exec($curl_handle);
    curl_close($curl_handle);
    return $html;
}

Let me know if you need any more help.

Adding a bit to hitesh's answer: this checks that the element has attributes and that the desired attribute exists. It also checks that looking up the 'title' elements actually returns at least one item before trying to grab the first one ($a_html_title->item(0)).

I also added an option for curl to follow redirects (CURLOPT_FOLLOWLOCATION); I needed it for my hardcoded test against google.com.

// For every anchor that carries an href, print the link text, the URL and
// the <title> of the page fetched for it.

// Collect libxml parse errors internally instead of printing them; the
// previous setting is restored after the loop.
$previous_libxml_setting = libxml_use_internal_errors(true);

foreach ($links as $link) {

    // Skip anchors that have no attributes at all or no "href" attribute.
    if (!$link->hasAttributes() || !$link->hasAttribute('href')) {
        continue;
    }

    $href = $link->getAttribute('href');
    // Test override: every link is fetched as google.com while this line is
    // present. Remove it to fetch the real href.
    $href = 'http://google.com';   // hardcoding just for testing

    echo $link->nodeValue;
    echo "<br/>".'MY ANCHOR LINK : - ' . $href . "---TITLE--->";
    $a_html = my_curl_function($href);

    // my_curl_function() returns false when the transfer fails. Do not pass
    // that (or an empty body) to loadHTML(): it warns on PHP 7 and throws a
    // ValueError on PHP 8.
    if (!is_string($a_html) || $a_html === '') {
        continue;
    }

    $a_doc = new DOMDocument();
    $a_doc->loadHTML($a_html);
    libxml_clear_errors();
    $a_html_title = $a_doc->getElementsByTagName('title');

    //get and display what you need, but only when a <title> exists:
    if ($a_html_title->length) {
        echo $a_html_title->item(0)->nodeValue;
        echo '<br/>';
    }

}

libxml_use_internal_errors($previous_libxml_setting);

/**
 * Fetches $url with cURL and hands back whatever curl_exec() produced.
 *
 * @param string $url Address to request.
 * @return string|false The value returned by curl_exec().
 */
function my_curl_function($url) {
    $ch = curl_init();

    // Options are applied in the same order as before; the redirect option
    // (the one added in this version) stays last.
    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        CURLOPT_CONNECTTIMEOUT => 2,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT      => 'name',
        CURLOPT_FOLLOWLOCATION => true,
    ]);

    $response = curl_exec($ch);
    curl_close($ch);

    return $response;
}