I have a script that I wrote to check for broken links on a page. It works absolutely fantastic for pages with less than a hundred or so links, but once the script runs for longer than 60 seconds I get a 500 error.
I have tried modifying the php ini settings (max execution time is currently set at 300). I have also used set_time_limit as well as ini_set(). I've read countless posts and still haven't come across a solution. I will post the code below and hopefully someone can tell me what is wrong.
<?php
//INITIALIZE VARIABLES
$pageToCheck = $_GET['link'];
//INITIALIZE DOMDOCUMENT
$domDoc = new DOMDocument;
$domDoc->preserveWhiteSpace = false;
//IF THE PAGE BEING CHECKED LOADS
if(@$domDoc->loadHTMLFile($pageToCheck)) { //note that errors are suppressed so DOMDocument doesn't complain about XHTML
//LOOP THROUGH ANCHOR TAGS IN THE MAIN CONTENT AREA
$pageLinks = $domDoc->getElementsByTagName('a');
foreach($pageLinks as $currLink) {
//LOOP THROUGH ATTRIBUTES FOR CURRENT LINK
foreach($currLink->attributes as $attributeName=>$attributeValue) {
//IF CURRENT ATTRIBUTE CONTAINS THE WEBSITE ADDRESS
if($attributeName == 'href' && $attributeValue->value != "" && $attributeValue->value != "#") {
if (preg_match("/webapp/", $attributeValue->value)) {
$lCheck = $pageToCheck . $attributeValue->value;
}
else
{
$lCheck = $attributeValue->value;
}
$check_url_status = check_url($lCheck);
if ($check_url_status != "200" && $check_url_status != "301" && $check_url_status != "302")
{
echo "<span style=\"color: #FF0000; font-weight: bold;\">Broken Link: </span><span style=\"color: #0000FF; font-weight: bold;\">Status: $check_url_status</span> $attributeValue->value <br>";
}
else
{
echo "<span style=\"color: #00FF00; font-weight: bold;\">Working Link: </span><span style=\"color: #0000FF; font-weight: bold;\">Status: $check_url_status</span> $attributeValue->value <br>";
}
}
}
}
}
function check_url($url) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch , CURLOPT_RETURNTRANSFER, 1);
$data = curl_exec($ch);
$headers = curl_getinfo($ch);
curl_close($ch);
return $headers['http_code'];
}
?>