0

関数があり、array_walk 関数を使用して、タイトル関数を使用して配列 [リンク] の値をスキャンし、対応する配列の [タイトル] に配置しています。

例えば ​​The Qlick => [link] => "http://www.theqlick.com" [title] => The Qlick

    $links = Array();

$URL = 'http://www.theqlick.com'; // change it for urls to grab  

// grabs the urls from URL 
$file  = file_get_html($URL);
foreach ($file->find('a') as $theelement) 
{
    $abs_url = url_to_absolute($URL, $theelement->href);
    if (!empty($abs_url))
        $links[] = $abs_url;
}

  function Titles() {
  global $links;
  $str = implode('',array_map('file_get_contents',$links));
  error_reporting(E_ERROR | E_PARSE);

  $titles = Array();
    if( strlen( $str )>0 ) {
  $titles[] = preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
  return $title;   
  return $links;
  } }


  $newArray = array();


  $title = array_walk($links, 'Titles');
  foreach($links as $key => $val ){
$newArray[$key] = array( 'link' => $val, 'title' => $title);
 }
 print_r($newArray);

My result when var_dump is used:
  array(2) {



 [0]=>
  array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
      }



  [1]=>
  array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }

    }
   array(2) {
   [0]=>
     array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
       }


  [1]=>
 array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }


  array(2) {
 [0]=>
 array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
     }
  [1]=>
  array(6) {
   [0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
     }
     }

    array(2) {
   [0]=>
   array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
      }
  [1]=>
   array(6) {
[0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
     }
     }
    array(2) {
   [0]=>
   array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
     }
   [1]=>
   array(6) {
  [0]=>
string(11) " The Qlick "
[1]=>
string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }
  array(2) {
 [0]=>
 array(6) {
[0]=>
string(26) "<title> The Qlick </title>"
[1]=>
string(26) "<title> The Qlick </title>"
[2]=>
string(68) "<title> Welcome to Festival Freaks | Home to Leeds Festival </title>"
[3]=>
string(24) "<title> Welcome </title>"
[4]=>
string(27) "<title> Qlick Kids </title>"
[5]=>
string(26) "<title> The Qlick </title>"
    }

  [1]=>
  array(6) {
   [0]=>
   string(11) " The Qlick "
   [1]=>

 string(11) " The Qlick "
[2]=>
string(53) " Welcome to Festival Freaks | Home to Leeds Festival "
[3]=>
string(9) " Welcome "
[4]=>
string(12) " Qlick Kids "
[5]=>
string(11) " The Qlick "
    }
    }


   Array
   ( 
[0] => Array
    (
        [link] => http://www.theqlick.com/index.php
        [title] => 1
    )

[1] => Array
    (
        [link] => http://www.theqlick.com/qlickdates.php
        [title] => 1
    )

[2] => Array
    (
        [link] => http://www.theqlick.com/festivalfreaks.html
        [title] => 1
    )

[3] => Array
    (
        [link] => http://www.theqlick.com/2kcm.php
        [title] => 1
    )

[4] => Array
    (
        [link] => http://www.theqlick.com/index3.php
        [title] => 1
    )

[5] => Array
    (
        [link] => http://www.theqlick.com/index2.php
        [title] => 1
    )

       )
4

1 に答える 1

0

関数の終わりを次のように変更します。

if( strlen( $str )>0 ) {
    $titles = Array();
    preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
    if (count($titles) > 1) {
        return $titles[1];   
    }
}

return '';

preg_match_all一致数を返します。これにより、最初に見つかった一致が返されます。検索するタイトルやテキストがない場合は、空の文字列が返されます。

私はこれをテストしていないので、デバッグが必要な場合があります。

追加するために編集:

$links = Array();  
$URL = 'http://www.theqlick.com'; // change it for urls to grab    
// grabs the urls from URL  

function Titles($link) {
    $str = file_get_contents($link);    
    if( strlen( $str )>0 ) {    
        preg_match_all( "/\<title\>(.*)\<\/title\>/", $str, $titles );
        if (count($titles) > 1) {
            return $titles[1];   
        }
    }

    return '';
}

$file  = file_get_html($URL); 
foreach ($file->find('a') as $theelement)  {     
    $abs_url = url_to_absolute($URL, $theelement->href);     
    if (!empty($abs_url)) {
         $links[] = $abs_url; 
    } 
}

$output = Array();

foreach ($links as $thisLink) {
    $output[] = array("link" => $thisLink, "title" => Titles($thisLink));
}    

(繰り返しますが、これは非常にテストされていません)

リンクのリストを生成します。次に、そのリストをステップ実行し、それぞれについて、ページのタイトルを取得します。一度に1つずつ実行するので、何がどこにあるかを追跡する方がはるかに簡単です。

于 2012-09-21T21:09:22.617 に答える