lang/php

PHP Goutte DomCrawler Component

C/H 2018. 7. 2. 08:30

Goutte 설치

# "composer install" takes no package argument; "require" adds the dependency.
# Pinned to 3.x because the examples below use Goutte's Guzzle-based setClient() API.
composer require "fabpot/goutte:^3.2"

Crawler 컨텐츠 탐색 방법

Get Crawler Content

require 'vendor/autoload.php';

use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;

// Site root and the board listing page that the examples below scrape.
$domain = 'http://www.etoland.co.kr';
$url = $domain . '/bbs/board.php?bo_table=star';

// Give Goutte a customised Guzzle client, then set the User-Agent on the
// same chain via setServerParameters().
$client = new Client();
$client->setClient(new \GuzzleHttp\Client([
    'timeout' => 90,
    // NOTE(review): turns off TLS certificate verification; irrelevant for the
    // plain-http URL above, but do not copy this to https targets unchecked.
    'verify' => false,
    // Guzzle's request option is spelled "cookies" (the original had "cookie").
    'cookies' => true,
    // 'debug' => true,
]))->setServerParameters([
    // The original value began with a stray apostrophe that would have been
    // sent as part of the header.
    'HTTP_USER_AGENT' => 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36',
]);

// Fetch the listing; the result is the crawler used by every example below.
$crawler = $client->request('GET', $url);

each callback

// Visit every row of the board table and print "category : title url".
// $domain is captured with use() instead of `global`, so the snippet also
// works when it is pasted inside a function or method.
$crawler->filter('#fboardlist tr')->each(function ($node, $i) use ($domain) {
    $links = $node->filter('a');

    // Skip rows with index 0 and 1, rows without both links (eq(1) is read
    // below and fails on a missing node), and rows whose text contains "[공지]".
    if ($i <= 1 || $links->count() < 2 || preg_match("#\[공지\]#", $node->text())) {
        return;
    }

    // First link: category label wrapped in brackets; second link: post title.
    $cate = str_replace(['[', ']'], '', $links->eq(0)->text());
    $name = $links->eq(1)->text();
    // Turn the relative "../" prefix into a path under the site root.
    $href = $domain . str_replace('../', '/', $links->eq(1)->attr('href'));

    echo "{$cate} : {$name} {$href}\n";
});

return each callback

// each() collects the callback's return value for every row, including the
// rows that are skipped (they yield null). array_filter() drops those nulls
// and array_values() reindexes, so $item is a clean list of matched posts.
$item = array_values(array_filter(
    $crawler->filter('#fboardlist tr')->each(function ($node, $i) use ($domain) {
        $links = $node->filter('a');

        // Skip rows with index 0 and 1, rows without both links (eq(1) is read
        // below and fails on a missing node), and rows containing "[공지]".
        if ($i <= 1 || $links->count() < 2 || preg_match("#\[공지\]#", $node->text())) {
            return null;
        }

        return [
            // First link: category label with its surrounding brackets removed.
            'cate' => str_replace(['[', ']'], '', $links->eq(0)->text()),
            // Second link: post title and its URL under the site root.
            'name' => $links->eq(1)->text(),
            'href' => $domain . str_replace('../', '/', $links->eq(1)->attr('href')),
        ];
    })
));
var_dump($item);

foreach new Crawler

// Same extraction with a plain foreach: each iterated element is a raw DOM
// node, so it is wrapped in a new Crawler before filter() can be used on it.
$item = [];
foreach ($crawler->filter('#fboardlist tr') as $i => $element) {
    $row = new Crawler($element);
    $links = $row->filter('a');

    // Skip rows with index 0 and 1, rows without both links (eq(1) is read
    // below and fails on a missing node), and rows containing "[공지]".
    if ($i <= 1 || $links->count() < 2 || preg_match("#\[공지\]#", $row->text())) {
        continue;
    }

    $item[] = [
        // First link: category label with its surrounding brackets removed.
        'cate' => str_replace(['[', ']'], '', $links->eq(0)->text()),
        // Second link: post title and its URL under the site root.
        'name' => $links->eq(1)->text(),
        'href' => $domain . str_replace('../', '/', $links->eq(1)->attr('href')),
    ];
}
var_dump($item);


반응형

'lang > php' 카테고리의 다른 글

PHP mysql-database-class  (0) 2018.07.04
PHP simple-html-dom-parser  (0) 2018.07.03
PHP Goutte Cookie  (0) 2018.07.01
PHP Guzzle Scraper  (0) 2018.06.30
Simple PHP Web Scraper Goutte  (0) 2018.06.29