Compare commits
No commits in common. "master" and "V4.2.1" have entirely different histories.
12
.github/FUNDING.yml
vendored
12
.github/FUNDING.yml
vendored
@ -1,12 +0,0 @@
|
|||||||
# These are supported funding model platforms
|
|
||||||
|
|
||||||
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
|
|
||||||
patreon: # Replace with a single Patreon username
|
|
||||||
open_collective: querylist # Replace with a single Open Collective username
|
|
||||||
ko_fi: # Replace with a single Ko-fi username
|
|
||||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
|
||||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
|
||||||
liberapay: # Replace with a single Liberapay username
|
|
||||||
issuehunt: # Replace with a single IssueHunt username
|
|
||||||
otechie: # Replace with a single Otechie username
|
|
||||||
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
|
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,4 +2,3 @@
|
|||||||
.idea/
|
.idea/
|
||||||
composer.lock
|
composer.lock
|
||||||
.DS_Store
|
.DS_Store
|
||||||
*.cache
|
|
@ -29,7 +29,7 @@
|
|||||||
- .....
|
- .....
|
||||||
|
|
||||||
## 环境要求
|
## 环境要求
|
||||||
- PHP >= 7.1
|
- PHP >= 7.0
|
||||||
|
|
||||||
> 如果你的PHP版本还停留在PHP5,或者不会使用Composer,你可以选择使用QueryList3,QueryList3支持php5.3以及手动安装。
|
> 如果你的PHP版本还停留在PHP5,或者不会使用Composer,你可以选择使用QueryList3,QueryList3支持php5.3以及手动安装。
|
||||||
QueryList3 文档:http://v3.querylist.cc
|
QueryList3 文档:http://v3.querylist.cc
|
||||||
|
@ -31,7 +31,7 @@ Through plug-ins you can easily implement things like:
|
|||||||
- .....
|
- .....
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
- PHP >= 7.1
|
- PHP >= 7.0
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
By Composer installation:
|
By Composer installation:
|
||||||
|
@ -4,11 +4,11 @@
|
|||||||
"keywords":["QueryList","phpQuery","spider"],
|
"keywords":["QueryList","phpQuery","spider"],
|
||||||
"homepage": "http://querylist.cc",
|
"homepage": "http://querylist.cc",
|
||||||
"require": {
|
"require": {
|
||||||
"PHP":">=7.1",
|
"PHP":">=7.0",
|
||||||
"jaeger/phpquery-single": "^1",
|
"jaeger/phpquery-single": "^1",
|
||||||
|
"tightenco/collect": "^5",
|
||||||
"jaeger/g-http": "^1.1",
|
"jaeger/g-http": "^1.1",
|
||||||
"ext-dom": "*",
|
"ext-dom": "*"
|
||||||
"tightenco/collect": ">5.0"
|
|
||||||
},
|
},
|
||||||
"suggest":{
|
"suggest":{
|
||||||
|
|
||||||
@ -32,9 +32,6 @@
|
|||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
"symfony/var-dumper": "^3.3",
|
"symfony/var-dumper": "^3.3",
|
||||||
"phpunit/phpunit": "^8.5"
|
"phpunit/phpunit": "^7.5"
|
||||||
},
|
|
||||||
"scripts": {
|
|
||||||
"test": "./vendor/bin/phpunit"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
|
|
||||||
namespace QL;
|
namespace QL;
|
||||||
use Closure;
|
use Closure;
|
||||||
use Tightenco\Collect\Support\Collection;
|
|
||||||
|
|
||||||
class Config
|
class Config
|
||||||
{
|
{
|
||||||
@ -21,8 +20,8 @@ class Config
|
|||||||
*/
|
*/
|
||||||
public function __construct()
|
public function __construct()
|
||||||
{
|
{
|
||||||
$this->plugins = new Collection();
|
$this->plugins = collect();
|
||||||
$this->binds = new Collection();
|
$this->binds = collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -9,7 +9,6 @@ namespace QL\Dom;
|
|||||||
|
|
||||||
use phpDocumentor\Reflection\Types\Null_;
|
use phpDocumentor\Reflection\Types\Null_;
|
||||||
use phpQueryObject;
|
use phpQueryObject;
|
||||||
use Tightenco\Collect\Support\Collection;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class Elements
|
* Class Elements
|
||||||
@ -192,7 +191,7 @@ class Elements
|
|||||||
*/
|
*/
|
||||||
public function map($callback)
|
public function map($callback)
|
||||||
{
|
{
|
||||||
$collection = new Collection();
|
$collection = collect();
|
||||||
$this->elements->each(function ($dom) use (& $collection, $callback) {
|
$this->elements->each(function ($dom) use (& $collection, $callback) {
|
||||||
$collection->push($callback(new self(pq($dom))));
|
$collection->push($callback(new self(pq($dom))));
|
||||||
});
|
});
|
||||||
|
@ -52,7 +52,7 @@ class Query
|
|||||||
{
|
{
|
||||||
$this->html = value($html);
|
$this->html = value($html);
|
||||||
$this->destroyDocument();
|
$this->destroyDocument();
|
||||||
$this->document = phpQuery::newDocumentHTML($this->html, $charset);
|
$this->document = phpQuery::newDocumentHTML($this->html,$charset);
|
||||||
return $this->ql;
|
return $this->ql;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ class Query
|
|||||||
*/
|
*/
|
||||||
public function getData(Closure $callback = null)
|
public function getData(Closure $callback = null)
|
||||||
{
|
{
|
||||||
return $this->handleData($this->data, $callback);
|
return is_null($callback) ? $this->data : $this->data->map($callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -125,8 +125,8 @@ class Query
|
|||||||
*/
|
*/
|
||||||
public function removeHead()
|
public function removeHead()
|
||||||
{
|
{
|
||||||
$html = preg_replace('/(<head>|<head\s+.+?>).+?<\/head>/is', '<head></head>', $this->html);
|
$html = preg_replace('/<head.+?>.+<\/head>/is','<head></head>',$this->html);
|
||||||
$html && $this->setHtml($html);
|
$this->setHtml($html);
|
||||||
return $this->ql;
|
return $this->ql;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,37 +139,24 @@ class Query
|
|||||||
public function query(Closure $callback = null)
|
public function query(Closure $callback = null)
|
||||||
{
|
{
|
||||||
$this->data = $this->getList();
|
$this->data = $this->getList();
|
||||||
$this->data = $this->handleData($this->data, $callback);
|
$callback && $this->data = $this->data->map($callback);
|
||||||
return $this->ql;
|
return $this->ql;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function handleData(Collection $data, $callback)
|
|
||||||
{
|
|
||||||
if (is_callable($callback)) {
|
|
||||||
if (empty($this->range)) {
|
|
||||||
$data = new Collection($callback($data->all(), null));
|
|
||||||
} else {
|
|
||||||
$data = $data->map($callback);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $data;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function getList()
|
protected function getList()
|
||||||
{
|
{
|
||||||
$data = [];
|
$data = [];
|
||||||
if (empty($this->range)) {
|
if (empty($this->range)) {
|
||||||
foreach ($this->rules as $key => $reg_value) {
|
foreach ($this->rules as $key => $reg_value){
|
||||||
$rule = $this->parseRule($reg_value);
|
$rule = $this->parseRule($reg_value);
|
||||||
$contentElements = $this->document->find($rule['selector']);
|
$contentElements = $this->document->find($rule['selector']);
|
||||||
$data[$key] = $this->extractContent($contentElements, $key, $rule);
|
$data[$key] = $this->extractContent($contentElements, $key, $rule);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$rangeElements = $this->document->find($this->range);
|
$rangeElements = $this->document->find($this->range);
|
||||||
$i = 0;
|
$i = 0;
|
||||||
foreach ($rangeElements as $element) {
|
foreach ($rangeElements as $element) {
|
||||||
foreach ($this->rules as $key => $reg_value) {
|
foreach ($this->rules as $key => $reg_value){
|
||||||
$rule = $this->parseRule($reg_value);
|
$rule = $this->parseRule($reg_value);
|
||||||
$contentElements = pq($element)->find($rule['selector']);
|
$contentElements = pq($element)->find($rule['selector']);
|
||||||
$data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
|
$data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
|
||||||
@ -178,7 +165,7 @@ class Query
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Collection($data);
|
return collect($data);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule)
|
protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule)
|
||||||
@ -188,7 +175,7 @@ class Query
|
|||||||
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
|
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
|
||||||
break;
|
break;
|
||||||
case 'texts':
|
case 'texts':
|
||||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
|
||||||
return $this->allowTags($element->html(), $rule['filter_tags']);
|
return $this->allowTags($element->html(), $rule['filter_tags']);
|
||||||
})->all();
|
})->all();
|
||||||
break;
|
break;
|
||||||
@ -196,7 +183,7 @@ class Query
|
|||||||
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
|
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
|
||||||
break;
|
break;
|
||||||
case 'htmls':
|
case 'htmls':
|
||||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
|
||||||
return $this->stripTags($element->html(), $rule['filter_tags']);
|
return $this->stripTags($element->html(), $rule['filter_tags']);
|
||||||
})->all();
|
})->all();
|
||||||
break;
|
break;
|
||||||
@ -204,22 +191,16 @@ class Query
|
|||||||
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
|
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
|
||||||
break;
|
break;
|
||||||
case 'htmlOuters':
|
case 'htmlOuters':
|
||||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
|
||||||
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
|
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
|
||||||
})->all();
|
})->all();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if(preg_match('/attr\((.+)\)/', $rule['attr'], $arr)) {
|
$content = $pqObj->attr($rule['attr']);
|
||||||
$content = $pqObj->attr($arr[1]);
|
|
||||||
} elseif (preg_match('/attrs\((.+)\)/', $rule['attr'], $arr)) {
|
|
||||||
$content = (new Elements($pqObj))->attrs($arr[1])->all();
|
|
||||||
} else {
|
|
||||||
$content = $pqObj->attr($rule['attr']);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_callable($rule['handle_callback'])) {
|
if(is_callable($rule['handle_callback'])){
|
||||||
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
|
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,47 +220,49 @@ class Query
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 去除特定的html标签
|
* 去除特定的html标签
|
||||||
* @param string $html
|
* @param string $html
|
||||||
* @param string $tags_str 多个标签名之间用空格隔开
|
* @param string $tags_str 多个标签名之间用空格隔开
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
protected function stripTags($html, $tags_str)
|
protected function stripTags($html,$tags_str)
|
||||||
{
|
{
|
||||||
$tagsArr = $this->tag($tags_str);
|
$tagsArr = $this->tag($tags_str);
|
||||||
$html = $this->removeTags($html, $tagsArr[1]);
|
$html = $this->removeTags($html,$tagsArr[1]);
|
||||||
$p = array();
|
$p = array();
|
||||||
foreach ($tagsArr[0] as $tag) {
|
foreach ($tagsArr[0] as $tag) {
|
||||||
$p[] = "/(<(?:\/" . $tag . "|" . $tag . ")[^>]*>)/i";
|
$p[]="/(<(?:\/".$tag."|".$tag.")[^>]*>)/i";
|
||||||
}
|
}
|
||||||
$html = preg_replace($p, "", trim($html));
|
$html = preg_replace($p,"",trim($html));
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 保留特定的html标签
|
* 保留特定的html标签
|
||||||
* @param string $html
|
* @param string $html
|
||||||
* @param string $tags_str 多个标签名之间用空格隔开
|
* @param string $tags_str 多个标签名之间用空格隔开
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
protected function allowTags($html, $tags_str)
|
protected function allowTags($html,$tags_str)
|
||||||
{
|
{
|
||||||
$tagsArr = $this->tag($tags_str);
|
$tagsArr = $this->tag($tags_str);
|
||||||
$html = $this->removeTags($html, $tagsArr[1]);
|
$html = $this->removeTags($html,$tagsArr[1]);
|
||||||
$allow = '';
|
$allow = '';
|
||||||
foreach ($tagsArr[0] as $tag) {
|
foreach ($tagsArr[0] as $tag) {
|
||||||
$allow .= "<$tag> ";
|
$allow .= "<$tag> ";
|
||||||
}
|
}
|
||||||
return strip_tags(trim($html), $allow);
|
return strip_tags(trim($html),$allow);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function tag($tags_str)
|
protected function tag($tags_str)
|
||||||
{
|
{
|
||||||
$tagArr = preg_split("/\s+/", $tags_str, -1, PREG_SPLIT_NO_EMPTY);
|
$tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY);
|
||||||
$tags = array(array(), array());
|
$tags = array(array(),array());
|
||||||
foreach ($tagArr as $tag) {
|
foreach($tagArr as $tag)
|
||||||
if (preg_match('/-(.+)/', $tag, $arr)) {
|
{
|
||||||
|
if(preg_match('/-(.+)/', $tag,$arr))
|
||||||
|
{
|
||||||
array_push($tags[1], $arr[1]);
|
array_push($tags[1], $arr[1]);
|
||||||
} else {
|
}else{
|
||||||
array_push($tags[0], $tag);
|
array_push($tags[0], $tag);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -288,16 +271,17 @@ class Query
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 移除特定的html标签
|
* 移除特定的html标签
|
||||||
* @param string $html
|
* @param string $html
|
||||||
* @param array $tags 标签数组
|
* @param array $tags 标签数组
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
protected function removeTags($html, $tags)
|
protected function removeTags($html,$tags)
|
||||||
{
|
{
|
||||||
$tag_str = '';
|
$tag_str = '';
|
||||||
if (count($tags)) {
|
if(count($tags))
|
||||||
|
{
|
||||||
foreach ($tags as $tag) {
|
foreach ($tags as $tag) {
|
||||||
$tag_str .= $tag_str ? ',' . $tag : $tag;
|
$tag_str .= $tag_str?','.$tag:$tag;
|
||||||
}
|
}
|
||||||
// phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
|
// phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
|
||||||
$doc = phpQuery::newDocumentHTML($html);
|
$doc = phpQuery::newDocumentHTML($html);
|
||||||
@ -310,7 +294,7 @@ class Query
|
|||||||
|
|
||||||
protected function destroyDocument()
|
protected function destroyDocument()
|
||||||
{
|
{
|
||||||
if ($this->document instanceof phpQueryObject) {
|
if($this->document instanceof phpQueryObject) {
|
||||||
$this->document->unloadDocument();
|
$this->document->unloadDocument();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,6 @@ use Closure;
|
|||||||
use QL\Providers\HttpServiceProvider;
|
use QL\Providers\HttpServiceProvider;
|
||||||
use QL\Providers\PluginServiceProvider;
|
use QL\Providers\PluginServiceProvider;
|
||||||
use QL\Providers\SystemServiceProvider;
|
use QL\Providers\SystemServiceProvider;
|
||||||
use Tightenco\Collect\Support\Collection;
|
|
||||||
|
|
||||||
class Kernel
|
class Kernel
|
||||||
{
|
{
|
||||||
@ -35,7 +34,7 @@ class Kernel
|
|||||||
public function __construct(QueryList $ql)
|
public function __construct(QueryList $ql)
|
||||||
{
|
{
|
||||||
$this->ql = $ql;
|
$this->ql = $ql;
|
||||||
$this->binds = new Collection();
|
$this->binds = collect();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function bootstrap()
|
public function bootstrap()
|
||||||
|
@ -23,7 +23,7 @@ use QL\Services\MultiRequestService;
|
|||||||
* Class QueryList
|
* Class QueryList
|
||||||
* @package QL
|
* @package QL
|
||||||
*
|
*
|
||||||
* @method string getHtml($rel = true)
|
* @method string getHtml()
|
||||||
* @method QueryList setHtml($html)
|
* @method QueryList setHtml($html)
|
||||||
* @method QueryList html($html)
|
* @method QueryList html($html)
|
||||||
* @method Dom\Elements find($selector)
|
* @method Dom\Elements find($selector)
|
||||||
|
@ -17,7 +17,7 @@ class FindTest extends TestCaseBase
|
|||||||
protected $html;
|
protected $html;
|
||||||
protected $ql;
|
protected $ql;
|
||||||
|
|
||||||
protected function setUp(): void
|
public function setUp()
|
||||||
{
|
{
|
||||||
$this->html = $this->getSnippet('snippet-1');
|
$this->html = $this->getSnippet('snippet-1');
|
||||||
$this->ql = QueryList::html($this->html);
|
$this->ql = QueryList::html($this->html);
|
||||||
|
@ -18,7 +18,7 @@ class RulesTest extends TestCaseBase
|
|||||||
protected $html;
|
protected $html;
|
||||||
protected $ql;
|
protected $ql;
|
||||||
|
|
||||||
protected function setUp(): void
|
public function setUp()
|
||||||
{
|
{
|
||||||
$this->html = $this->getSnippet('snippet-2');
|
$this->html = $this->getSnippet('snippet-2');
|
||||||
$this->ql = QueryList::html($this->html);
|
$this->ql = QueryList::html($this->html);
|
||||||
|
@ -18,7 +18,7 @@ class HttpTest extends TestCaseBase
|
|||||||
{
|
{
|
||||||
protected $urls;
|
protected $urls;
|
||||||
|
|
||||||
protected function setUp(): void
|
public function setUp()
|
||||||
{
|
{
|
||||||
$this->urls = [
|
$this->urls = [
|
||||||
'http://httpbin.org/get?name=php',
|
'http://httpbin.org/get?name=php',
|
||||||
|
@ -16,7 +16,7 @@ class InstanceTest extends TestCaseBase
|
|||||||
{
|
{
|
||||||
protected $html;
|
protected $html;
|
||||||
|
|
||||||
protected function setUp(): void
|
public function setUp()
|
||||||
{
|
{
|
||||||
$this->html = $this->getSnippet('snippet-1');
|
$this->html = $this->getSnippet('snippet-1');
|
||||||
}
|
}
|
||||||
@ -38,11 +38,11 @@ class InstanceTest extends TestCaseBase
|
|||||||
public function get_new_object()
|
public function get_new_object()
|
||||||
{
|
{
|
||||||
$ql = (new QueryList())->html($this->html);
|
$ql = (new QueryList())->html($this->html);
|
||||||
$ql2 = (new QueryList())->html('');
|
$ql2 = new QueryList();
|
||||||
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
||||||
|
|
||||||
$ql = QueryList::range('')->html($this->html);
|
$ql = QueryList::range('')->html($this->html);
|
||||||
$ql2 = QueryList::range('')->html('');
|
$ql2 = QueryList::range('');
|
||||||
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -16,7 +16,7 @@ class MethodTest extends TestCaseBase
|
|||||||
{
|
{
|
||||||
protected $html;
|
protected $html;
|
||||||
|
|
||||||
protected function setUp(): void
|
public function setUp()
|
||||||
{
|
{
|
||||||
$this->html = $this->getSnippet('snippet-1');
|
$this->html = $this->getSnippet('snippet-1');
|
||||||
}
|
}
|
||||||
@ -30,7 +30,7 @@ class MethodTest extends TestCaseBase
|
|||||||
$qlHtml = QueryList::pipe(function(QueryList $ql) use($html){
|
$qlHtml = QueryList::pipe(function(QueryList $ql) use($html){
|
||||||
$ql->setHtml($html);
|
$ql->setHtml($html);
|
||||||
return $ql;
|
return $ql;
|
||||||
})->getHtml(false);
|
})->getHtml();
|
||||||
$this->assertEquals($html,$qlHtml);
|
$this->assertEquals($html,$qlHtml);
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user