Compare commits

..

No commits in common. "master" and "V4.2.0" have entirely different histories.

15 changed files with 64 additions and 100 deletions

12
.github/FUNDING.yml vendored
View File

@ -1,12 +0,0 @@
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: querylist # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

3
.gitignore vendored
View File

@ -1,5 +1,4 @@
/vendor/
.idea/
composer.lock
.DS_Store
*.cache
.DS_Store

View File

@ -29,7 +29,7 @@
- .....
## 环境要求
- PHP >= 7.1
- PHP >= 7.0
> 如果你的PHP版本还停留在PHP5或者不会使用Composer,你可以选择使用QueryList3,QueryList3支持php5.3以及手动安装。
QueryList3 文档:http://v3.querylist.cc
@ -306,4 +306,4 @@ $ql->curlMulti([
Jaeger <JaegerCode@gmail.com>
## License
QueryList is licensed under the MIT License. See the LICENSE file for more details.
QueryList is licensed under the MIT License. See the LICENSE file for more details.

View File

@ -31,7 +31,7 @@ Through plug-ins you can easily implement things like:
- .....
## Requirements
- PHP >= 7.1
- PHP >= 7.0
## Installation
By Composer installation:
@ -301,4 +301,4 @@ Jaeger <JaegerCode@gmail.com>
If this library is useful to you, say thanks by [buying me a beer :beer:](https://www.paypal.me/jaepay)!
## License
QueryList is licensed under the MIT License. See the LICENSE file for more details.
QueryList is licensed under the MIT License. See the LICENSE file for more details.

View File

@ -4,11 +4,11 @@
"keywords":["QueryList","phpQuery","spider"],
"homepage": "http://querylist.cc",
"require": {
"PHP":">=7.1",
"PHP":">=7.0",
"jaeger/phpquery-single": "^1",
"tightenco/collect": "^5",
"jaeger/g-http": "^1.1",
"ext-dom": "*",
"tightenco/collect": ">5.0"
"ext-dom": "*"
},
"suggest":{
@ -32,9 +32,6 @@
},
"require-dev": {
"symfony/var-dumper": "^3.3",
"phpunit/phpunit": "^8.5"
},
"scripts": {
"test": "./vendor/bin/phpunit"
"phpunit/phpunit": "^7.5"
}
}

View File

@ -7,7 +7,6 @@
namespace QL;
use Closure;
use Tightenco\Collect\Support\Collection;
class Config
{
@ -21,8 +20,8 @@ class Config
*/
public function __construct()
{
$this->plugins = new Collection();
$this->binds = new Collection();
$this->plugins = collect();
$this->binds = collect();
}

View File

@ -9,7 +9,6 @@ namespace QL\Dom;
use phpDocumentor\Reflection\Types\Null_;
use phpQueryObject;
use Tightenco\Collect\Support\Collection;
/**
* Class Elements
@ -192,7 +191,7 @@ class Elements
*/
public function map($callback)
{
$collection = new Collection();
$collection = collect();
$this->elements->each(function ($dom) use (& $collection, $callback) {
$collection->push($callback(new self(pq($dom))));
});

View File

@ -35,12 +35,11 @@ class Query
}
/**
* @param bool $rel
* @return String
* @return mixed
*/
public function getHtml($rel = true)
public function getHtml()
{
return $rel ? $this->document->htmlOuter() : $this->html;
return $this->html;
}
/**
@ -52,7 +51,7 @@ class Query
{
$this->html = value($html);
$this->destroyDocument();
$this->document = phpQuery::newDocumentHTML($this->html, $charset);
$this->document = phpQuery::newDocumentHTML($this->html,$charset);
return $this->ql;
}
@ -64,7 +63,7 @@ class Query
*/
public function getData(Closure $callback = null)
{
return $this->handleData($this->data, $callback);
return is_null($callback) ? $this->data : $this->data->map($callback);
}
/**
@ -125,8 +124,8 @@ class Query
*/
public function removeHead()
{
$html = preg_replace('/(<head>|<head\s+.+?>).+?<\/head>/is', '<head></head>', $this->html);
$html && $this->setHtml($html);
$html = preg_replace('/<head.+?>.+<\/head>/is','<head></head>',$this->html);
$this->setHtml($html);
return $this->ql;
}
@ -139,37 +138,24 @@ class Query
public function query(Closure $callback = null)
{
$this->data = $this->getList();
$this->data = $this->handleData($this->data, $callback);
$callback && $this->data = $this->data->map($callback);
return $this->ql;
}
public function handleData(Collection $data, $callback)
{
if (is_callable($callback)) {
if (empty($this->range)) {
$data = new Collection($callback($data->all(), null));
} else {
$data = $data->map($callback);
}
}
return $data;
}
protected function getList()
{
$data = [];
if (empty($this->range)) {
foreach ($this->rules as $key => $reg_value) {
foreach ($this->rules as $key => $reg_value){
$rule = $this->parseRule($reg_value);
$contentElements = $this->document->find($rule['selector']);
$data[$key] = $this->extractContent($contentElements, $key, $rule);
}
} else {
$rangeElements = $this->document->find($this->range);
$rangeElements = $this->document->find($this->range);
$i = 0;
foreach ($rangeElements as $element) {
foreach ($this->rules as $key => $reg_value) {
foreach ($this->rules as $key => $reg_value){
$rule = $this->parseRule($reg_value);
$contentElements = pq($element)->find($rule['selector']);
$data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
@ -178,7 +164,7 @@ class Query
}
}
return new Collection($data);
return collect($data);
}
protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule)
@ -188,7 +174,7 @@ class Query
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
break;
case 'texts':
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
return $this->allowTags($element->html(), $rule['filter_tags']);
})->all();
break;
@ -196,7 +182,7 @@ class Query
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
break;
case 'htmls':
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
return $this->stripTags($element->html(), $rule['filter_tags']);
})->all();
break;
@ -204,22 +190,16 @@ class Query
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
break;
case 'htmlOuters':
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
})->all();
break;
default:
if(preg_match('/attr\((.+)\)/', $rule['attr'], $arr)) {
$content = $pqObj->attr($arr[1]);
} elseif (preg_match('/attrs\((.+)\)/', $rule['attr'], $arr)) {
$content = (new Elements($pqObj))->attrs($arr[1])->all();
} else {
$content = $pqObj->attr($rule['attr']);
}
$content = $pqObj->attr($rule['attr']);
break;
}
if (is_callable($rule['handle_callback'])) {
if(is_callable($rule['handle_callback'])){
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
}
@ -239,47 +219,49 @@ class Query
/**
* 去除特定的html标签
* @param string $html
* @param string $tags_str 多个标签名之间用空格隔开
* @param string $html
* @param string $tags_str 多个标签名之间用空格隔开
* @return string
*/
protected function stripTags($html, $tags_str)
protected function stripTags($html,$tags_str)
{
$tagsArr = $this->tag($tags_str);
$html = $this->removeTags($html, $tagsArr[1]);
$html = $this->removeTags($html,$tagsArr[1]);
$p = array();
foreach ($tagsArr[0] as $tag) {
$p[] = "/(<(?:\/" . $tag . "|" . $tag . ")[^>]*>)/i";
$p[]="/(<(?:\/".$tag."|".$tag.")[^>]*>)/i";
}
$html = preg_replace($p, "", trim($html));
$html = preg_replace($p,"",trim($html));
return $html;
}
/**
* 保留特定的html标签
* @param string $html
* @param string $tags_str 多个标签名之间用空格隔开
* @param string $html
* @param string $tags_str 多个标签名之间用空格隔开
* @return string
*/
protected function allowTags($html, $tags_str)
protected function allowTags($html,$tags_str)
{
$tagsArr = $this->tag($tags_str);
$html = $this->removeTags($html, $tagsArr[1]);
$html = $this->removeTags($html,$tagsArr[1]);
$allow = '';
foreach ($tagsArr[0] as $tag) {
$allow .= "<$tag> ";
}
return strip_tags(trim($html), $allow);
return strip_tags(trim($html),$allow);
}
protected function tag($tags_str)
{
$tagArr = preg_split("/\s+/", $tags_str, -1, PREG_SPLIT_NO_EMPTY);
$tags = array(array(), array());
foreach ($tagArr as $tag) {
if (preg_match('/-(.+)/', $tag, $arr)) {
$tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY);
$tags = array(array(),array());
foreach($tagArr as $tag)
{
if(preg_match('/-(.+)/', $tag,$arr))
{
array_push($tags[1], $arr[1]);
} else {
}else{
array_push($tags[0], $tag);
}
}
@ -288,16 +270,17 @@ class Query
/**
* 移除特定的html标签
* @param string $html
* @param array $tags 标签数组
* @param string $html
* @param array $tags 标签数组
* @return string
*/
protected function removeTags($html, $tags)
protected function removeTags($html,$tags)
{
$tag_str = '';
if (count($tags)) {
if(count($tags))
{
foreach ($tags as $tag) {
$tag_str .= $tag_str ? ',' . $tag : $tag;
$tag_str .= $tag_str?','.$tag:$tag;
}
// phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
$doc = phpQuery::newDocumentHTML($html);
@ -310,7 +293,7 @@ class Query
protected function destroyDocument()
{
if ($this->document instanceof phpQueryObject) {
if($this->document instanceof phpQueryObject) {
$this->document->unloadDocument();
}
}

View File

@ -14,7 +14,6 @@ use Closure;
use QL\Providers\HttpServiceProvider;
use QL\Providers\PluginServiceProvider;
use QL\Providers\SystemServiceProvider;
use Tightenco\Collect\Support\Collection;
class Kernel
{
@ -35,7 +34,7 @@ class Kernel
public function __construct(QueryList $ql)
{
$this->ql = $ql;
$this->binds = new Collection();
$this->binds = collect();
}
public function bootstrap()

View File

@ -23,7 +23,7 @@ use QL\Services\MultiRequestService;
* Class QueryList
* @package QL
*
* @method string getHtml($rel = true)
* @method string getHtml()
* @method QueryList setHtml($html)
* @method QueryList html($html)
* @method Dom\Elements find($selector)

View File

@ -17,7 +17,7 @@ class FindTest extends TestCaseBase
protected $html;
protected $ql;
protected function setUp(): void
public function setUp()
{
$this->html = $this->getSnippet('snippet-1');
$this->ql = QueryList::html($this->html);

View File

@ -18,7 +18,7 @@ class RulesTest extends TestCaseBase
protected $html;
protected $ql;
protected function setUp(): void
public function setUp()
{
$this->html = $this->getSnippet('snippet-2');
$this->ql = QueryList::html($this->html);

View File

@ -18,7 +18,7 @@ class HttpTest extends TestCaseBase
{
protected $urls;
protected function setUp(): void
public function setUp()
{
$this->urls = [
'http://httpbin.org/get?name=php',

View File

@ -16,7 +16,7 @@ class InstanceTest extends TestCaseBase
{
protected $html;
protected function setUp(): void
public function setUp()
{
$this->html = $this->getSnippet('snippet-1');
}
@ -38,11 +38,11 @@ class InstanceTest extends TestCaseBase
public function get_new_object()
{
$ql = (new QueryList())->html($this->html);
$ql2 = (new QueryList())->html('');
$ql2 = new QueryList();
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
$ql = QueryList::range('')->html($this->html);
$ql2 = QueryList::range('')->html('');
$ql2 = QueryList::range('');
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
}
}

View File

@ -16,7 +16,7 @@ class MethodTest extends TestCaseBase
{
protected $html;
protected function setUp(): void
public function setUp()
{
$this->html = $this->getSnippet('snippet-1');
}
@ -30,7 +30,7 @@ class MethodTest extends TestCaseBase
$qlHtml = QueryList::pipe(function(QueryList $ql) use($html){
$ql->setHtml($html);
return $ql;
})->getHtml(false);
})->getHtml();
$this->assertEquals($html,$qlHtml);
}
}