Compare commits

...

35 Commits

Author SHA1 Message Date
Jaeger(黄杰)
894fb4344e
Merge pull request #145 from maxiaozhi/master
正则匹配成功时才替换掉html
2021-08-08 13:04:55 +08:00
lion
e4fc716acd 正则匹配成功时才替换掉html 2021-07-18 23:37:35 +08:00
Jaeger(黄杰)
39dc0ca9c6
Merge pull request #143 from maxiaozhi/patch-1
Fix the matching exception
2021-07-05 14:07:58 +08:00
maxiaozhi
ef0a2efd4f
Fix the matching exception
Fix the matching exception when the page contains multiple tags prefixed with head (for example: < head >, < header >)
2021-07-05 13:51:24 +08:00
huangjie
5953daac54 update collect 2020-12-14 10:39:28 +08:00
huangjie
465c6aefc7 update collect 2020-09-27 17:41:44 +08:00
Jaeger(黄杰)
92cb319d44
Update README-ZH.md 2020-07-18 13:06:29 +08:00
Jaeger(黄杰)
cbf3e0fcad
Update README.md 2020-07-18 13:05:59 +08:00
Jaeger(黄杰)
cfa2d94a79
Update FUNDING.yml 2020-07-17 13:20:49 +08:00
Jaeger(黄杰)
47a444bf9e
Create FUNDING.yml 2020-07-17 13:08:44 +08:00
Jaeger
85903fa9b5 feat: rules add attrs 2020-04-03 20:16:00 +08:00
Jaeger(黄杰)
e527c637c7
Merge pull request #110 from jae-jae/develop
replace collect()
2020-04-03 04:55:27 -05:00
Jaeger
f0a9798925 replace collect() 2020-04-03 17:33:32 +08:00
Jaeger
faea883c6f fix: data callback 2020-04-01 22:03:50 +08:00
Jaeger
c16826a573 updaed composer dependency 2020-03-23 18:15:04 +08:00
Jaeger
1492751f98 feat: optimization getHtml() 2020-03-22 17:19:57 +08:00
Jaeger
b7954b9aef fix: memory overflow 2020-03-20 13:26:40 +08:00
Jaeger
b3d84cf057 feat: modify the each function of class elements 2020-03-15 14:17:18 +08:00
Jaeger
52bbdeae14 Merge branch 'master' of github.com:jae-jae/QueryList into develop 2020-03-15 14:07:52 +08:00
Jaeger(黄杰)
25b2dbdc86
Merge pull request #105 from edwinhuish/add-each-function-same-as-collection
添加 each function 并和 Collection 保持一致,返回 false 时中断循环。
2020-03-15 01:07:22 -05:00
Jaeger
02c2b125d8 feat: elements class add htmlOuters function 2020-03-15 13:58:00 +08:00
Jaeger
fc8b701ef2 feat: optimize range results 2020-03-15 13:45:00 +08:00
Jaeger
75e436c73f feat: merge master 2020-03-15 11:30:35 +08:00
Jaeger(黄杰)
aa90e5a21d
Merge pull request #106 from edwinhuish/destroy-old-phpquey-object-when-setHtml
destroy old phpquery object when setHtml
2020-03-14 22:28:13 -05:00
Jaeger
dd9af6881d feat: rules add texts and htmls attribute 2020-03-13 21:42:25 +08:00
Jaeger
b07d4bfc74 feat: rules add texts and htmls attribute 2020-03-13 21:39:42 +08:00
Edwin Xu
8c1614c4c3 destroy old phpquery object when setHtml 2020-03-13 16:08:55 +08:00
Jaeger
b387ef5bb0 feat: rules add htmlOuter attribute 2020-03-13 15:16:44 +08:00
Edwin Xu
67f0052c5d 添加 each function 并和 Collection 保持一致,返回 false 时中断循环。 2020-03-13 14:20:37 +08:00
Jaeger
7c86f82527 fix: optimize memory usage 2020-03-13 13:49:36 +08:00
Jaeger(黄杰)
6ee6a26aee
Merge pull request #102 from edwinhuish/auto-destroy-phpquery-document
destroy phpquery document object when destruct Query class
2020-03-11 10:29:31 -05:00
Jaeger(黄杰)
116f19da65
Merge pull request #104 from edwinhuish/add-phpdoc
fix phpdoc
2020-03-11 10:20:22 -05:00
Edwin Xu
67cbd0f473 修复phpdoc 2020-03-10 21:36:55 +08:00
Edwin Xu
3eb26451c6 修复phpdoc 2020-03-10 21:03:25 +08:00
Edwin Xu
a76ecb4258 destroy phpquery document object when destruct Query class 2020-03-05 22:27:27 +08:00
15 changed files with 304 additions and 195 deletions

12
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1,12 @@
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: querylist # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

3
.gitignore vendored
View File

@ -1,4 +1,5 @@
/vendor/ /vendor/
.idea/ .idea/
composer.lock composer.lock
.DS_Store .DS_Store
*.cache

View File

@ -29,7 +29,7 @@
- ..... - .....
## 环境要求 ## 环境要求
- PHP >= 7.0 - PHP >= 7.1
> 如果你的PHP版本还停留在PHP5或者不会使用Composer,你可以选择使用QueryList3,QueryList3支持php5.3以及手动安装。 > 如果你的PHP版本还停留在PHP5或者不会使用Composer,你可以选择使用QueryList3,QueryList3支持php5.3以及手动安装。
QueryList3 文档:http://v3.querylist.cc QueryList3 文档:http://v3.querylist.cc
@ -306,4 +306,4 @@ $ql->curlMulti([
Jaeger <JaegerCode@gmail.com> Jaeger <JaegerCode@gmail.com>
## License ## License
QueryList is licensed under the license of MIT. See the LICENSE for more details. QueryList is licensed under the license of MIT. See the LICENSE for more details.

View File

@ -31,7 +31,7 @@ Through plug-ins you can easily implement things like:
- ..... - .....
## Requirements ## Requirements
- PHP >= 7.0 - PHP >= 7.1
## Installation ## Installation
By Composer installation: By Composer installation:
@ -301,4 +301,4 @@ Jaeger <JaegerCode@gmail.com>
If this library is useful for you, say thanks by [buying me a beer :beer:](https://www.paypal.me/jaepay)! If this library is useful for you, say thanks by [buying me a beer :beer:](https://www.paypal.me/jaepay)!
## License ## License
QueryList is licensed under the license of MIT. See the LICENSE for more details. QueryList is licensed under the license of MIT. See the LICENSE for more details.

View File

@ -4,10 +4,11 @@
"keywords":["QueryList","phpQuery","spider"], "keywords":["QueryList","phpQuery","spider"],
"homepage": "http://querylist.cc", "homepage": "http://querylist.cc",
"require": { "require": {
"PHP":">=7.0", "PHP":">=7.1",
"jaeger/phpquery-single": "^1", "jaeger/phpquery-single": "^1",
"tightenco/collect": "^5", "jaeger/g-http": "^1.1",
"jaeger/g-http": "^1.1" "ext-dom": "*",
"tightenco/collect": ">5.0"
}, },
"suggest":{ "suggest":{
@ -31,6 +32,9 @@
}, },
"require-dev": { "require-dev": {
"symfony/var-dumper": "^3.3", "symfony/var-dumper": "^3.3",
"phpunit/phpunit": "^7.5" "phpunit/phpunit": "^8.5"
},
"scripts": {
"test": "./vendor/bin/phpunit"
} }
} }

View File

@ -7,6 +7,7 @@
namespace QL; namespace QL;
use Closure; use Closure;
use Tightenco\Collect\Support\Collection;
class Config class Config
{ {
@ -20,8 +21,8 @@ class Config
*/ */
public function __construct() public function __construct()
{ {
$this->plugins = collect(); $this->plugins = new Collection();
$this->binds = collect(); $this->binds = new Collection();
} }

View File

@ -7,70 +7,72 @@
namespace QL\Dom; namespace QL\Dom;
use phpDocumentor\Reflection\Types\Null_;
use phpQueryObject; use phpQueryObject;
use Tightenco\Collect\Support\Collection;
/** /**
* Class Elements * Class Elements
* @package QL\Dom * @package QL\Dom
* *
* @method Elements toReference($var) * @method Elements toReference(&$var)
* @method documentFragment($state) * @method Elements documentFragment($state = null)
* @method Elements toRoot() * @method Elements toRoot()
* @method Elements getDocumentIDRef($documentID) * @method Elements getDocumentIDRef(&$documentID)
* @method Elements getDocument() * @method Elements getDocument()
* @method getDOMDocument() * @method \DOMDocument getDOMDocument()
* @method Elements getDocumentID() * @method Elements getDocumentID()
* @method Elements unloadDocument() * @method Elements unloadDocument()
* @method isHTML() * @method bool isHTML()
* @method isXHTML() * @method bool isXHTML()
* @method isXML() * @method bool isXML()
* @method serialize() * @method string serialize()
* @method serializeArray($submit) * @method array serializeArray($submit = null)
* @method get($index,$callback1,$callback2,$callback3) * @method \DOMElement|\DOMElement[] get($index = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method getString($index,$callback1,$callback2,$callback3) * @method string|array getString($index = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method getStrings($index,$callback1,$callback2,$callback3) * @method string|array getStrings($index = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method newInstance($newStack) * @method Elements newInstance($newStack = null)
* @method Elements find($selectors,$context,$noHistory) * @method Elements find($selectors, $context = null, $noHistory = false)
* @method Elements is($selector,$nodes) * @method Elements|bool is($selector, $nodes = null)
* @method Elements filterCallback($callback,$_skipHistory) * @method Elements filterCallback($callback, $_skipHistory = false)
* @method Elements filter($selectors,$_skipHistory) * @method Elements filter($selectors, $_skipHistory = false)
* @method load($url,$data,$callback) * @method Elements load($url, $data = null, $callback = null)
* @method Elements trigger($type,$data) * @method Elements trigger($type, $data = [])
* @method Elements triggerHandler($type,$data) * @method Elements triggerHandler($type, $data = [])
* @method Elements bind($type,$data,$callback) * @method Elements bind($type, $data, $callback = null)
* @method unbind($type,$callback) * @method Elements unbind($type = null, $callback = null)
* @method Elements change($callback) * @method Elements change($callback = null)
* @method Elements submit($callback) * @method Elements submit($callback = null)
* @method Elements click($callback) * @method Elements click($callback = null)
* @method Elements wrapAllOld($wrapper) * @method Elements wrapAllOld($wrapper)
* @method Elements wrapAll($wrapper) * @method Elements wrapAll($wrapper)
* @method Elements wrapAllPHP($codeBefore,$codeAfter) * @method Elements wrapAllPHP($codeBefore, $codeAfter)
* @method Elements wrap($wrapper) * @method Elements wrap($wrapper)
* @method Elements wrapPHP($codeBefore,$codeAfter) * @method Elements wrapPHP($codeBefore, $codeAfter)
* @method Elements wrapInner($wrapper) * @method Elements wrapInner($wrapper)
* @method Elements wrapInnerPHP($codeBefore,$codeAfter) * @method Elements wrapInnerPHP($codeBefore, $codeAfter)
* @method Elements contents() * @method Elements contents()
* @method Elements contentsUnwrap() * @method Elements contentsUnwrap()
* @method switchWith($markup) * @method Elements switchWith($markup)
* @method Elements eq($num) * @method Elements eq($num)
* @method Elements size() * @method Elements size()
* @method Elements length() * @method Elements length()
* @method count() * @method int count()
* @method Elements end($level) * @method Elements end($level = 1)
* @method Elements _clone() * @method Elements _clone()
* @method Elements replaceWithPHP($code) * @method Elements replaceWithPHP($code)
* @method Elements replaceWith($content) * @method Elements replaceWith($content)
* @method Elements replaceAll($selector) * @method Elements replaceAll($selector)
* @method Elements remove($selector) * @method Elements remove($selector = null)
* @method markup($markup,$callback1,$callback2,$callback3) * @method Elements|string markup($markup = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method markupOuter($callback1,$callback2,$callback3) * @method string markupOuter($callback1 = null, $callback2 = null, $callback3 = null)
* @method html($html,$callback1,$callback2,$callback3) * @method Elements|string html($html = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method xml($xml,$callback1,$callback2,$callback3) * @method Elements|string xml($xml = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method htmlOuter($callback1,$callback2,$callback3) * @method string htmlOuter($callback1 = null, $callback2 = null, $callback3 = null)
* @method xmlOuter($callback1,$callback2,$callback3) * @method string xmlOuter($callback1 = null, $callback2 = null, $callback3 = null)
* @method Elements php($code) * @method Elements php($code)
* @method markupPHP($code) * @method string markupPHP($code)
* @method markupOuterPHP() * @method string markupOuterPHP()
* @method Elements children($selector) * @method Elements children($selector)
* @method Elements ancestors($selector) * @method Elements ancestors($selector)
* @method Elements append($content) * @method Elements append($content)
@ -85,56 +87,52 @@ use phpQueryObject;
* @method Elements after($content) * @method Elements after($content)
* @method Elements afterPHP($content) * @method Elements afterPHP($content)
* @method Elements insertAfter($seletor) * @method Elements insertAfter($seletor)
* @method Elements insert($target,$type) * @method Elements insert($target, $type)
* @method index($subject) * @method int index($subject)
* @method Elements slice($start,$end) * @method Elements slice($start, $end = null)
* @method Elements reverse() * @method Elements reverse()
* @method text($text,$callback1,$callback2,$callback3) * @method Elements|string text($text = null, $callback1 = null, $callback2 = null, $callback3 = null)
* @method Elements plugin($class,$file) * @method Elements plugin($class, $file = null)
* @method extend($class,$file) * @method Elements _next($selector = null)
* @method Elements _next($selector) * @method Elements _prev($selector = null)
* @method Elements _prev($selector) * @method Elements prev($selector = null)
* @method Elements prev($selector) * @method Elements prevAll($selector = null)
* @method Elements prevAll($selector) * @method Elements nextAll($selector = null)
* @method Elements nextAll($selector) * @method Elements siblings($selector = null)
* @method Elements siblings($selector) * @method Elements not($selector = null)
* @method Elements not($selector) * @method Elements add($selector = null)
* @method Elements add($selector) * @method Elements parent($selector = null)
* @method Elements parent($selector) * @method Elements parents($selector = null)
* @method Elements parents($selector) * @method Elements stack($nodeTypes = null)
* @method stack($nodeTypes) * @method Elements|string attr($attr = null, $value = null)
* @method attr($attr,$value) * @method Elements attrPHP($attr, $code)
* @method Elements attrPHP($attr,$code)
* @method Elements removeAttr($attr) * @method Elements removeAttr($attr)
* @method val($val) * @method Elements|string val($val = null)
* @method Elements andSelf() * @method Elements andSelf()
* @method Elements addClass($className) * @method Elements addClass($className)
* @method Elements addClassPHP($className) * @method Elements addClassPHP($className)
* @method hasClass($className) * @method bool hasClass($className)
* @method Elements removeClass($className) * @method Elements removeClass($className)
* @method Elements toggleClass($className) * @method Elements toggleClass($className)
* @method Elements _empty() * @method Elements _empty()
* @method Elements each($callback,$param1,$param2,$param3) * @method Elements callback($callback, $param1 = null, $param2 = null, $param3 = null)
* @method Elements callback($callback,$param1,$param2,$param3) * @method string data($key, $value = null)
* @method data($key,$value) * @method Elements removeData($key)
* @method removeData($key) * @method void rewind()
* @method rewind() * @method Elements current()
* @method current() * @method int key()
* @method key() * @method Elements next($cssSelector = null)
* @method Elements next($cssSelector) * @method bool valid()
* @method valid() * @method bool offsetExists($offset)
* @method offsetExists($offset) * @method Elements offsetGet($offset)
* @method offsetGet($offset) * @method void offsetSet($offset, $value)
* @method offsetSet($offset,$value) * @method string whois($oneNode)
* @method offsetUnset($offset)
* @method whois($oneNode)
* @method Elements dump() * @method Elements dump()
* @method dumpWhois() * @method Elements dumpWhois()
* @method dumpLength() * @method Elements dumpLength()
* @method dumpTree($html,$title) * @method Elements dumpTree($html, $title)
* @method dumpDie() * @method dumpDie()
*/ */
class Elements class Elements
{ {
/** /**
@ -153,30 +151,49 @@ class Elements
public function __get($name) public function __get($name)
{ {
return property_exists($this->elements,$name)?$this->elements->$name:$this->elements->attr($name); return property_exists($this->elements, $name) ? $this->elements->$name : $this->elements->attr($name);
} }
public function __call($name, $arguments) public function __call($name, $arguments)
{ {
$obj = call_user_func_array([$this->elements,$name],$arguments); $obj = call_user_func_array([$this->elements, $name], $arguments);
if($obj instanceof phpQueryObject){ if ($obj instanceof phpQueryObject) {
$obj = new self($obj); $obj = new self($obj);
}else if(is_string($obj)){ } else if (is_string($obj)) {
$obj = trim($obj); $obj = trim($obj);
} }
return $obj; return $obj;
} }
/**
 * Iterate over the wrapped elements, invoking a callback for each one.
 *
 * Every node is re-wrapped via pq() in a new Elements instance before it is
 * handed to the callback, so the callback receives (Elements $element, $key).
 * Iteration stops early as soon as the callback returns exactly false
 * (strict comparison); any other return value is ignored.
 *
 * @param callable $callback Invoked as $callback(Elements $element, $key)
 *
 * @return $this
 */
public function each(callable $callback)
{
foreach ($this->elements as $key => $element) {
$break = $callback(new self(pq($element)), $key);
// Only a strict false aborts the loop; null/0/'' keep iterating.
if ($break === false) {
break;
}
}
return $this;
}
/** /**
* Iterating elements * Iterating elements
* *
* @param $callback * @param $callback
* @return \Illuminate\Support\Collection * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
*/ */
public function map($callback) public function map($callback)
{ {
$collection = collect(); $collection = new Collection();
$this->elements->each(function($dom) use(& $collection,$callback){ $this->elements->each(function ($dom) use (& $collection, $callback) {
$collection->push($callback(new self(pq($dom)))); $collection->push($callback(new self(pq($dom))));
}); });
return $collection; return $collection;
@ -185,12 +202,12 @@ class Elements
/** /**
* Gets the attributes of all the elements * Gets the attributes of all the elements
* *
* @param $attr HTML attribute name * @param string $attr HTML attribute name
* @return \Illuminate\Support\Collection * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
*/ */
public function attrs($attr) public function attrs($attr)
{ {
return $this->map(function($item) use($attr){ return $this->map(function ($item) use ($attr) {
return $item->attr($attr); return $item->attr($attr);
}); });
} }
@ -198,11 +215,11 @@ class Elements
/** /**
* Gets the text of all the elements * Gets the text of all the elements
* *
* @return \Illuminate\Support\Collection * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
*/ */
public function texts() public function texts()
{ {
return $this->map(function($item){ return $this->map(function ($item) {
return trim($item->text()); return trim($item->text());
}); });
} }
@ -210,15 +227,28 @@ class Elements
/** /**
* Gets the html of all the elements * Gets the html of all the elements
* *
* @return \Illuminate\Support\Collection * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
*/ */
public function htmls() public function htmls()
{ {
return $this->map(function($item){ return $this->map(function ($item) {
return trim($item->html()); return trim($item->html());
}); });
} }
/**
 * Gets the outer HTML of all the elements.
 *
 * Calls htmlOuter() on every element through map() and trims the result,
 * yielding one string per element.
 *
 * @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
 */
public function htmlOuters()
{
return $this->map(function ($item) {
return trim($item->htmlOuter());
});
}
/** /**
* @return phpQueryObject * @return phpQueryObject
*/ */

View File

@ -9,12 +9,16 @@ namespace QL\Dom;
use Tightenco\Collect\Support\Collection; use Tightenco\Collect\Support\Collection;
use phpQuery; use phpQuery;
use phpQueryObject;
use QL\QueryList; use QL\QueryList;
use Closure; use Closure;
class Query class Query
{ {
protected $html; protected $html;
/**
* @var \phpQueryObject
*/
protected $document; protected $document;
protected $rules; protected $rules;
protected $range = null; protected $range = null;
@ -31,11 +35,12 @@ class Query
} }
/** /**
* @return mixed * @param bool $rel
* @return string
*/ */
public function getHtml() public function getHtml($rel = true)
{ {
return $this->html; return $rel ? $this->document->htmlOuter() : $this->html;
} }
/** /**
@ -46,7 +51,8 @@ class Query
public function setHtml($html, $charset = null) public function setHtml($html, $charset = null)
{ {
$this->html = value($html); $this->html = value($html);
$this->document = phpQuery::newDocumentHTML($this->html,$charset); $this->destroyDocument();
$this->document = phpQuery::newDocumentHTML($this->html, $charset);
return $this->ql; return $this->ql;
} }
@ -58,7 +64,7 @@ class Query
*/ */
public function getData(Closure $callback = null) public function getData(Closure $callback = null)
{ {
return is_null($callback) ? $this->data : $this->data->map($callback); return $this->handleData($this->data, $callback);
} }
/** /**
@ -119,8 +125,8 @@ class Query
*/ */
public function removeHead() public function removeHead()
{ {
$html = preg_replace('/<head.+?>.+<\/head>/is','<head></head>',$this->html); $html = preg_replace('/(<head>|<head\s+.+?>).+?<\/head>/is', '<head></head>', $this->html);
$this->setHtml($html); $html && $this->setHtml($html);
return $this->ql; return $this->ql;
} }
@ -133,113 +139,147 @@ class Query
public function query(Closure $callback = null) public function query(Closure $callback = null)
{ {
$this->data = $this->getList(); $this->data = $this->getList();
$callback && $this->data = $this->data->map($callback); $this->data = $this->handleData($this->data, $callback);
return $this->ql; return $this->ql;
} }
/**
 * Apply an optional callback to a collection of extracted data.
 *
 * The calling convention depends on whether a range is configured:
 *  - no range: the callback is invoked once as $callback(array $all, null)
 *    and its return value is wrapped in a new Collection;
 *  - range set: the callback is applied through Collection::map().
 *
 * When $callback is not callable the data is returned unchanged.
 *
 * @param Collection $data     Data to post-process
 * @param mixed      $callback Callable to apply, or a non-callable value to skip processing
 *
 * @return Collection
 */
public function handleData(Collection $data, $callback)
{
if (is_callable($callback)) {
if (empty($this->range)) {
// Without a range the whole result set is passed to the callback in one call.
$data = new Collection($callback($data->all(), null));
} else {
$data = $data->map($callback);
}
}
return $data;
}
protected function getList() protected function getList()
{ {
$data = []; $data = [];
if (!empty($this->range)) { if (empty($this->range)) {
$robj = $this->document->find($this->range); foreach ($this->rules as $key => $reg_value) {
$rule = $this->parseRule($reg_value);
$contentElements = $this->document->find($rule['selector']);
$data[$key] = $this->extractContent($contentElements, $key, $rule);
}
} else {
$rangeElements = $this->document->find($this->range);
$i = 0; $i = 0;
foreach ($robj as $item) { foreach ($rangeElements as $element) {
foreach ($this->rules as $key => $reg_value){ foreach ($this->rules as $key => $reg_value) {
$tags = $reg_value[2] ?? ''; $rule = $this->parseRule($reg_value);
$iobj = pq($item,$this->document)->find($reg_value[0]); $contentElements = pq($element)->find($rule['selector']);
switch ($reg_value[1]) { $data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
case 'text':
$data[$i][$key] = $this->allowTags(pq($iobj)->html(),$tags);
break;
case 'html':
$data[$i][$key] = $this->stripTags(pq($iobj)->html(),$tags);
break;
default:
$data[$i][$key] = pq($iobj)->attr($reg_value[1]);
break;
}
if(isset($reg_value[3])){
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
}
} }
$i++; $i++;
} }
} else {
foreach ($this->rules as $key => $reg_value){
$tags = $reg_value[2] ?? '';
$lobj = $this->document->find($reg_value[0]);
$i = 0;
foreach ($lobj as $item) {
switch ($reg_value[1]) {
case 'text':
$data[$i][$key] = $this->allowTags(pq($item,$this->document)->html(),$tags);
break;
case 'html':
$data[$i][$key] = $this->stripTags(pq($item,$this->document)->html(),$tags);
break;
default:
$data[$i][$key] = pq($item,$this->document)->attr($reg_value[1]);
break;
}
if(isset($reg_value[3])){
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
}
$i++;
}
}
} }
// phpQuery::$documents = array();
return collect($data); return new Collection($data);
}
/**
 * Extract the content described by a parsed rule from the matched elements.
 *
 * Supported values of $rule['attr']:
 *  - 'text'       : html() of the selection passed through allowTags()
 *  - 'texts'      : same as 'text' but per element, returned as an array
 *  - 'html'       : html() of the selection passed through stripTags()
 *  - 'htmls'      : same as 'html' but per element, returned as an array
 *  - 'htmlOuter'  : htmlOuter() of the selection passed through stripTags()
 *  - 'htmlOuters' : same as 'htmlOuter' but per element, returned as an array
 *  - 'attr(name)' : value of attribute "name" via attr()
 *  - 'attrs(name)': attribute "name" of every element, returned as an array
 *  - anything else: treated as an attribute name and read via attr()
 *
 * If $rule['handle_callback'] is callable it is finally invoked as
 * callback($content, $ruleName) and its return value replaces the content.
 *
 * @param phpQueryObject $pqObj    Elements matched by the rule's selector
 * @param mixed          $ruleName Key of the rule, forwarded to the callback
 * @param array          $rule     Rule with the keys 'attr', 'filter_tags' and 'handle_callback'
 *
 * @return mixed Extracted content (an array for the plural keywords)
 */
protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule)
{
switch ($rule['attr']) {
case 'text':
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
break;
case 'texts':
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
return $this->allowTags($element->html(), $rule['filter_tags']);
})->all();
break;
case 'html':
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
break;
case 'htmls':
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
return $this->stripTags($element->html(), $rule['filter_tags']);
})->all();
break;
case 'htmlOuter':
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
break;
case 'htmlOuters':
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
})->all();
break;
default:
// Function-style forms are tried first; a bare name falls through to attr().
if(preg_match('/attr\((.+)\)/', $rule['attr'], $arr)) {
$content = $pqObj->attr($arr[1]);
} elseif (preg_match('/attrs\((.+)\)/', $rule['attr'], $arr)) {
$content = (new Elements($pqObj))->attrs($arr[1])->all();
} else {
$content = $pqObj->attr($rule['attr']);
}
break;
}
// Optional per-rule post-processing hook.
if (is_callable($rule['handle_callback'])) {
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
}
return $content;
}
/**
 * Convert a positional rule definition into an array with named keys.
 *
 * Mapping: [0] => 'selector', [1] => 'attr', [2] => 'filter_tags'
 * (defaults to ''), [3] => 'handle_callback' (defaults to null).
 * Indexes 0 and 1 are read without a fallback, so both must be present.
 *
 * @param array $rule Positional rule definition
 *
 * @return array Rule with the keys 'selector', 'attr', 'filter_tags' and 'handle_callback'
 */
protected function parseRule($rule)
{
$result = [];
$result['selector'] = $rule[0];
$result['attr'] = $rule[1];
$result['filter_tags'] = $rule[2] ?? '';
$result['handle_callback'] = $rule[3] ?? null;
return $result;
}
/** /**
* 去除特定的html标签 * 去除特定的html标签
* @param string $html * @param string $html
* @param string $tags_str 多个标签名之间用空格隔开 * @param string $tags_str 多个标签名之间用空格隔开
* @return string * @return string
*/ */
protected function stripTags($html,$tags_str) protected function stripTags($html, $tags_str)
{ {
$tagsArr = $this->tag($tags_str); $tagsArr = $this->tag($tags_str);
$html = $this->removeTags($html,$tagsArr[1]); $html = $this->removeTags($html, $tagsArr[1]);
$p = array(); $p = array();
foreach ($tagsArr[0] as $tag) { foreach ($tagsArr[0] as $tag) {
$p[]="/(<(?:\/".$tag."|".$tag.")[^>]*>)/i"; $p[] = "/(<(?:\/" . $tag . "|" . $tag . ")[^>]*>)/i";
} }
$html = preg_replace($p,"",trim($html)); $html = preg_replace($p, "", trim($html));
return $html; return $html;
} }
/** /**
* 保留特定的html标签 * 保留特定的html标签
* @param string $html * @param string $html
* @param string $tags_str 多个标签名之间用空格隔开 * @param string $tags_str 多个标签名之间用空格隔开
* @return string * @return string
*/ */
protected function allowTags($html,$tags_str) protected function allowTags($html, $tags_str)
{ {
$tagsArr = $this->tag($tags_str); $tagsArr = $this->tag($tags_str);
$html = $this->removeTags($html,$tagsArr[1]); $html = $this->removeTags($html, $tagsArr[1]);
$allow = ''; $allow = '';
foreach ($tagsArr[0] as $tag) { foreach ($tagsArr[0] as $tag) {
$allow .= "<$tag> "; $allow .= "<$tag> ";
} }
return strip_tags(trim($html),$allow); return strip_tags(trim($html), $allow);
} }
protected function tag($tags_str) protected function tag($tags_str)
{ {
$tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY); $tagArr = preg_split("/\s+/", $tags_str, -1, PREG_SPLIT_NO_EMPTY);
$tags = array(array(),array()); $tags = array(array(), array());
foreach($tagArr as $tag) foreach ($tagArr as $tag) {
{ if (preg_match('/-(.+)/', $tag, $arr)) {
if(preg_match('/-(.+)/', $tag,$arr))
{
array_push($tags[1], $arr[1]); array_push($tags[1], $arr[1]);
}else{ } else {
array_push($tags[0], $tag); array_push($tags[0], $tag);
} }
} }
@ -248,17 +288,16 @@ class Query
/** /**
* 移除特定的html标签 * 移除特定的html标签
* @param string $html * @param string $html
* @param array $tags 标签数组 * @param array $tags 标签数组
* @return string * @return string
*/ */
protected function removeTags($html,$tags) protected function removeTags($html, $tags)
{ {
$tag_str = ''; $tag_str = '';
if(count($tags)) if (count($tags)) {
{
foreach ($tags as $tag) { foreach ($tags as $tag) {
$tag_str .= $tag_str?','.$tag:$tag; $tag_str .= $tag_str ? ',' . $tag : $tag;
} }
// phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding; // phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
$doc = phpQuery::newDocumentHTML($html); $doc = phpQuery::newDocumentHTML($html);
@ -268,4 +307,16 @@ class Query
} }
return $html; return $html;
} }
}
/**
 * Unload the currently held phpQuery document, if there is one.
 *
 * Does nothing unless $this->document is a phpQueryObject. The property
 * itself is not reset here.
 * NOTE(review): presumably this releases the document from phpQuery's
 * internal registry to limit memory growth — confirm against phpQuery.
 *
 * @return void
 */
protected function destroyDocument()
{
if ($this->document instanceof phpQueryObject) {
$this->document->unloadDocument();
}
}
/**
 * Unload the held phpQuery document when this object is destroyed.
 */
public function __destruct()
{
$this->destroyDocument();
}
}

View File

@ -14,6 +14,7 @@ use Closure;
use QL\Providers\HttpServiceProvider; use QL\Providers\HttpServiceProvider;
use QL\Providers\PluginServiceProvider; use QL\Providers\PluginServiceProvider;
use QL\Providers\SystemServiceProvider; use QL\Providers\SystemServiceProvider;
use Tightenco\Collect\Support\Collection;
class Kernel class Kernel
{ {
@ -34,7 +35,7 @@ class Kernel
public function __construct(QueryList $ql) public function __construct(QueryList $ql)
{ {
$this->ql = $ql; $this->ql = $ql;
$this->binds = collect(); $this->binds = new Collection();
} }
public function bootstrap() public function bootstrap()

View File

@ -23,7 +23,7 @@ use QL\Services\MultiRequestService;
* Class QueryList * Class QueryList
* @package QL * @package QL
* *
* @method string getHtml() * @method string getHtml($rel = true)
* @method QueryList setHtml($html) * @method QueryList setHtml($html)
* @method QueryList html($html) * @method QueryList html($html)
* @method Dom\Elements find($selector) * @method Dom\Elements find($selector)
@ -104,6 +104,15 @@ class QueryList
* Destruction of resources * Destruction of resources
*/ */
public function destruct() public function destruct()
{
unset($this->query);
unset($this->kernel);
}
/**
* Destroy all documents
*/
public static function destructDocuments()
{ {
phpQuery::$documents = []; phpQuery::$documents = [];
} }

View File

@ -17,7 +17,7 @@ class FindTest extends TestCaseBase
protected $html; protected $html;
protected $ql; protected $ql;
public function setUp() protected function setUp(): void
{ {
$this->html = $this->getSnippet('snippet-1'); $this->html = $this->getSnippet('snippet-1');
$this->ql = QueryList::html($this->html); $this->ql = QueryList::html($this->html);

View File

@ -18,7 +18,7 @@ class RulesTest extends TestCaseBase
protected $html; protected $html;
protected $ql; protected $ql;
public function setUp() protected function setUp(): void
{ {
$this->html = $this->getSnippet('snippet-2'); $this->html = $this->getSnippet('snippet-2');
$this->ql = QueryList::html($this->html); $this->ql = QueryList::html($this->html);

View File

@ -18,7 +18,7 @@ class HttpTest extends TestCaseBase
{ {
protected $urls; protected $urls;
public function setUp() protected function setUp(): void
{ {
$this->urls = [ $this->urls = [
'http://httpbin.org/get?name=php', 'http://httpbin.org/get?name=php',

View File

@ -16,7 +16,7 @@ class InstanceTest extends TestCaseBase
{ {
protected $html; protected $html;
public function setUp() protected function setUp(): void
{ {
$this->html = $this->getSnippet('snippet-1'); $this->html = $this->getSnippet('snippet-1');
} }
@ -38,11 +38,11 @@ class InstanceTest extends TestCaseBase
public function get_new_object() public function get_new_object()
{ {
$ql = (new QueryList())->html($this->html); $ql = (new QueryList())->html($this->html);
$ql2 = new QueryList(); $ql2 = (new QueryList())->html('');
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml()); $this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
$ql = QueryList::range('')->html($this->html); $ql = QueryList::range('')->html($this->html);
$ql2 = QueryList::range(''); $ql2 = QueryList::range('')->html('');
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml()); $this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
} }
} }

View File

@ -16,7 +16,7 @@ class MethodTest extends TestCaseBase
{ {
protected $html; protected $html;
public function setUp() protected function setUp(): void
{ {
$this->html = $this->getSnippet('snippet-1'); $this->html = $this->getSnippet('snippet-1');
} }
@ -30,7 +30,7 @@ class MethodTest extends TestCaseBase
$qlHtml = QueryList::pipe(function(QueryList $ql) use($html){ $qlHtml = QueryList::pipe(function(QueryList $ql) use($html){
$ql->setHtml($html); $ql->setHtml($html);
return $ql; return $ql;
})->getHtml(); })->getHtml(false);
$this->assertEquals($html,$qlHtml); $this->assertEquals($html,$qlHtml);
} }
} }