Compare commits
36 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
894fb4344e | ||
|
e4fc716acd | ||
|
39dc0ca9c6 | ||
|
ef0a2efd4f | ||
|
5953daac54 | ||
|
465c6aefc7 | ||
|
92cb319d44 | ||
|
cbf3e0fcad | ||
|
cfa2d94a79 | ||
|
47a444bf9e | ||
|
85903fa9b5 | ||
|
e527c637c7 | ||
|
f0a9798925 | ||
|
faea883c6f | ||
|
c16826a573 | ||
|
1492751f98 | ||
|
b7954b9aef | ||
|
b3d84cf057 | ||
|
52bbdeae14 | ||
|
25b2dbdc86 | ||
|
02c2b125d8 | ||
|
fc8b701ef2 | ||
|
75e436c73f | ||
|
aa90e5a21d | ||
|
dd9af6881d | ||
|
b07d4bfc74 | ||
|
8c1614c4c3 | ||
|
b387ef5bb0 | ||
|
67f0052c5d | ||
|
7c86f82527 | ||
|
6ee6a26aee | ||
|
116f19da65 | ||
|
67cbd0f473 | ||
|
3eb26451c6 | ||
|
a76ecb4258 | ||
|
46f564bc8b |
12
.github/FUNDING.yml
vendored
Normal file
12
.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
|
||||
patreon: # Replace with a single Patreon username
|
||||
open_collective: querylist # Replace with a single Open Collective username
|
||||
ko_fi: # Replace with a single Ko-fi username
|
||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||
liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
otechie: # Replace with a single Otechie username
|
||||
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@
|
||||
.idea/
|
||||
composer.lock
|
||||
.DS_Store
|
||||
*.cache
|
@ -29,7 +29,7 @@
|
||||
- .....
|
||||
|
||||
## 环境要求
|
||||
- PHP >= 7.0
|
||||
- PHP >= 7.1
|
||||
|
||||
> 如果你的PHP版本还停留在PHP5,或者不会使用Composer,你可以选择使用QueryList3,QueryList3支持php5.3以及手动安装。
|
||||
QueryList3 文档:http://v3.querylist.cc
|
||||
|
@ -31,7 +31,7 @@ Through plug-ins you can easily implement things like:
|
||||
- .....
|
||||
|
||||
## Requirements
|
||||
- PHP >= 7.0
|
||||
- PHP >= 7.1
|
||||
|
||||
## Installation
|
||||
By Composer installation:
|
||||
|
@ -4,10 +4,11 @@
|
||||
"keywords":["QueryList","phpQuery","spider"],
|
||||
"homepage": "http://querylist.cc",
|
||||
"require": {
|
||||
"PHP":">=7.0",
|
||||
"jaeger/phpquery-single": "^0.9",
|
||||
"tightenco/collect": "^5",
|
||||
"jaeger/g-http": "^1.1"
|
||||
"PHP":">=7.1",
|
||||
"jaeger/phpquery-single": "^1",
|
||||
"jaeger/g-http": "^1.1",
|
||||
"ext-dom": "*",
|
||||
"tightenco/collect": ">5.0"
|
||||
},
|
||||
"suggest":{
|
||||
|
||||
@ -31,6 +32,9 @@
|
||||
},
|
||||
"require-dev": {
|
||||
"symfony/var-dumper": "^3.3",
|
||||
"phpunit/phpunit": "^7.5"
|
||||
"phpunit/phpunit": "^8.5"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "./vendor/bin/phpunit"
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
namespace QL;
|
||||
use Closure;
|
||||
use Tightenco\Collect\Support\Collection;
|
||||
|
||||
class Config
|
||||
{
|
||||
@ -20,8 +21,8 @@ class Config
|
||||
*/
|
||||
public function __construct()
|
||||
{
|
||||
$this->plugins = collect();
|
||||
$this->binds = collect();
|
||||
$this->plugins = new Collection();
|
||||
$this->binds = new Collection();
|
||||
}
|
||||
|
||||
|
||||
|
@ -7,70 +7,72 @@
|
||||
|
||||
namespace QL\Dom;
|
||||
|
||||
use phpDocumentor\Reflection\Types\Null_;
|
||||
use phpQueryObject;
|
||||
use Tightenco\Collect\Support\Collection;
|
||||
|
||||
/**
|
||||
* Class Elements
|
||||
* @package QL\Dom
|
||||
*
|
||||
* @method Elements toReference($var)
|
||||
* @method documentFragment($state)
|
||||
* @method Elements toReference(&$var)
|
||||
* @method Elements documentFragment($state = null)
|
||||
* @method Elements toRoot()
|
||||
* @method Elements getDocumentIDRef($documentID)
|
||||
* @method Elements getDocumentIDRef(&$documentID)
|
||||
* @method Elements getDocument()
|
||||
* @method getDOMDocument()
|
||||
* @method \DOMDocument getDOMDocument()
|
||||
* @method Elements getDocumentID()
|
||||
* @method Elements unloadDocument()
|
||||
* @method isHTML()
|
||||
* @method isXHTML()
|
||||
* @method isXML()
|
||||
* @method serialize()
|
||||
* @method serializeArray($submit)
|
||||
* @method get($index,$callback1,$callback2,$callback3)
|
||||
* @method getString($index,$callback1,$callback2,$callback3)
|
||||
* @method getStrings($index,$callback1,$callback2,$callback3)
|
||||
* @method newInstance($newStack)
|
||||
* @method Elements find($selectors,$context,$noHistory)
|
||||
* @method Elements is($selector,$nodes)
|
||||
* @method Elements filterCallback($callback,$_skipHistory)
|
||||
* @method Elements filter($selectors,$_skipHistory)
|
||||
* @method load($url,$data,$callback)
|
||||
* @method Elements trigger($type,$data)
|
||||
* @method Elements triggerHandler($type,$data)
|
||||
* @method Elements bind($type,$data,$callback)
|
||||
* @method unbind($type,$callback)
|
||||
* @method Elements change($callback)
|
||||
* @method Elements submit($callback)
|
||||
* @method Elements click($callback)
|
||||
* @method bool isHTML()
|
||||
* @method bool isXHTML()
|
||||
* @method bool isXML()
|
||||
* @method string serialize()
|
||||
* @method array serializeArray($submit = null)
|
||||
* @method \DOMElement|\DOMElement[] get($index = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method string|array getString($index = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method string|array getStrings($index = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method Elements newInstance($newStack = null)
|
||||
* @method Elements find($selectors, $context = null, $noHistory = false)
|
||||
* @method Elements|bool is($selector, $nodes = null)
|
||||
* @method Elements filterCallback($callback, $_skipHistory = false)
|
||||
* @method Elements filter($selectors, $_skipHistory = false)
|
||||
* @method Elements load($url, $data = null, $callback = null)
|
||||
* @method Elements trigger($type, $data = [])
|
||||
* @method Elements triggerHandler($type, $data = [])
|
||||
* @method Elements bind($type, $data, $callback = null)
|
||||
* @method Elements unbind($type = null, $callback = null)
|
||||
* @method Elements change($callback = null)
|
||||
* @method Elements submit($callback = null)
|
||||
* @method Elements click($callback = null)
|
||||
* @method Elements wrapAllOld($wrapper)
|
||||
* @method Elements wrapAll($wrapper)
|
||||
* @method Elements wrapAllPHP($codeBefore,$codeAfter)
|
||||
* @method Elements wrapAllPHP($codeBefore, $codeAfter)
|
||||
* @method Elements wrap($wrapper)
|
||||
* @method Elements wrapPHP($codeBefore,$codeAfter)
|
||||
* @method Elements wrapPHP($codeBefore, $codeAfter)
|
||||
* @method Elements wrapInner($wrapper)
|
||||
* @method Elements wrapInnerPHP($codeBefore,$codeAfter)
|
||||
* @method Elements wrapInnerPHP($codeBefore, $codeAfter)
|
||||
* @method Elements contents()
|
||||
* @method Elements contentsUnwrap()
|
||||
* @method switchWith($markup)
|
||||
* @method Elements switchWith($markup)
|
||||
* @method Elements eq($num)
|
||||
* @method Elements size()
|
||||
* @method Elements length()
|
||||
* @method count()
|
||||
* @method Elements end($level)
|
||||
* @method int count()
|
||||
* @method Elements end($level = 1)
|
||||
* @method Elements _clone()
|
||||
* @method Elements replaceWithPHP($code)
|
||||
* @method Elements replaceWith($content)
|
||||
* @method Elements replaceAll($selector)
|
||||
* @method Elements remove($selector)
|
||||
* @method markup($markup,$callback1,$callback2,$callback3)
|
||||
* @method markupOuter($callback1,$callback2,$callback3)
|
||||
* @method html($html,$callback1,$callback2,$callback3)
|
||||
* @method xml($xml,$callback1,$callback2,$callback3)
|
||||
* @method htmlOuter($callback1,$callback2,$callback3)
|
||||
* @method xmlOuter($callback1,$callback2,$callback3)
|
||||
* @method Elements remove($selector = null)
|
||||
* @method Elements|string markup($markup = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method string markupOuter($callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method Elements|string html($html = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method Elements|string xml($xml = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method string htmlOuter($callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method string xmlOuter($callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method Elements php($code)
|
||||
* @method markupPHP($code)
|
||||
* @method markupOuterPHP()
|
||||
* @method string markupPHP($code)
|
||||
* @method string markupOuterPHP()
|
||||
* @method Elements children($selector)
|
||||
* @method Elements ancestors($selector)
|
||||
* @method Elements append($content)
|
||||
@ -85,56 +87,52 @@ use phpQueryObject;
|
||||
* @method Elements after($content)
|
||||
* @method Elements afterPHP($content)
|
||||
* @method Elements insertAfter($seletor)
|
||||
* @method Elements insert($target,$type)
|
||||
* @method index($subject)
|
||||
* @method Elements slice($start,$end)
|
||||
* @method Elements insert($target, $type)
|
||||
* @method int index($subject)
|
||||
* @method Elements slice($start, $end = null)
|
||||
* @method Elements reverse()
|
||||
* @method text($text,$callback1,$callback2,$callback3)
|
||||
* @method Elements plugin($class,$file)
|
||||
* @method extend($class,$file)
|
||||
* @method Elements _next($selector)
|
||||
* @method Elements _prev($selector)
|
||||
* @method Elements prev($selector)
|
||||
* @method Elements prevAll($selector)
|
||||
* @method Elements nextAll($selector)
|
||||
* @method Elements siblings($selector)
|
||||
* @method Elements not($selector)
|
||||
* @method Elements add($selector)
|
||||
* @method Elements parent($selector)
|
||||
* @method Elements parents($selector)
|
||||
* @method stack($nodeTypes)
|
||||
* @method attr($attr,$value)
|
||||
* @method Elements attrPHP($attr,$code)
|
||||
* @method Elements|string text($text = null, $callback1 = null, $callback2 = null, $callback3 = null)
|
||||
* @method Elements plugin($class, $file = null)
|
||||
* @method Elements _next($selector = null)
|
||||
* @method Elements _prev($selector = null)
|
||||
* @method Elements prev($selector = null)
|
||||
* @method Elements prevAll($selector = null)
|
||||
* @method Elements nextAll($selector = null)
|
||||
* @method Elements siblings($selector = null)
|
||||
* @method Elements not($selector = null)
|
||||
* @method Elements add($selector = null)
|
||||
* @method Elements parent($selector = null)
|
||||
* @method Elements parents($selector = null)
|
||||
* @method Elements stack($nodeTypes = null)
|
||||
* @method Elements|string attr($attr = null, $value = null)
|
||||
* @method Elements attrPHP($attr, $code)
|
||||
* @method Elements removeAttr($attr)
|
||||
* @method val($val)
|
||||
* @method Elements|string val($val = null)
|
||||
* @method Elements andSelf()
|
||||
* @method Elements addClass($className)
|
||||
* @method Elements addClassPHP($className)
|
||||
* @method hasClass($className)
|
||||
* @method bool hasClass($className)
|
||||
* @method Elements removeClass($className)
|
||||
* @method Elements toggleClass($className)
|
||||
* @method Elements _empty()
|
||||
* @method Elements each($callback,$param1,$param2,$param3)
|
||||
* @method Elements callback($callback,$param1,$param2,$param3)
|
||||
* @method data($key,$value)
|
||||
* @method removeData($key)
|
||||
* @method rewind()
|
||||
* @method current()
|
||||
* @method key()
|
||||
* @method Elements next($cssSelector)
|
||||
* @method valid()
|
||||
* @method offsetExists($offset)
|
||||
* @method offsetGet($offset)
|
||||
* @method offsetSet($offset,$value)
|
||||
* @method offsetUnset($offset)
|
||||
* @method whois($oneNode)
|
||||
* @method Elements callback($callback, $param1 = null, $param2 = null, $param3 = null)
|
||||
* @method string data($key, $value = null)
|
||||
* @method Elements removeData($key)
|
||||
* @method void rewind()
|
||||
* @method Elements current()
|
||||
* @method int key()
|
||||
* @method Elements next($cssSelector = null)
|
||||
* @method bool valid()
|
||||
* @method bool offsetExists($offset)
|
||||
* @method Elements offsetGet($offset)
|
||||
* @method void offsetSet($offset, $value)
|
||||
* @method string whois($oneNode)
|
||||
* @method Elements dump()
|
||||
* @method dumpWhois()
|
||||
* @method dumpLength()
|
||||
* @method dumpTree($html,$title)
|
||||
* @method Elements dumpWhois()
|
||||
* @method Elements dumpLength()
|
||||
* @method Elements dumpTree($html, $title)
|
||||
* @method dumpDie()
|
||||
*/
|
||||
|
||||
class Elements
|
||||
{
|
||||
/**
|
||||
@ -153,30 +151,49 @@ class Elements
|
||||
|
||||
public function __get($name)
|
||||
{
|
||||
return property_exists($this->elements,$name)?$this->elements->$name:$this->elements->attr($name);
|
||||
return property_exists($this->elements, $name) ? $this->elements->$name : $this->elements->attr($name);
|
||||
}
|
||||
|
||||
public function __call($name, $arguments)
|
||||
{
|
||||
$obj = call_user_func_array([$this->elements,$name],$arguments);
|
||||
if($obj instanceof phpQueryObject){
|
||||
$obj = call_user_func_array([$this->elements, $name], $arguments);
|
||||
if ($obj instanceof phpQueryObject) {
|
||||
$obj = new self($obj);
|
||||
}else if(is_string($obj)){
|
||||
} else if (is_string($obj)) {
|
||||
$obj = trim($obj);
|
||||
}
|
||||
return $obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterating elements
|
||||
*
|
||||
* @param callable $callback
|
||||
*
|
||||
* @return $this
|
||||
*/
|
||||
public function each(callable $callback)
|
||||
{
|
||||
foreach ($this->elements as $key => $element) {
|
||||
$break = $callback(new self(pq($element)), $key);
|
||||
if ($break === false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterating elements
|
||||
*
|
||||
* @param $callback
|
||||
* @return \Illuminate\Support\Collection
|
||||
* @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
|
||||
*/
|
||||
public function map($callback)
|
||||
{
|
||||
$collection = collect();
|
||||
$this->elements->each(function($dom) use(& $collection,$callback){
|
||||
$collection = new Collection();
|
||||
$this->elements->each(function ($dom) use (& $collection, $callback) {
|
||||
$collection->push($callback(new self(pq($dom))));
|
||||
});
|
||||
return $collection;
|
||||
@ -185,12 +202,12 @@ class Elements
|
||||
/**
|
||||
* Gets the attributes of all the elements
|
||||
*
|
||||
* @param $attr HTML attribute name
|
||||
* @return \Illuminate\Support\Collection
|
||||
* @param string $attr HTML attribute name
|
||||
* @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
|
||||
*/
|
||||
public function attrs($attr)
|
||||
{
|
||||
return $this->map(function($item) use($attr){
|
||||
return $this->map(function ($item) use ($attr) {
|
||||
return $item->attr($attr);
|
||||
});
|
||||
}
|
||||
@ -198,11 +215,11 @@ class Elements
|
||||
/**
|
||||
* Gets the text of all the elements
|
||||
*
|
||||
* @return \Illuminate\Support\Collection
|
||||
* @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
|
||||
*/
|
||||
public function texts()
|
||||
{
|
||||
return $this->map(function($item){
|
||||
return $this->map(function ($item) {
|
||||
return trim($item->text());
|
||||
});
|
||||
}
|
||||
@ -210,15 +227,28 @@ class Elements
|
||||
/**
|
||||
* Gets the html of all the elements
|
||||
*
|
||||
* @return \Illuminate\Support\Collection
|
||||
* @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
|
||||
*/
|
||||
public function htmls()
|
||||
{
|
||||
return $this->map(function($item){
|
||||
return $this->map(function ($item) {
|
||||
return trim($item->html());
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the htmlOuter of all the elements
|
||||
*
|
||||
* @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection
|
||||
*/
|
||||
public function htmlOuters()
|
||||
{
|
||||
return $this->map(function ($item) {
|
||||
return trim($item->htmlOuter());
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return phpQueryObject
|
||||
*/
|
||||
|
@ -9,12 +9,16 @@ namespace QL\Dom;
|
||||
|
||||
use Tightenco\Collect\Support\Collection;
|
||||
use phpQuery;
|
||||
use phpQueryObject;
|
||||
use QL\QueryList;
|
||||
use Closure;
|
||||
|
||||
class Query
|
||||
{
|
||||
protected $html;
|
||||
/**
|
||||
* @var \phpQueryObject
|
||||
*/
|
||||
protected $document;
|
||||
protected $rules;
|
||||
protected $range = null;
|
||||
@ -31,11 +35,12 @@ class Query
|
||||
}
|
||||
|
||||
/**
|
||||
* @return mixed
|
||||
* @param bool $rel
|
||||
* @return String
|
||||
*/
|
||||
public function getHtml()
|
||||
public function getHtml($rel = true)
|
||||
{
|
||||
return $this->html;
|
||||
return $rel ? $this->document->htmlOuter() : $this->html;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -46,7 +51,8 @@ class Query
|
||||
public function setHtml($html, $charset = null)
|
||||
{
|
||||
$this->html = value($html);
|
||||
$this->document = phpQuery::newDocumentHTML($this->html,$charset);
|
||||
$this->destroyDocument();
|
||||
$this->document = phpQuery::newDocumentHTML($this->html, $charset);
|
||||
return $this->ql;
|
||||
}
|
||||
|
||||
@ -58,7 +64,7 @@ class Query
|
||||
*/
|
||||
public function getData(Closure $callback = null)
|
||||
{
|
||||
return is_null($callback) ? $this->data : $this->data->map($callback);
|
||||
return $this->handleData($this->data, $callback);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -119,8 +125,8 @@ class Query
|
||||
*/
|
||||
public function removeHead()
|
||||
{
|
||||
$html = preg_replace('/<head.+?>.+<\/head>/is','<head></head>',$this->html);
|
||||
$this->setHtml($html);
|
||||
$html = preg_replace('/(<head>|<head\s+.+?>).+?<\/head>/is', '<head></head>', $this->html);
|
||||
$html && $this->setHtml($html);
|
||||
return $this->ql;
|
||||
}
|
||||
|
||||
@ -133,113 +139,147 @@ class Query
|
||||
public function query(Closure $callback = null)
|
||||
{
|
||||
$this->data = $this->getList();
|
||||
$callback && $this->data = $this->data->map($callback);
|
||||
$this->data = $this->handleData($this->data, $callback);
|
||||
return $this->ql;
|
||||
}
|
||||
|
||||
public function handleData(Collection $data, $callback)
|
||||
{
|
||||
if (is_callable($callback)) {
|
||||
if (empty($this->range)) {
|
||||
$data = new Collection($callback($data->all(), null));
|
||||
} else {
|
||||
$data = $data->map($callback);
|
||||
}
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
protected function getList()
|
||||
{
|
||||
$data = [];
|
||||
if (!empty($this->range)) {
|
||||
$robj = $this->document->find($this->range);
|
||||
if (empty($this->range)) {
|
||||
foreach ($this->rules as $key => $reg_value) {
|
||||
$rule = $this->parseRule($reg_value);
|
||||
$contentElements = $this->document->find($rule['selector']);
|
||||
$data[$key] = $this->extractContent($contentElements, $key, $rule);
|
||||
}
|
||||
} else {
|
||||
$rangeElements = $this->document->find($this->range);
|
||||
$i = 0;
|
||||
foreach ($robj as $item) {
|
||||
foreach ($this->rules as $key => $reg_value){
|
||||
$tags = $reg_value[2] ?? '';
|
||||
$iobj = pq($item,$this->document)->find($reg_value[0]);
|
||||
switch ($reg_value[1]) {
|
||||
case 'text':
|
||||
$data[$i][$key] = $this->allowTags(pq($iobj)->html(),$tags);
|
||||
break;
|
||||
case 'html':
|
||||
$data[$i][$key] = $this->stripTags(pq($iobj)->html(),$tags);
|
||||
break;
|
||||
default:
|
||||
$data[$i][$key] = pq($iobj)->attr($reg_value[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
if(isset($reg_value[3])){
|
||||
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
|
||||
}
|
||||
foreach ($rangeElements as $element) {
|
||||
foreach ($this->rules as $key => $reg_value) {
|
||||
$rule = $this->parseRule($reg_value);
|
||||
$contentElements = pq($element)->find($rule['selector']);
|
||||
$data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
|
||||
}
|
||||
$i++;
|
||||
}
|
||||
} else {
|
||||
foreach ($this->rules as $key => $reg_value){
|
||||
$tags = $reg_value[2] ?? '';
|
||||
$lobj = $this->document->find($reg_value[0]);
|
||||
$i = 0;
|
||||
foreach ($lobj as $item) {
|
||||
switch ($reg_value[1]) {
|
||||
case 'text':
|
||||
$data[$i][$key] = $this->allowTags(pq($item,$this->document)->html(),$tags);
|
||||
break;
|
||||
case 'html':
|
||||
$data[$i][$key] = $this->stripTags(pq($item,$this->document)->html(),$tags);
|
||||
break;
|
||||
default:
|
||||
$data[$i][$key] = pq($item,$this->document)->attr($reg_value[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
if(isset($reg_value[3])){
|
||||
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
|
||||
}
|
||||
|
||||
$i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// phpQuery::$documents = array();
|
||||
return collect($data);
|
||||
|
||||
return new Collection($data);
|
||||
}
|
||||
|
||||
protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule)
|
||||
{
|
||||
switch ($rule['attr']) {
|
||||
case 'text':
|
||||
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
|
||||
break;
|
||||
case 'texts':
|
||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
||||
return $this->allowTags($element->html(), $rule['filter_tags']);
|
||||
})->all();
|
||||
break;
|
||||
case 'html':
|
||||
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
|
||||
break;
|
||||
case 'htmls':
|
||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
||||
return $this->stripTags($element->html(), $rule['filter_tags']);
|
||||
})->all();
|
||||
break;
|
||||
case 'htmlOuter':
|
||||
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
|
||||
break;
|
||||
case 'htmlOuters':
|
||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
||||
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
|
||||
})->all();
|
||||
break;
|
||||
default:
|
||||
if(preg_match('/attr\((.+)\)/', $rule['attr'], $arr)) {
|
||||
$content = $pqObj->attr($arr[1]);
|
||||
} elseif (preg_match('/attrs\((.+)\)/', $rule['attr'], $arr)) {
|
||||
$content = (new Elements($pqObj))->attrs($arr[1])->all();
|
||||
} else {
|
||||
$content = $pqObj->attr($rule['attr']);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_callable($rule['handle_callback'])) {
|
||||
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
|
||||
}
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
||||
protected function parseRule($rule)
|
||||
{
|
||||
$result = [];
|
||||
$result['selector'] = $rule[0];
|
||||
$result['attr'] = $rule[1];
|
||||
$result['filter_tags'] = $rule[2] ?? '';
|
||||
$result['handle_callback'] = $rule[3] ?? null;
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 去除特定的html标签
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @return string
|
||||
*/
|
||||
protected function stripTags($html,$tags_str)
|
||||
protected function stripTags($html, $tags_str)
|
||||
{
|
||||
$tagsArr = $this->tag($tags_str);
|
||||
$html = $this->removeTags($html,$tagsArr[1]);
|
||||
$html = $this->removeTags($html, $tagsArr[1]);
|
||||
$p = array();
|
||||
foreach ($tagsArr[0] as $tag) {
|
||||
$p[]="/(<(?:\/".$tag."|".$tag.")[^>]*>)/i";
|
||||
$p[] = "/(<(?:\/" . $tag . "|" . $tag . ")[^>]*>)/i";
|
||||
}
|
||||
$html = preg_replace($p,"",trim($html));
|
||||
$html = preg_replace($p, "", trim($html));
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* 保留特定的html标签
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @return string
|
||||
*/
|
||||
protected function allowTags($html,$tags_str)
|
||||
protected function allowTags($html, $tags_str)
|
||||
{
|
||||
$tagsArr = $this->tag($tags_str);
|
||||
$html = $this->removeTags($html,$tagsArr[1]);
|
||||
$html = $this->removeTags($html, $tagsArr[1]);
|
||||
$allow = '';
|
||||
foreach ($tagsArr[0] as $tag) {
|
||||
$allow .= "<$tag> ";
|
||||
}
|
||||
return strip_tags(trim($html),$allow);
|
||||
return strip_tags(trim($html), $allow);
|
||||
}
|
||||
|
||||
protected function tag($tags_str)
|
||||
{
|
||||
$tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY);
|
||||
$tags = array(array(),array());
|
||||
foreach($tagArr as $tag)
|
||||
{
|
||||
if(preg_match('/-(.+)/', $tag,$arr))
|
||||
{
|
||||
$tagArr = preg_split("/\s+/", $tags_str, -1, PREG_SPLIT_NO_EMPTY);
|
||||
$tags = array(array(), array());
|
||||
foreach ($tagArr as $tag) {
|
||||
if (preg_match('/-(.+)/', $tag, $arr)) {
|
||||
array_push($tags[1], $arr[1]);
|
||||
}else{
|
||||
} else {
|
||||
array_push($tags[0], $tag);
|
||||
}
|
||||
}
|
||||
@ -248,17 +288,16 @@ class Query
|
||||
|
||||
/**
|
||||
* 移除特定的html标签
|
||||
* @param string $html
|
||||
* @param array $tags 标签数组
|
||||
* @param string $html
|
||||
* @param array $tags 标签数组
|
||||
* @return string
|
||||
*/
|
||||
protected function removeTags($html,$tags)
|
||||
protected function removeTags($html, $tags)
|
||||
{
|
||||
$tag_str = '';
|
||||
if(count($tags))
|
||||
{
|
||||
if (count($tags)) {
|
||||
foreach ($tags as $tag) {
|
||||
$tag_str .= $tag_str?','.$tag:$tag;
|
||||
$tag_str .= $tag_str ? ',' . $tag : $tag;
|
||||
}
|
||||
// phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
|
||||
$doc = phpQuery::newDocumentHTML($html);
|
||||
@ -268,4 +307,16 @@ class Query
|
||||
}
|
||||
return $html;
|
||||
}
|
||||
|
||||
protected function destroyDocument()
|
||||
{
|
||||
if ($this->document instanceof phpQueryObject) {
|
||||
$this->document->unloadDocument();
|
||||
}
|
||||
}
|
||||
|
||||
public function __destruct()
|
||||
{
|
||||
$this->destroyDocument();
|
||||
}
|
||||
}
|
@ -14,6 +14,7 @@ use Closure;
|
||||
use QL\Providers\HttpServiceProvider;
|
||||
use QL\Providers\PluginServiceProvider;
|
||||
use QL\Providers\SystemServiceProvider;
|
||||
use Tightenco\Collect\Support\Collection;
|
||||
|
||||
class Kernel
|
||||
{
|
||||
@ -34,7 +35,7 @@ class Kernel
|
||||
public function __construct(QueryList $ql)
|
||||
{
|
||||
$this->ql = $ql;
|
||||
$this->binds = collect();
|
||||
$this->binds = new Collection();
|
||||
}
|
||||
|
||||
public function bootstrap()
|
||||
|
@ -23,7 +23,7 @@ use QL\Services\MultiRequestService;
|
||||
* Class QueryList
|
||||
* @package QL
|
||||
*
|
||||
* @method string getHtml()
|
||||
* @method string getHtml($rel = true)
|
||||
* @method QueryList setHtml($html)
|
||||
* @method QueryList html($html)
|
||||
* @method Dom\Elements find($selector)
|
||||
@ -104,6 +104,15 @@ class QueryList
|
||||
* Destruction of resources
|
||||
*/
|
||||
public function destruct()
|
||||
{
|
||||
unset($this->query);
|
||||
unset($this->kernel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy all documents
|
||||
*/
|
||||
public static function destructDocuments()
|
||||
{
|
||||
phpQuery::$documents = [];
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ class FindTest extends TestCaseBase
|
||||
protected $html;
|
||||
protected $ql;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-1');
|
||||
$this->ql = QueryList::html($this->html);
|
||||
|
@ -18,7 +18,7 @@ class RulesTest extends TestCaseBase
|
||||
protected $html;
|
||||
protected $ql;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-2');
|
||||
$this->ql = QueryList::html($this->html);
|
||||
|
@ -18,7 +18,7 @@ class HttpTest extends TestCaseBase
|
||||
{
|
||||
protected $urls;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->urls = [
|
||||
'http://httpbin.org/get?name=php',
|
||||
|
@ -16,7 +16,7 @@ class InstanceTest extends TestCaseBase
|
||||
{
|
||||
protected $html;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-1');
|
||||
}
|
||||
@ -38,11 +38,11 @@ class InstanceTest extends TestCaseBase
|
||||
public function get_new_object()
|
||||
{
|
||||
$ql = (new QueryList())->html($this->html);
|
||||
$ql2 = new QueryList();
|
||||
$ql2 = (new QueryList())->html('');
|
||||
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
||||
|
||||
$ql = QueryList::range('')->html($this->html);
|
||||
$ql2 = QueryList::range('');
|
||||
$ql2 = QueryList::range('')->html('');
|
||||
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
||||
}
|
||||
}
|
@ -16,7 +16,7 @@ class MethodTest extends TestCaseBase
|
||||
{
|
||||
protected $html;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-1');
|
||||
}
|
||||
@ -30,7 +30,7 @@ class MethodTest extends TestCaseBase
|
||||
$qlHtml = QueryList::pipe(function(QueryList $ql) use($html){
|
||||
$ql->setHtml($html);
|
||||
return $ql;
|
||||
})->getHtml();
|
||||
})->getHtml(false);
|
||||
$this->assertEquals($html,$qlHtml);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user