fix: data callback
This commit is contained in:
parent
c16826a573
commit
faea883c6f
@ -32,6 +32,9 @@
|
||||
},
|
||||
"require-dev": {
|
||||
"symfony/var-dumper": "^3.3",
|
||||
"phpunit/phpunit": "^7.5"
|
||||
"phpunit/phpunit": "^8.5"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "./vendor/bin/phpunit"
|
||||
}
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ class Query
|
||||
{
|
||||
$this->html = value($html);
|
||||
$this->destroyDocument();
|
||||
$this->document = phpQuery::newDocumentHTML($this->html,$charset);
|
||||
$this->document = phpQuery::newDocumentHTML($this->html, $charset);
|
||||
return $this->ql;
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ class Query
|
||||
*/
|
||||
public function getData(Closure $callback = null)
|
||||
{
|
||||
return is_null($callback) ? $this->data : $this->data->map($callback);
|
||||
return $this->handleData($this->data, $callback);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -125,7 +125,7 @@ class Query
|
||||
*/
|
||||
public function removeHead()
|
||||
{
|
||||
$html = preg_replace('/<head.+?>.+<\/head>/is','<head></head>',$this->html);
|
||||
$html = preg_replace('/<head.+?>.+<\/head>/is', '<head></head>', $this->html);
|
||||
$this->setHtml($html);
|
||||
return $this->ql;
|
||||
}
|
||||
@ -139,24 +139,37 @@ class Query
|
||||
public function query(Closure $callback = null)
|
||||
{
|
||||
$this->data = $this->getList();
|
||||
$callback && $this->data = $this->data->map($callback);
|
||||
$this->data = $this->handleData($this->data, $callback);
|
||||
return $this->ql;
|
||||
}
|
||||
|
||||
public function handleData(Collection $data, $callback)
|
||||
{
|
||||
if (is_callable($callback)) {
|
||||
if (empty($this->range)) {
|
||||
$data = collect($callback($data->all(), null));
|
||||
} else {
|
||||
$data = $data->map($callback);
|
||||
}
|
||||
}
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
protected function getList()
|
||||
{
|
||||
$data = [];
|
||||
if (empty($this->range)) {
|
||||
foreach ($this->rules as $key => $reg_value){
|
||||
foreach ($this->rules as $key => $reg_value) {
|
||||
$rule = $this->parseRule($reg_value);
|
||||
$contentElements = $this->document->find($rule['selector']);
|
||||
$data[$key] = $this->extractContent($contentElements, $key, $rule);
|
||||
}
|
||||
} else {
|
||||
$rangeElements = $this->document->find($this->range);
|
||||
$rangeElements = $this->document->find($this->range);
|
||||
$i = 0;
|
||||
foreach ($rangeElements as $element) {
|
||||
foreach ($this->rules as $key => $reg_value){
|
||||
foreach ($this->rules as $key => $reg_value) {
|
||||
$rule = $this->parseRule($reg_value);
|
||||
$contentElements = pq($element)->find($rule['selector']);
|
||||
$data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
|
||||
@ -175,7 +188,7 @@ class Query
|
||||
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
|
||||
break;
|
||||
case 'texts':
|
||||
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
|
||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
||||
return $this->allowTags($element->html(), $rule['filter_tags']);
|
||||
})->all();
|
||||
break;
|
||||
@ -183,7 +196,7 @@ class Query
|
||||
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
|
||||
break;
|
||||
case 'htmls':
|
||||
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
|
||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
||||
return $this->stripTags($element->html(), $rule['filter_tags']);
|
||||
})->all();
|
||||
break;
|
||||
@ -191,7 +204,7 @@ class Query
|
||||
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
|
||||
break;
|
||||
case 'htmlOuters':
|
||||
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
|
||||
$content = (new Elements($pqObj))->map(function (Elements $element) use ($rule) {
|
||||
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
|
||||
})->all();
|
||||
break;
|
||||
@ -200,7 +213,7 @@ class Query
|
||||
break;
|
||||
}
|
||||
|
||||
if(is_callable($rule['handle_callback'])){
|
||||
if (is_callable($rule['handle_callback'])) {
|
||||
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
|
||||
}
|
||||
|
||||
@ -220,49 +233,47 @@ class Query
|
||||
|
||||
/**
|
||||
* 去除特定的html标签
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @return string
|
||||
*/
|
||||
protected function stripTags($html,$tags_str)
|
||||
protected function stripTags($html, $tags_str)
|
||||
{
|
||||
$tagsArr = $this->tag($tags_str);
|
||||
$html = $this->removeTags($html,$tagsArr[1]);
|
||||
$html = $this->removeTags($html, $tagsArr[1]);
|
||||
$p = array();
|
||||
foreach ($tagsArr[0] as $tag) {
|
||||
$p[]="/(<(?:\/".$tag."|".$tag.")[^>]*>)/i";
|
||||
$p[] = "/(<(?:\/" . $tag . "|" . $tag . ")[^>]*>)/i";
|
||||
}
|
||||
$html = preg_replace($p,"",trim($html));
|
||||
$html = preg_replace($p, "", trim($html));
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* 保留特定的html标签
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @param string $html
|
||||
* @param string $tags_str 多个标签名之间用空格隔开
|
||||
* @return string
|
||||
*/
|
||||
protected function allowTags($html,$tags_str)
|
||||
protected function allowTags($html, $tags_str)
|
||||
{
|
||||
$tagsArr = $this->tag($tags_str);
|
||||
$html = $this->removeTags($html,$tagsArr[1]);
|
||||
$html = $this->removeTags($html, $tagsArr[1]);
|
||||
$allow = '';
|
||||
foreach ($tagsArr[0] as $tag) {
|
||||
$allow .= "<$tag> ";
|
||||
}
|
||||
return strip_tags(trim($html),$allow);
|
||||
return strip_tags(trim($html), $allow);
|
||||
}
|
||||
|
||||
protected function tag($tags_str)
|
||||
{
|
||||
$tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY);
|
||||
$tags = array(array(),array());
|
||||
foreach($tagArr as $tag)
|
||||
{
|
||||
if(preg_match('/-(.+)/', $tag,$arr))
|
||||
{
|
||||
$tagArr = preg_split("/\s+/", $tags_str, -1, PREG_SPLIT_NO_EMPTY);
|
||||
$tags = array(array(), array());
|
||||
foreach ($tagArr as $tag) {
|
||||
if (preg_match('/-(.+)/', $tag, $arr)) {
|
||||
array_push($tags[1], $arr[1]);
|
||||
}else{
|
||||
} else {
|
||||
array_push($tags[0], $tag);
|
||||
}
|
||||
}
|
||||
@ -271,17 +282,16 @@ class Query
|
||||
|
||||
/**
|
||||
* 移除特定的html标签
|
||||
* @param string $html
|
||||
* @param array $tags 标签数组
|
||||
* @param string $html
|
||||
* @param array $tags 标签数组
|
||||
* @return string
|
||||
*/
|
||||
protected function removeTags($html,$tags)
|
||||
protected function removeTags($html, $tags)
|
||||
{
|
||||
$tag_str = '';
|
||||
if(count($tags))
|
||||
{
|
||||
if (count($tags)) {
|
||||
foreach ($tags as $tag) {
|
||||
$tag_str .= $tag_str?','.$tag:$tag;
|
||||
$tag_str .= $tag_str ? ',' . $tag : $tag;
|
||||
}
|
||||
// phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
|
||||
$doc = phpQuery::newDocumentHTML($html);
|
||||
@ -294,7 +304,7 @@ class Query
|
||||
|
||||
protected function destroyDocument()
|
||||
{
|
||||
if($this->document instanceof phpQueryObject) {
|
||||
if ($this->document instanceof phpQueryObject) {
|
||||
$this->document->unloadDocument();
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ class FindTest extends TestCaseBase
|
||||
protected $html;
|
||||
protected $ql;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-1');
|
||||
$this->ql = QueryList::html($this->html);
|
||||
|
@ -18,7 +18,7 @@ class RulesTest extends TestCaseBase
|
||||
protected $html;
|
||||
protected $ql;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-2');
|
||||
$this->ql = QueryList::html($this->html);
|
||||
|
@ -18,7 +18,7 @@ class HttpTest extends TestCaseBase
|
||||
{
|
||||
protected $urls;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->urls = [
|
||||
'http://httpbin.org/get?name=php',
|
||||
|
@ -16,7 +16,7 @@ class InstanceTest extends TestCaseBase
|
||||
{
|
||||
protected $html;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-1');
|
||||
}
|
||||
@ -38,11 +38,11 @@ class InstanceTest extends TestCaseBase
|
||||
public function get_new_object()
|
||||
{
|
||||
$ql = (new QueryList())->html($this->html);
|
||||
$ql2 = new QueryList();
|
||||
$ql2 = (new QueryList())->html('');
|
||||
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
||||
|
||||
$ql = QueryList::range('')->html($this->html);
|
||||
$ql2 = QueryList::range('');
|
||||
$ql2 = QueryList::range('')->html('');
|
||||
$this->assertNotEquals($ql->getHtml(),$ql2->getHtml());
|
||||
}
|
||||
}
|
@ -16,7 +16,7 @@ class MethodTest extends TestCaseBase
|
||||
{
|
||||
protected $html;
|
||||
|
||||
public function setUp()
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->html = $this->getSnippet('snippet-1');
|
||||
}
|
||||
@ -30,7 +30,7 @@ class MethodTest extends TestCaseBase
|
||||
$qlHtml = QueryList::pipe(function(QueryList $ql) use($html){
|
||||
$ql->setHtml($html);
|
||||
return $ql;
|
||||
})->getHtml();
|
||||
})->getHtml(false);
|
||||
$this->assertEquals($html,$qlHtml);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user