feat: rules add texts and htmls attribute

This commit is contained in:
Jaeger 2020-03-13 21:39:42 +08:00
parent b387ef5bb0
commit b07d4bfc74

View File

@ -146,66 +146,81 @@ class Query
{ {
$data = []; $data = [];
if (!empty($this->range)) { if (!empty($this->range)) {
$robj = $this->document->find($this->range); $rangeElements = $this->document->find($this->range);
$i = 0; $i = 0;
foreach ($robj as $item) { foreach ($rangeElements as $element) {
foreach ($this->rules as $key => $reg_value){ foreach ($this->rules as $key => $reg_value){
$tags = $reg_value[2] ?? ''; $rule = $this->parseRule($reg_value);
$iobj = pq($item,$this->document)->find($reg_value[0]); $contentElements = pq($element)->find($rule['selector']);
switch ($reg_value[1]) { $data[$i][$key] = $this->extractContent($contentElements, $key, $rule);
case 'text':
$data[$i][$key] = $this->allowTags(pq($iobj)->html(),$tags);
break;
case 'html':
$data[$i][$key] = $this->stripTags(pq($iobj)->html(),$tags);
break;
case 'htmlOuter':
$data[$i][$key] = $this->stripTags(pq($iobj)->htmlOuter(),$tags);
break;
default:
$data[$i][$key] = pq($iobj)->attr($reg_value[1]);
break;
}
if(isset($reg_value[3])){
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
}
} }
$i++; $i++;
} }
} else { } else {
foreach ($this->rules as $key => $reg_value){ foreach ($this->rules as $key => $reg_value){
$tags = $reg_value[2] ?? ''; $rule = $this->parseRule($reg_value);
$lobj = $this->document->find($reg_value[0]); $contentElements = $this->document->find($rule['selector']);
$i = 0; $i = 0;
foreach ($lobj as $item) { foreach ($contentElements as $element) {
switch ($reg_value[1]) { $data[$i][$key] = $this->extractContent(pq($element), $key, $rule);
case 'text':
$data[$i][$key] = $this->allowTags(pq($item,$this->document)->html(),$tags);
break;
case 'html':
$data[$i][$key] = $this->stripTags(pq($item,$this->document)->html(),$tags);
break;
case 'htmlOuter':
$data[$i][$key] = $this->stripTags(pq($item,$this->document)->htmlOuter(),$tags);
break;
default:
$data[$i][$key] = pq($item,$this->document)->attr($reg_value[1]);
break;
}
if(isset($reg_value[3])){
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
}
$i++; $i++;
} }
} }
} }
// phpQuery::$documents = array();
return collect($data); return collect($data);
} }
protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule)
{
switch ($rule['attr']) {
case 'text':
$content = $this->allowTags($pqObj->html(), $rule['filter_tags']);
break;
case 'texts':
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
return $this->allowTags($element->html(), $rule['filter_tags']);
})->all();
break;
case 'html':
$content = $this->stripTags($pqObj->html(), $rule['filter_tags']);
break;
case 'htmls':
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
return $this->stripTags($element->html(), $rule['filter_tags']);
})->all();
break;
case 'htmlOuter':
$content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']);
break;
case 'htmlOuters':
$content = (new Elements($pqObj))->map(function(Elements $element) use($rule){
return $this->stripTags($element->htmlOuter(), $rule['filter_tags']);
})->all();
break;
default:
$content = $pqObj->attr($rule['attr']);
break;
}
if(isset($reg_value[3])){
$content = call_user_func($rule['handle_callback'], $content, $ruleName);
}
return $content;
}
protected function parseRule($rule)
{
$result = [];
$result['selector'] = $rule[0];
$result['attr'] = $rule[1];
$result['filter_tags'] = $rule[2] ?? '';
$result['handle_callback'] = $rule[3] ?? null;
return $result;
}
/** /**
* 去除特定的html标签 * 去除特定的html标签
* @param string $html * @param string $html