diff --git a/src/Dom/Query.php b/src/Dom/Query.php index 13fbffb..b6fd16d 100644 --- a/src/Dom/Query.php +++ b/src/Dom/Query.php @@ -146,66 +146,81 @@ class Query { $data = []; if (!empty($this->range)) { - $robj = $this->document->find($this->range); + $rangeElements = $this->document->find($this->range); $i = 0; - foreach ($robj as $item) { + foreach ($rangeElements as $element) { foreach ($this->rules as $key => $reg_value){ - $tags = $reg_value[2] ?? ''; - $iobj = pq($item,$this->document)->find($reg_value[0]); - switch ($reg_value[1]) { - case 'text': - $data[$i][$key] = $this->allowTags(pq($iobj)->html(),$tags); - break; - case 'html': - $data[$i][$key] = $this->stripTags(pq($iobj)->html(),$tags); - break; - case 'htmlOuter': - $data[$i][$key] = $this->stripTags(pq($iobj)->htmlOuter(),$tags); - break; - default: - $data[$i][$key] = pq($iobj)->attr($reg_value[1]); - break; - } - - if(isset($reg_value[3])){ - $data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key); - } + $rule = $this->parseRule($reg_value); + $contentElements = pq($element)->find($rule['selector']); + $data[$i][$key] = $this->extractContent($contentElements, $key, $rule); } $i++; } } else { foreach ($this->rules as $key => $reg_value){ - $tags = $reg_value[2] ?? ''; - $lobj = $this->document->find($reg_value[0]); + $rule = $this->parseRule($reg_value); + $contentElements = $this->document->find($rule['selector']); $i = 0; - foreach ($lobj as $item) { - switch ($reg_value[1]) { - case 'text': - $data[$i][$key] = $this->allowTags(pq($item,$this->document)->html(),$tags); - break; - case 'html': - $data[$i][$key] = $this->stripTags(pq($item,$this->document)->html(),$tags); - break; - case 'htmlOuter': - $data[$i][$key] = $this->stripTags(pq($item,$this->document)->htmlOuter(),$tags); - break; - default: - $data[$i][$key] = pq($item,$this->document)->attr($reg_value[1]); - break; - } - - if(isset($reg_value[3])){ - $data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key); - } - + foreach ($contentElements as $element) { + $data[$i][$key] = $this->extractContent(pq($element), $key, $rule); $i++; } } } -// phpQuery::$documents = array(); + return collect($data); } + protected function extractContent(phpQueryObject $pqObj, $ruleName, $rule) + { + switch ($rule['attr']) { + case 'text': + $content = $this->allowTags($pqObj->html(), $rule['filter_tags']); + break; + case 'texts': + $content = (new Elements($pqObj))->map(function(Elements $element) use($rule){ + return $this->allowTags($element->html(), $rule['filter_tags']); + })->all(); + break; + case 'html': + $content = $this->stripTags($pqObj->html(), $rule['filter_tags']); + break; + case 'htmls': + $content = (new Elements($pqObj))->map(function(Elements $element) use($rule){ + return $this->stripTags($element->html(), $rule['filter_tags']); + })->all(); + break; + case 'htmlOuter': + $content = $this->stripTags($pqObj->htmlOuter(), $rule['filter_tags']); + break; + case 'htmlOuters': + $content = (new Elements($pqObj))->map(function(Elements $element) use($rule){ + return $this->stripTags($element->htmlOuter(), $rule['filter_tags']); + })->all(); + break; + default: + $content = $pqObj->attr($rule['attr']); + break; + } + + if(isset($reg_value[3])){ + $content = call_user_func($rule['handle_callback'], $content, $ruleName); + } + + return $content; + } + + protected function parseRule($rule) + { + $result = []; + $result['selector'] = $rule[0]; + $result['attr'] = $rule[1]; + $result['filter_tags'] = $rule[2] ?? ''; + $result['handle_callback'] = $rule[3] ?? null; + + return $result; + } + /** * 去除特定的html标签 * @param string $html