diff --git a/src/Dom/Elements.php b/src/Dom/Elements.php
index 4df312f..d093886 100644
--- a/src/Dom/Elements.php
+++ b/src/Dom/Elements.php
@@ -30,7 +30,13 @@ class Elements
     public function __call($name, $arguments)
     {
         $obj = call_user_func_array([$this->elements,$name],$arguments);
-        return $obj instanceof phpQueryObject?(new self($obj)):$obj;
+        // Wrap phpQuery results so the caller keeps receiving Elements objects.
+        if ($obj instanceof phpQueryObject) {
+            return new self($obj);
+        }
+
+        // String results come back trimmed; every other value passes through untouched.
+        return is_string($obj) ? trim($obj) : $obj;
     }
 
     public function map($callback)
@@ -49,5 +55,32 @@ class Elements
         });
     }
 
+    /**
+     * Hand map() a callback that returns the trimmed text() of the item it receives.
+     *
+     * @return mixed whatever map() returns
+     */
+    public function texts()
+    {
+        $trimmedText = function ($element) {
+            return trim($element->text());
+        };
+
+        return $this->map($trimmedText);
+    }
+
+    /**
+     * Hand map() a callback that returns the trimmed html() of the item it receives.
+     *
+     * @return mixed whatever map() returns
+     */
+    public function htmls()
+    {
+        $trimmedHtml = function ($element) {
+            return trim($element->html());
+        };
+
+        return $this->map($trimmedHtml);
+    }
 
 }
\ No newline at end of file
diff --git a/src/QueryList.php b/src/QueryList.php
index 632ca86..555bcd7 100644
--- a/src/QueryList.php
+++ b/src/QueryList.php
@@ -17,8 +17,11 @@ use QL\Dom\Dom;
 
 class QueryList
 {
-    private $html;
-    private $document;
+    protected $html;
+    protected $document;
+    protected $rules;                // extraction rules, assigned by rules()
+    protected $range = null;         // range selector, assigned by range()
+    protected $isRemoveHead = false; // flag assigned by removeHead()
 
     /**
      * QueryList constructor.
@@ -52,5 +55,215 @@ class QueryList
         return (new Dom($this->document))->find($selector);
     }
 
+    /**
+     * Set the extraction rules used by query().
+     *
+     * Each rule is keyed by the output field name and has the shape
+     * [selector, 'text'|'html'|attribute name, tags (optional), callback (optional)].
+     *
+     * @param  array $rules
+     * @return $this
+     */
+    public function rules(array $rules)
+    {
+        $this->rules = $rules;
+        return $this;
+    }
+
+    /**
+     * Set the selector whose matches each become one result row.
+     *
+     * @param  string|null $range when empty, every rule is matched against the whole document
+     * @return $this
+     */
+    public function range($range)
+    {
+        $this->range = $range;
+        return $this;
+    }
+
+    /**
+     * Store the remove-head flag; no code added by this patch reads it yet.
+     *
+     * @param  bool $isRemoveHead
+     * @return $this
+     */
+    public function removeHead($isRemoveHead = true)
+    {
+        $this->isRemoveHead = $isRemoveHead;
+        return $this;
+    }
+
+    /**
+     * Run the configured rules and return the extracted rows.
+     *
+     * @param  callable|null $callback optional mapper handed to the result's map()
+     * @return mixed the value of collect(), or of its map() when a callback is given
+     */
+    public function query($callback = null)
+    {
+        $data = $this->_getList();
+        return is_null($callback)?$data:$data->map($callback);
+    }
+
+    /**
+     * Apply every rule and assemble the rows, indexed by match position.
+     *
+     * Iterates with foreach instead of each(): each() was removed in PHP 8.0 and
+     * walked the array's internal pointer, which the no-range branch never reset,
+     * so a second query() on the same instance found no rules.
+     *
+     * @return mixed the value returned by collect()
+     */
+    protected function _getList()
+    {
+        $data = [];
+        $document = $this->document;
+        // rules() may never have been called; treat that as "no rules".
+        $rules = is_array($this->rules)?$this->rules:[];
+        if (!empty($this->range)) {
+            // One row per range match; each rule is resolved inside that match.
+            $i = 0;
+            foreach (pq($document)->find($this->range) as $item) {
+                foreach ($rules as $key => $rule) {
+                    $data[$i][$key] = $this->_extract(pq($item)->find($rule[0]),$rule,$key);
+                }
+                $i++;
+            }
+        } else {
+            // No range: the n-th match of each rule fills field $key of row n.
+            foreach ($rules as $key => $rule) {
+                $i = 0;
+                foreach (pq($document)->find($rule[0]) as $item) {
+                    $data[$i][$key] = $this->_extract($item,$rule,$key);
+                    $i++;
+                }
+            }
+        }
+        // Reset phpQuery's static document list before handing the rows back.
+        phpQuery::$documents = array();
+        return collect($data);
+    }
+
+    /**
+     * Read one rule's value from a node and run the rule's optional callback.
+     *
+     * @param  mixed $node value passed to pq()
+     * @param  array $rule [selector, type, tags (optional), callback (optional)]
+     * @param  mixed $key  rule key, given to the callback as its second argument
+     * @return mixed
+     */
+    protected function _extract($node,$rule,$key)
+    {
+        $tags = isset($rule[2])?$rule[2]:'';
+        switch ($rule[1]) {
+            case 'text':
+                $value = $this->_allowTags(pq($node)->html(),$tags);
+                break;
+            case 'html':
+                $value = $this->_stripTags(pq($node)->html(),$tags);
+                break;
+            default:
+                $value = pq($node)->attr($rule[1]);
+                break;
+        }
+        if(isset($rule[3])){
+            $value = call_user_func($rule[3],$value,$key);
+        }
+        return $value;
+    }
+
+    /**
+     * Strip specific html tags.
+     *
+     * Entries written as "-name" are first handed to _removeTags(); for the
+     * remaining names only the opening and closing tag markup is deleted.
+     *
+     * @param  string $html
+     * @param  string $tags_str tag names separated by whitespace
+     * @return string
+     */
+    protected function _stripTags($html,$tags_str)
+    {
+        $tagsArr = $this->_tag($tags_str);
+        $html = $this->_removeTags($html,$tagsArr[1]);
+        $p = array();
+        foreach ($tagsArr[0] as $tag) {
+            // The lookahead closes the tag name, so stripping "a" leaves <abbr> alone.
+            $p[] = '/<\/?'.preg_quote($tag,'/').'(?=[\s\/>])[^>]*>/i';
+        }
+        $html = preg_replace($p,"",trim($html));
+        return $html;
+    }
+
+    /**
+     * Keep specific html tags.
+     *
+     * Entries written as "-name" are first handed to _removeTags(); the remaining
+     * names form the allow-list given to strip_tags().
+     *
+     * @param  string $html
+     * @param  string $tags_str tag names separated by whitespace
+     * @return string
+     */
+    protected function _allowTags($html,$tags_str)
+    {
+        $tagsArr = $this->_tag($tags_str);
+        $html = $this->_removeTags($html,$tagsArr[1]);
+        $allow = '';
+        foreach ($tagsArr[0] as $tag) {
+            $allow .= "<$tag> ";
+        }
+        return strip_tags(trim($html),$allow);
+    }
+
+    /**
+     * Split a tag filter string into plain entries and "-" prefixed entries.
+     *
+     * @param  string $tags_str entries separated by whitespace
+     * @return array [0] => plain entries, [1] => "-" entries with the prefix removed
+     */
+    protected function _tag($tags_str)
+    {
+        $tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY);
+        $tags = array(array(),array());
+        foreach($tagArr as $tag)
+        {
+            // Anchored: a hyphen inside a name ("my-tag") is not the removal prefix.
+            if(preg_match('/^-(.+)/', $tag,$arr))
+            {
+                array_push($tags[1], $arr[1]);
+            }else{
+                array_push($tags[0], $tag);
+            }
+        }
+        return $tags;
+    }
+
+    /**
+     * Remove specific html tags.
+     *
+     * @param  string $html
+     * @param  array  $tags entries joined with "," into one find() selector
+     * @return string the input unchanged when $tags is empty
+     */
+    protected function _removeTags($html,$tags)
+    {
+        if(count($tags))
+        {
+            // Neither encoding property is declared by this patch, so only override
+            // phpQuery's charset when one is really set instead of assigning null.
+            if(!empty($this->inputEncoding)){
+                phpQuery::$defaultCharset = $this->inputEncoding;
+            }elseif(!empty($this->htmlEncoding)){
+                phpQuery::$defaultCharset = $this->htmlEncoding;
+            }
+            $doc = phpQuery::newDocumentHTML($html);
+            pq($doc)->find(implode(',',$tags))->remove();
+            $html = pq($doc)->htmlOuter();
+            $doc->unloadDocument();
+        }
+        return $html;
+    }
 
 }
\ No newline at end of file