add Query

This commit is contained in:
Jaeger 2017-09-22 01:51:46 +08:00
parent ad9b493fc0
commit 2013e4d2b0
5 changed files with 395 additions and 185 deletions

View File

@ -9,8 +9,140 @@ namespace QL\Dom;
use phpQueryObject;
/**
* Class Elements
* @package QL\Dom
*
* @method Elements toReference($var)
* @method documentFragment($state)
* @method Elements toRoot()
* @method Elements getDocumentIDRef($documentID)
* @method Elements getDocument()
* @method getDOMDocument()
* @method Elements getDocumentID()
* @method Elements unloadDocument()
* @method isHTML()
* @method isXHTML()
* @method isXML()
* @method serialize()
* @method serializeArray($submit)
* @method get($index,$callback1,$callback2,$callback3)
* @method getString($index,$callback1,$callback2,$callback3)
* @method getStrings($index,$callback1,$callback2,$callback3)
* @method newInstance($newStack)
* @method Elements find($selectors,$context,$noHistory)
* @method Elements is($selector,$nodes)
* @method Elements filterCallback($callback,$_skipHistory)
* @method Elements filter($selectors,$_skipHistory)
* @method load($url,$data,$callback)
* @method css()
* @method show()
* @method hide()
* @method Elements trigger($type,$data)
* @method Elements triggerHandler($type,$data)
* @method Elements bind($type,$data,$callback)
* @method unbind($type,$callback)
* @method Elements change($callback)
* @method Elements submit($callback)
* @method Elements click($callback)
* @method Elements wrapAllOld($wrapper)
* @method Elements wrapAll($wrapper)
* @method Elements wrapAllPHP($codeBefore,$codeAfter)
* @method Elements wrap($wrapper)
* @method Elements wrapPHP($codeBefore,$codeAfter)
* @method Elements wrapInner($wrapper)
* @method Elements wrapInnerPHP($codeBefore,$codeAfter)
* @method Elements contents()
* @method Elements contentsUnwrap()
* @method switchWith($markup)
* @method Elements eq($num)
* @method Elements size()
* @method Elements length()
* @method count()
* @method Elements end($level)
* @method Elements _clone()
* @method Elements replaceWithPHP($code)
* @method Elements replaceWith($content)
* @method Elements replaceAll($selector)
* @method Elements remove($selector)
* @method markup($markup,$callback1,$callback2,$callback3)
* @method markupOuter($callback1,$callback2,$callback3)
* @method html($html,$callback1,$callback2,$callback3)
* @method xml($xml,$callback1,$callback2,$callback3)
* @method htmlOuter($callback1,$callback2,$callback3)
* @method xmlOuter($callback1,$callback2,$callback3)
* @method Elements php($code)
* @method markupPHP($code)
* @method markupOuterPHP()
* @method Elements children($selector)
* @method Elements ancestors($selector)
* @method Elements append($content)
* @method Elements appendPHP($content)
* @method Elements appendTo($selector)
* @method Elements prepend($content)
* @method Elements prependPHP($content)
* @method Elements prependTo($selector)
* @method Elements before($content)
* @method Elements beforePHP($content)
* @method Elements insertBefore($selector)
* @method Elements after($content)
* @method Elements afterPHP($content)
* @method Elements insertAfter($selector)
* @method Elements insert($target,$type)
* @method index($subject)
* @method Elements slice($start,$end)
* @method Elements reverse()
* @method text($text,$callback1,$callback2,$callback3)
* @method Elements plugin($class,$file)
* @method extend($class,$file)
* @method Elements _next($selector)
* @method Elements _prev($selector)
* @method Elements prev($selector)
* @method Elements prevAll($selector)
* @method Elements nextAll($selector)
* @method Elements siblings($selector)
* @method Elements not($selector)
* @method Elements add($selector)
* @method Elements parent($selector)
* @method Elements parents($selector)
* @method stack($nodeTypes)
* @method attr($attr,$value)
* @method Elements attrPHP($attr,$code)
* @method Elements removeAttr($attr)
* @method val($val)
* @method Elements andSelf()
* @method Elements addClass($className)
* @method Elements addClassPHP($className)
* @method hasClass($className)
* @method Elements removeClass($className)
* @method Elements toggleClass($className)
* @method Elements _empty()
* @method Elements each($callback,$param1,$param2,$param3)
* @method Elements callback($callback,$param1,$param2,$param3)
* @method data($key,$value)
* @method removeData($key)
* @method rewind()
* @method current()
* @method key()
* @method Elements next($cssSelector)
* @method valid()
* @method offsetExists($offset)
* @method offsetGet($offset)
* @method offsetSet($offset,$value)
* @method offsetUnset($offset)
* @method whois($oneNode)
* @method Elements dump()
* @method dumpWhois()
* @method dumpLength()
* @method dumpTree($html,$title)
* @method dumpDie()
*/
class Elements
{
/**
* @var phpQueryObject
*/
protected $elements;
/**

206
src/Dom/Query.php Normal file
View File

@ -0,0 +1,206 @@
<?php
/**
* Created by PhpStorm.
* User: Jaeger <JaegerCode@gmail.com>
* Date: 2017/9/21
*/
namespace QL\Dom;
use phpQuery;
use QL\QueryList;
/**
 * DOM query helper used by QueryList.
 *
 * Holds the HTML being scraped, the phpQuery document built from it, and the
 * extraction rules. Configuration methods return the owning QueryList so
 * calls can be chained on it.
 */
class Query
{
    /**
     * @var mixed Raw HTML last passed to setHtml().
     */
    protected $html;

    /**
     * @var mixed phpQuery document created from $html by setHtml().
     */
    protected $document;

    /**
     * Extraction rules keyed by output field name. Each rule is an indexed
     * array: [0] selector, [1] 'text' | 'html' | attribute name,
     * [2] optional tag filter string, [3] optional callback.
     *
     * @var array
     */
    protected $rules = [];

    /**
     * @var mixed Optional selector for the repeating container element.
     */
    protected $range = null;

    /**
     * @var QueryList Owning instance, returned from the fluent setters.
     */
    protected $ql;

    /**
     * @param QueryList $ql Owning QueryList instance.
     */
    public function __construct(QueryList $ql)
    {
        $this->ql = $ql;
    }

    /**
     * @return mixed The HTML last passed to setHtml().
     */
    public function getHtml()
    {
        return $this->html;
    }

    /**
     * Store the HTML and build a new phpQuery document from it.
     *
     * @param mixed $html
     * @return QueryList
     */
    public function setHtml($html)
    {
        $this->html = $html;
        $this->document = phpQuery::newDocumentHTML($this->html);
        return $this->ql;
    }

    /**
     * Run a selector against the current document.
     *
     * @param mixed $selector
     * @return mixed Result of Dom::find().
     */
    public function find($selector)
    {
        return (new Dom($this->document))->find($selector);
    }

    /**
     * Set the extraction rules used by query().
     *
     * @param array $rules
     * @return QueryList
     */
    public function rules(array $rules)
    {
        $this->rules = $rules;
        return $this->ql;
    }

    /**
     * Set the selector of the repeating container; an empty value disables it.
     *
     * @param mixed $range
     * @return QueryList
     */
    public function range($range)
    {
        $this->range = $range;
        return $this->ql;
    }

    /**
     * Replace the document's <head>...</head> section with an empty head and
     * re-parse the result.
     *
     * @return QueryList
     */
    public function removeHead()
    {
        // `\b[^>]*` keeps the match on a real <head> tag (not <header ...>)
        // and also accepts a bare <head> without attributes.
        $html = preg_replace('/<head\b[^>]*>.*<\/head>/is', '<head></head>', $this->html);
        // preg_replace() yields null on a PCRE error; keep the current
        // document instead of replacing it with null.
        if ($html !== null) {
            $this->setHtml($html);
        }
        return $this->ql;
    }

    /**
     * Apply the rules to the document and return the extracted rows.
     *
     * @param callable|null $callback Optional callback passed to map() on the result.
     * @return mixed Collection produced by collect(), optionally mapped.
     */
    public function query($callback = null)
    {
        $data = $this->getList();
        return is_null($callback) ? $data : $data->map($callback);
    }

    /**
     * Build the row list.
     *
     * With a range, every element matched by the range becomes one row and
     * each rule selector is resolved inside that element. Without a range,
     * each rule selector is resolved against the whole document and its n-th
     * match is written to row n.
     *
     * @return mixed Result of collect() on the row array.
     */
    protected function getList()
    {
        $data = [];
        $document = $this->document;
        if (!empty($this->range)) {
            $i = 0;
            foreach (pq($document)->find($this->range) as $item) {
                foreach ($this->rules as $key => $rule) {
                    $data[$i][$key] = $this->extractValue(pq($item)->find($rule[0]), $rule, $key);
                }
                $i++;
            }
        } else {
            foreach ($this->rules as $key => $rule) {
                $i = 0;
                foreach (pq($document)->find($rule[0]) as $item) {
                    $data[$i][$key] = $this->extractValue($item, $rule, $key);
                    $i++;
                }
            }
        }
        return collect($data);
    }

    /**
     * Extract one field value from a node according to a single rule.
     *
     * @param mixed $node Node or phpQuery object to read from.
     * @param mixed $rule Rule array (see $rules).
     * @param mixed $key  Field name, forwarded to the rule callback.
     * @return mixed
     */
    private function extractValue($node, $rule, $key)
    {
        $tags = $rule[2] ?? '';
        switch ($rule[1]) {
            case 'text':
                // Inner HTML reduced to text, keeping only the listed tags.
                $value = $this->allowTags(pq($node)->html(), $tags);
                break;
            case 'html':
                // Inner HTML with the listed tags stripped.
                $value = $this->stripTags(pq($node)->html(), $tags);
                break;
            default:
                // Anything else is read as an attribute name.
                $value = pq($node)->attr($rule[1]);
                break;
        }
        if (isset($rule[3])) {
            $value = call_user_func($rule[3], $value, $key);
        }
        return $value;
    }

    /**
     * Strip specific HTML tags.
     *
     * Names prefixed with "-" are removed from the DOM through removeTags();
     * for the other names only the opening and closing tags are deleted.
     *
     * @param string $html
     * @param string $tags_str Tag names separated by whitespace.
     * @return string
     */
    protected function stripTags($html, $tags_str)
    {
        $tagsArr = $this->tag($tags_str);
        $html = $this->removeTags($html, $tagsArr[1]);
        $patterns = [];
        foreach ($tagsArr[0] as $tag) {
            // The lookahead ends the tag name, so "a" does not also match
            // <abbr> or <article>; preg_quote() keeps the name literal.
            $patterns[] = '/<\/?' . preg_quote($tag, '/') . '(?=[\s\/>])[^>]*>/i';
        }
        return preg_replace($patterns, '', trim($html));
    }

    /**
     * Keep only specific HTML tags.
     *
     * Names prefixed with "-" are removed from the DOM through removeTags();
     * every tag not listed is then stripped with strip_tags().
     *
     * @param string $html
     * @param string $tags_str Tag names separated by whitespace.
     * @return string
     */
    protected function allowTags($html, $tags_str)
    {
        $tagsArr = $this->tag($tags_str);
        $html = $this->removeTags($html, $tagsArr[1]);
        $allow = '';
        foreach ($tagsArr[0] as $tag) {
            $allow .= "<$tag> ";
        }
        return strip_tags(trim($html), $allow);
    }

    /**
     * Split a tag filter string into plain names and "-"-prefixed names.
     *
     * @param string $tags_str Tag names separated by whitespace.
     * @return array [0] => plain names, [1] => names that had a leading "-" (prefix removed).
     */
    protected function tag($tags_str)
    {
        $tags = [[], []];
        foreach (preg_split('/\s+/', $tags_str, -1, PREG_SPLIT_NO_EMPTY) as $tag) {
            // Anchored so that only a leading "-" marks a removal; a hyphen
            // inside a name (e.g. "my-widget") stays part of the name.
            if (preg_match('/^-(.+)/', $tag, $match)) {
                $tags[1][] = $match[1];
            } else {
                $tags[0][] = $tag;
            }
        }
        return $tags;
    }

    /**
     * Remove the elements matching the given tag names from the HTML.
     *
     * @param string $html
     * @param array  $tags Tag names.
     * @return string The HTML unchanged when $tags is empty, otherwise the
     *                outer HTML of the re-parsed fragment after removal.
     */
    protected function removeTags($html, $tags)
    {
        if (!count($tags)) {
            return $html;
        }
        // phpQuery's default charset is left untouched: this class declares
        // no encoding properties to derive it from.
        $doc = phpQuery::newDocumentHTML($html);
        pq($doc)->find(implode(',', $tags))->remove();
        $html = pq($doc)->htmlOuter();
        $doc->unloadDocument();
        return $html;
    }
}

View File

@ -49,7 +49,7 @@ class Kernel
$this->binds[$name] = $provider;
}
public function getBind(string $name)
public function getService(string $name)
{
if(!$this->binds->offsetExists($name)){
throw new ServiceNotFoundException("Service: {$name} not found!");

View File

@ -1,8 +1,4 @@
<?php
namespace QL;
use phpQuery;
use QL\Dom\Dom;
/**
* QueryList
*
@ -15,12 +11,27 @@ use QL\Dom\Dom;
*
*/
namespace QL;
use phpQuery;
use QL\Dom\Query;
/**
* Class QueryList
* @package QL
*
* @method string getHtml()
* @method QueryList setHtml($html)
* @method Dom\Elements find($selector)
* @method QueryList rules(array $rules)
* @method QueryList range($range)
* @method QueryList removeHead()
* @method \Illuminate\Support\Collection query($callback = null)
* @method QueryList encoding(string $outputEncoding,string $inputEncoding = null)
*/
class QueryList
{
protected $html;
protected $document;
protected $rules;
protected $range = null;
protected $query;
protected $kernel;
/**
@ -28,195 +39,41 @@ class QueryList
*/
public function __construct()
{
    // The DOM query helper is created and stored first, then the service
    // kernel is built and bootstrapped; both receive this instance.
    $domQuery = new Query($this);
    $this->query = $domQuery;

    $serviceKernel = new Kernel($this);
    $this->kernel = $serviceKernel->bootstrap();
}
public function __call($name, $arguments)
{
return $this->kernel->getBind($name)->call($this,...$arguments);
}
/**
* @return mixed
*/
public function getHtml()
{
return $this->html;
}
/**
* @param $html
* @return $this
*/
public function setHtml($html)
{
$this->html = $html;
$this->document = phpQuery::newDocumentHTML($this->html);
return $this;
}
public function find($selector)
{
return (new Dom($this->document))->find($selector);
}
public function rules(array $rules)
{
$this->rules = $rules;
return $this;
}
public function range($range)
{
$this->range = $range;
return $this;
}
public function removeHead()
{
$html = preg_replace('/<head.+?>.+<\/head>/is','<head></head>',$this->html);
$this->setHtml($html);
return $this;
}
public function query($callback = null)
{
$data = $this->getList();
return is_null($callback)?$data:$data->map($callback);
}
protected function getList()
{
$data = [];
$document = $this->document;
if (!empty($this->range)) {
$robj = pq($document)->find($this->range);
$i = 0;
foreach ($robj as $item) {
foreach ($this->rules as $key => $reg_value){
$tags = $reg_value[2] ?? '';
$iobj = pq($item)->find($reg_value[0]);
switch ($reg_value[1]) {
case 'text':
$data[$i][$key] = $this->allowTags(pq($iobj)->html(),$tags);
break;
case 'html':
$data[$i][$key] = $this->stripTags(pq($iobj)->html(),$tags);
break;
default:
$data[$i][$key] = pq($iobj)->attr($reg_value[1]);
break;
}
if(isset($reg_value[3])){
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
}
}
$i++;
}
} else {
foreach ($this->rules as $key => $reg_value){
$tags = $reg_value[2] ?? '';
$lobj = pq($document)->find($reg_value[0]);
$i = 0;
foreach ($lobj as $item) {
switch ($reg_value[1]) {
case 'text':
$data[$i][$key] = $this->allowTags(pq($item)->html(),$tags);
break;
case 'html':
$data[$i][$key] = $this->stripTags(pq($item)->html(),$tags);
break;
default:
$data[$i][$key] = pq($item)->attr($reg_value[1]);
break;
}
if(isset($reg_value[3])){
$data[$i][$key] = call_user_func($reg_value[3],$data[$i][$key],$key);
}
$i++;
}
}
}
phpQuery::$documents = array();
return collect($data);
}
/**
* 去除特定的html标签
* @param string $html
* @param string $tags_str 多个标签名之间用空格隔开
* @return string
*/
protected function stripTags($html,$tags_str)
{
$tagsArr = $this->tag($tags_str);
$html = $this->removeTags($html,$tagsArr[1]);
$p = array();
foreach ($tagsArr[0] as $tag) {
$p[]="/(<(?:\/".$tag."|".$tag.")[^>]*>)/i";
}
$html = preg_replace($p,"",trim($html));
return $html;
}
/**
* 保留特定的html标签
* @param string $html
* @param string $tags_str 多个标签名之间用空格隔开
* @return string
*/
protected function allowTags($html,$tags_str)
{
$tagsArr = $this->tag($tags_str);
$html = $this->removeTags($html,$tagsArr[1]);
$allow = '';
foreach ($tagsArr[0] as $tag) {
$allow .= "<$tag> ";
}
return strip_tags(trim($html),$allow);
}
protected function tag($tags_str)
{
$tagArr = preg_split("/\s+/",$tags_str,-1,PREG_SPLIT_NO_EMPTY);
$tags = array(array(),array());
foreach($tagArr as $tag)
{
if(preg_match('/-(.+)/', $tag,$arr))
{
array_push($tags[1], $arr[1]);
if(method_exists($this->query,$name)){
$result = $this->query->$name(...$arguments);
}else{
array_push($tags[0], $tag);
$result = $this->kernel->getService($name)->call($this,...$arguments);
}
}
return $tags;
return $result;
}
/**
* 移除特定的html标签
* @param string $html
* @param array $tags 标签数组
* @return string
*/
protected function removeTags($html,$tags)
public static function __callStatic($name, $arguments)
{
$tag_str = '';
if(count($tags))
$instance = self::getInstance();
return $instance->$name(...$arguments);
}
public function __destruct()
{
foreach ($tags as $tag) {
$tag_str .= $tag_str?','.$tag:$tag;
$this->destruct();
}
phpQuery::$defaultCharset = $this->inputEncoding?$this->inputEncoding:$this->htmlEncoding;
$doc = phpQuery::newDocumentHTML($html);
pq($doc)->find($tag_str)->remove();
$html = pq($doc)->htmlOuter();
$doc->unloadDocument();
public static function getInstance()
{
$instance = new self();
return $instance;
}
return $html;
public function destruct()
{
phpQuery::$documents = [];
}
}

View File

@ -3,20 +3,35 @@
* Created by PhpStorm.
* User: Jaeger <JaegerCode@gmail.com>
* Date: 2017/9/20
* 编码转换服务
*/
namespace QL\Services;
use QL\QueryList;
class EncodeService
{
public static function convert($ql,string $outputEncoding,string $inputEncoding = null)
/**
 * Convert the HTML held by a QueryList instance to another encoding.
 *
 * When no input encoding is given it is guessed with self::detect(). If the
 * encoding cannot be determined, or iconv() fails, the instance is returned
 * with its HTML left as it was.
 *
 * @param QueryList   $ql             Instance whose HTML is converted.
 * @param string      $outputEncoding Target encoding.
 * @param string|null $inputEncoding  Source encoding, or null to auto-detect.
 * @return QueryList The same instance that was passed in.
 */
public static function convert(QueryList $ql,string $outputEncoding,string $inputEncoding = null)
{
    $html = $ql->getHtml();
    if (!$inputEncoding) {
        $inputEncoding = self::detect($html);
    }
    // detect() can yield false; iconv() has nothing to work with then.
    if (!$inputEncoding) {
        return $ql;
    }
    $converted = iconv($inputEncoding, $outputEncoding . '//IGNORE', $html);
    // iconv() returns false on failure; do not overwrite the document with it.
    if ($converted !== false) {
        $ql->setHtml($converted);
    }
    return $ql;
}
public static function detect()
/**
 * Guess the character encoding of a string.
 *
 * Candidates are tried through mb_detect_encoding() in strict mode; a result
 * of "CP936" (compared case-insensitively) is reported as "GBK".
 *
 * @param $string Text to inspect.
 * @return bool|false|mixed|string Whatever mb_detect_encoding() returned, with CP936 mapped to GBK.
 */
public static function detect($string)
{
    $candidates = array('ASCII', 'GB2312', 'GBK', 'UTF-8');
    $detected = mb_detect_encoding($string, $candidates, true);

    return strtolower($detected) === 'cp936' ? 'GBK' : $detected;
}
}