diff --git a/QueryList.class.php b/QueryList.class.php
index 16c3e6b..3666f2d 100644
--- a/QueryList.class.php
+++ b/QueryList.class.php
@@ -7,7 +7,7 @@
* @author Jaeger
* @email 734708094@qq.com
* @link http://git.oschina.net/jae/QueryList
- * @version 2.2.0
+ * @version 2.2.1
*
* @example
*
@@ -55,6 +55,8 @@ class QueryList
private $outputEncoding;
private $htmlEncoding;
private static $ql;
+ private function __construct() { // Private and empty: prevents `new QueryList` from outside the class; presumably instances come from the static entry point below (confirm).
+ }
/**
* 静态方法,访问入口
* @param string $page 要抓取的网页URL地址(支持https);或者是html源代码
@@ -119,7 +121,7 @@ class QueryList
}
//获取编码格式
$this->htmlEncoding = $this->_getEncode($this->html);
- $this->html = $this->_removeTags($this->html,array('script','style'));
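+ // NOTE(review): script/style stripping is disabled here, presumably so selectors such as script[src] can still match (confirm, then delete this dead line): $this->html = $this->_removeTags($this->html,array('script','style'));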
if (!empty($regArr)) {
$this->regArr = $regArr;
$this->regRange = $regRange;
diff --git a/demo/demo.php b/demo/demo.php
index 63eb10b..dc4c7c6 100644
--- a/demo/demo.php
+++ b/demo/demo.php
@@ -1,17 +1,20 @@
array(".code_title a:eq(0)","text"),"url"=>array(".code_title a:eq(0)","href"),"author"=>array("img","title"));
$rang = ".code_list li";
-//使用curl抓取源码并以GB2312编码格式输出
-$hj = QueryList::Query($url,$reg,$rang,'curl','GB2312');
+//使用curl抓取源码并以GBK编码格式输出
+$hj = QueryList::Query($url,$reg,$rang,'curl','GBK');
$arr = $hj->jsonArr;
echo "
";
print_r($arr);
echo "
";
+echo '上面的是GBK格式输出的,而页面是UTF-8格式的,所以会看到输出是乱码!';
+echo '
';
+
//如果还想采当前页面右边的 TOP40活跃贡献者 图像,得到JSON数据,可以这样写
$reg = array("portrait"=>array(".hot_top img","src"));
$hj->setQuery($reg);
@@ -25,4 +28,41 @@ $hj = QueryList::Query($url,$reg);
$arr = $hj->jsonArr;
echo "";
print_r($arr);
-echo "
";
\ No newline at end of file
+echo "
";
+
+//抓取网站基本信息
+//设置规则
+$reg = array(
+ //抓取网站keywords
+ "kw" => array("meta[name=keywords]","content"),
+ //抓取网站描述
+ "desc" => array("meta[name=description]","content"),
+ //抓取网站标题
+ "title" => array("title","text"),
+ // Grab the href of the first <link> element on the page (usually a stylesheet, but link:eq(0) matches any <link>)
+ "css1" => array("link:eq(0)","href"),
+ // Grab the src of the second <script> element that has a src attribute (:eq(1) is zero-based)
+ "js2" => array("script[src]:eq(1)","src")
+ );
+//抓取的目标站
+$url = 'http://x.44i.cc/';
+//抓取
+$data = QueryList::Query($url,$reg)->jsonArr;
+print_r($data);
+
+//下面单独演示回调函数的用法
+//抓取网站keywords并分离每个关键词
+$reg = array(
+ //抓取网站keywords,并调用自定义函数fun
+ "kw" => array("meta[name=keywords]","content",'','fun')
+ );
+//自定义回调函数
+function fun($content,$key){ // Callback named by the "kw" rule above; $key is accepted but not used here.
+ // Split the comma-separated keywords string into an array of keywords.
+ return explode(',', $content);
+}
+//抓取的目标站
+$url = 'http://x.44i.cc/';
+//抓取
+$data = QueryList::Query($url,$reg)->jsonArr;
+print_r($data);
\ No newline at end of file
diff --git a/demo/一个完整的DEMO项目.rar b/demo/一个完整的DEMO项目.rar
deleted file mode 100644
index c50397c..0000000
Binary files a/demo/一个完整的DEMO项目.rar and /dev/null differ
diff --git a/demo/一个完整的DEMO项目.zip b/demo/一个完整的DEMO项目.zip
new file mode 100644
index 0000000..74d997e
Binary files /dev/null and b/demo/一个完整的DEMO项目.zip differ