复制代码
array(.code_title a:eq(0),text),url=>array(.code_title a:eq(0),href),author=>array(img,title));$rang = .code_list li;$hj = new querylist($url,$reg,$rang);$arr = $hj->jsonarr;print_r($arr);//如果还想采当前页面右边的 top40活跃贡献者 图像,得到json数据,可以这样写$reg = array(portrait=>array(.hot_top img,src));$hj->setquery($reg);$json = $hj->getjson();echo $json . ;//采osc内容页内容$url = http://www.oschina.net/code/snippet_186288_23816;$reg = array(title=>array(.qtitle h1,text),con=>array(.content,html));$hj = new querylist($url,$reg);$arr = $hj->jsonarr;print_r($arr);//就举这么多例子吧,是不是用来做采集很方便
复制代码
regarr = array(title=>array(h3.t a,#ting_singlesong_box a,text),tcon=>array(div.c-abstract,font:slice(0,2),div#weibo,table tr:eq(0),div.c-abstract-size p:eq(0),div.vd_sitcom_new_tinfo,text),url=>array(h3.t a,#ting_singlesong_box a,href)); $this->regrange = 'table.result,table.result-op'; $this->regznum=array(znum=>array(span.nums,text)); } else if($searcher=='google') { $this->regarr = array(title=>array(h3.r a,text),tcon=>array(span.st,text),url=>array(h3.r a,href)); $this->regrange = 'li.g'; $this->regznum=array(znum=>array(div#resultstats,text)); } $this->searcher = $searcher; $this->key = $key; $this->num = $num; $this->page = $page-1; $this->getlist(); } private function getlist() { $s = urlencode($this->key); $num = $this->num; $start = $this->num*$this->page; if($this->searcher=='baidu') { $url = http://www.baidu.com/s?pn=$start&rn=$num&wd=$s; $reg_znum='/[\d,]+/'; } else if($this->searcher=='google') { $url=https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images&hl=en&as_qdr=all&num=$num&start=$start&q=$s; $reg_znum='/([\d,]+) result(s)?/'; } $searcherobj = new querylist($url,$this->regarr,$this->regrange); for($i=0;$ijsonarr);$i++) { if($this->searcher=='baidu') { $searcherobj->jsonarr[$i]['url'] = $this->getbaidurealurl($searcherobj->jsonarr[$i]['url']); } else if($this->searcher=='google') { $searcherobj->jsonarr[$i]['url'] = $this->getgooglerealurl($searcherobj->jsonarr[$i]['url']); } } $this->jsonarr = $searcherobj->jsonarr ; //获取总共结果条数 $searcherobj->setquery($this->regznum); $znum = $searcherobj->jsonarr[0]['znum']; preg_match($reg_znum,$znum,$arr)?$znum=$arr[0]:$znum=0; $znum = (int)str_replace(',','',$znum); //计算总页数 $zpage = ceil($znum/$this->num); $this->jsonarr=array('num'=>$this->num,'page'=>((int)$this->page+1),'znum'=>$znum,'zpage'=>$zpage,s=>$this->key,'other'=>array('author'=>'jae','qq'=>'734708094','blog'=>'http://blog.jaekj.com'),'data'=>$this->jsonarr); } function getjson() { return json_encode($this->jsonarr); } private 
function getbaidurealurl($url)
{
    // Resolve a Baidu redirect link to the real address it points to.
    //
    // $url is the link taken from a result entry. The return value is the
    // first Location target when the first response is a 301/302 redirect,
    // and $url itself in every other case (failed request, non-redirect
    // status, or a redirect without a Location header).
    $header = get_headers($url, 1);
    if ($header === false || !isset($header[0])) {
        // The request failed, so there is nothing to resolve.
        return $url;
    }

    // Check the status code itself instead of searching the whole status
    // line for the digits "301"/"302".
    if (!preg_match('/^HTTP\/\S+\s+(?:301|302)\b/', $header[0])) {
        return $url;
    }

    // Header names keep the casing the server sent ("Location" vs
    // "location"), so normalise the keys before the lookup.
    $header = array_change_key_case($header, CASE_LOWER);
    if (empty($header['location'])) {
        return $url;
    }

    $location = $header['location'];
    if (is_array($location)) {
        // Several Location headers were collected; keep returning the first
        // one. The alternative is the last element:
        // $location[count($location) - 1]
        return $location[0];
    }

    return $location;
}

/**
 * Extract the real target address from a Google result link.
 *
 * The value of the "q" parameter is taken up to the next "&" and
 * URL-decoded. Stopping at the first "&" keeps the parameters that follow
 * "q" out of the returned address.
 *
 * @param string $url Link taken from a result entry.
 * @return string Decoded "q" value, or $url unchanged when no "q=...&" is found.
 */
private function getgooglerealurl($url)
{
    $reg_url = '/q=([^&]+)&/u';
    if (preg_match($reg_url, $url, $arr)) {
        return urldecode($arr[1]);
    }

    return $url;
}
}
// $hj = new searcher('google','oschina',20,2);
// print_r( $hj->jsonarr);
// Live demo address:
// http://blog.jaekj.com//jae/demo/searcher/searcher_class.php?searcher=baidu&s=jaekj&num=20&page=1
复制代码
