您好,欢迎来到三六零分类信息网!老站,搜索引擎当天收录,欢迎发信息

网页快照功能_PHP教程

2025/9/20 13:47:18 发布 · 17 次查看
log = new snap() object instantiated.
n;  
        $this->dir = dirname(__file__)./;
    }
    function fetch($url=,$ttl=10){
        $this->log .= --------------------------------
fetch() called
n;
        $this->log .= url: .$url.
n;
        $hosts = parse_url($url);
        $this->host = $hosts['scheme'].'://'.$hosts['host'].'/';
        if (!$url) {
            $this->log .= oops: you need to pass a url!
;
            return false;
        }
        $this->ttl = $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir.$this->name;
        $this->log .= filename: .$this->filename.
;
        $this->getfile_ts();
        $this->file_get_content();
    }
    function file_get_content(){
        ob_start();
        $this->ts = time() - $this->data_ts;
        if($this->data_ts 0 && $this->ts ttl){
            $this->log .= cache has expired
;
            @readfile($this->filename);  
            $this->contents = ob_get_contents();
            ob_end_clean();
        }else{
            $this->log .= cache hasn't expired
;       
            @readfile($this->url);  
            $this->contents = ob_get_contents();
            ob_end_clean();
            $this->savetocache();
        }
        return true;
    }
    function savetocache(){
        $this->log .= savetocache() called
;
        //create file pointer
        if (!$fp=@fopen($this->filename,w)) {
            $this->log .= could not open .$this->filename.
;
            return false;
        }
        $this->contents = $this->formaturl($this->contents,$this->host);
        $this->contents = preg_replace(''si,,$this->contents);
        //write to file
        if (!@fwrite($fp,$this->contents)) {
            $this->log .= could not write to .$this->filename.
;
            fclose($fp);
            return false;
        }
        //close file pointer
        fclose($fp);
        return true;
    }
    function getfile_ts(){
        $this->log .= getfile_ts() called
;
        if (!file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->log .= $this->filename. does not exist
;
            return false;
        }
        $this->data_ts = filemtime($this->filename);
        return true;
    }
    function formaturl($l1,$l2){
    if (preg_match_all(/(]+src=([^]+)[^>]*>)|(]+href=([^]+)[^>]*>)|(]+href=([^]+)[^>]*>)|(]+src='([^']+)'[^>]*>)|(]+href='([^']+)'[^>]*>)/i,$l1,$regs)){
      foreach($regs[0] as $num => $url){
       $l1 = str_replace($url,$this->liiiil($url,$l2),$l1);
      }
    }
    return     $l1;
    }
    function liiiil($l1,$l2){
    if(preg_match(/(.*)(href|src)=(.+?)( |/>|>).*/i,$l1,$regs)){$i2 = $regs[3];}
    if(strlen($i2)>0){
      $i1 = str_replace(chr(34),,$i2);
      $i1 = str_replace(chr(39),,$i1);
    }else{return $l1;}
    $url_parsed = parse_url($l2);
    $scheme      = $url_parsed[scheme];if($scheme!=){$scheme = $scheme.://;}
    $host      = $url_parsed[host];  
    $l3       = $scheme.$host;
    if(strlen($l3)==0){return $l1;}
    $path      = dirname($url_parsed[path]);if($path[0]==){$path=;}
    $pos      = strpos($i1,#);
    if($pos>0) $i1 = substr($i1,0,$pos);
    //判断类型
    if(preg_match(/^(http|https|ftp):(//|\)(([w/+-~`@:%])+.)+([w/.=?+-~`@':!%#]|(&)|&)+/i,$i1)){return $l1; }//http开头的url类型要跳过
    elseif($i1[0]==/){$i1 = $l3.$i1;}//绝对路径
    elseif(substr($i1,0,3)==../){//相对路径
          while(substr($i1,0,3)==../){
       $i1 = substr($i1,strlen($i1)-(strlen($i1)-3),strlen($i1)-3);
       if(strlen($path)>0){
        $path = dirname($path);
       }
      }
      $i1 = $l3.$path./.$i1;
    }
    elseif(substr($i1,0,2)==./){
      $i1 = $l3.$path.substr($i1,strlen($i1)-(strlen($i1)-1),strlen($i1)-1);
    }
    elseif(strtolower(substr($i1,0,7))==mailto:||strtolower(substr($i1,0,11))==java script:){
      return $l1;
    }else{
      $i1 = $l3.$path./.$i1;
    }
    return str_replace($i2,$i1,$l1);
    }
}
?>
用法(test.php):
 fetch($_get['url']);
//echo $h->log;
echo $h->contents;
?>
来源:http://www.bkjia.com/phpjc/445124.html | www.bkjia.com | true | http://www.bkjia.com/phpjc/445124.html | TechArticle | 摘要:<?php //==================================================== // filename: snap.class.php // summary: 网页快照类 // author: millken(迷路林肯) // lastmodified: 2007-06-29 // co...
该用户其它信息

VIP推荐

免费发布信息,免费发布B2B信息网站平台 - 三六零分类信息网 沪ICP备09012988号-2
企业名录 Product