n;
$this->dir = dirname(__file__)./;
}
/**
 * Load the page at $url into $this->contents, going through the file cache.
 *
 * Records the request on the instance ($this->ttl, $this->url, $this->name,
 * $this->filename, $this->host), then looks up the timestamp of the cache
 * file via getfile_ts() and loads the content via file_get_content().
 *
 * @param string $url Address of the page to snapshot. An empty value is rejected.
 * @param int    $ttl Maximum age of the cache file; it is compared against a
 *                    difference of Unix timestamps, i.e. seconds.
 * @return bool false when no URL was given, otherwise whatever
 *              file_get_content() reports.
 */
function fetch($url="",$ttl=10){
    $this->log .= "--------------------------------<br>fetch() called<br>\n";
    $this->log .= "url: ".$url."<br>\n";

    // Validate before touching parse_url(): indexing its result for an empty
    // URL raises "undefined index" notices and yields a bogus host.
    if (!$url) {
        $this->log .= "oops: you need to pass a url!<br>";
        return false;
    }

    // Base ("scheme://host[:port]/") later used to make relative links absolute.
    $hosts = parse_url($url);
    if (is_array($hosts) && isset($hosts['scheme']) && isset($hosts['host'])) {
        $port = isset($hosts['port']) ? ':'.$hosts['port'] : '';
        $this->host = $hosts['scheme'].'://'.$hosts['host'].$port.'/';
    } else {
        // Without a scheme and host there is no base to resolve against;
        // an empty base makes the link rewriter leave the markup untouched.
        $this->host = '';
        $this->log .= "could not determine host of ".$url."<br>";
    }

    $this->ttl = $ttl;
    $this->url = $url;
    // The cache file is named after the MD5 of the URL.
    $this->name = md5($this->url);
    $this->filename = $this->dir.$this->name;
    $this->log .= "filename: ".$this->filename."<br>";

    $this->getfile_ts();
    return $this->file_get_content();
}
/**
 * Fill $this->contents either from the cache file or from the remote URL.
 *
 * The cache file is used when it exists ($this->data_ts is non-zero) and its
 * age ($this->ts, in seconds) does not exceed $this->ttl. Otherwise the URL
 * is downloaded and the result is handed to savetocache().
 *
 * @return bool true when content was loaded, false when the download failed.
 */
function file_get_content(){
    // Age of the cache file; data_ts is 0 when there is no cache file.
    $this->ts = time() - $this->data_ts;

    if ($this->data_ts <> 0 && $this->ts <= $this->ttl) {
        // Fresh cache: serve the stored copy.
        $this->log .= "cache hasn't expired<br>";
        $data = @file_get_contents($this->filename);
        $this->contents = ($data === false) ? '' : $data;
        return true;
    }

    // No cache file, or it is too old: download the page again.
    $this->log .= "cache has expired<br>";
    $data = @file_get_contents($this->url);
    if ($data === false) {
        // Do not call savetocache() here: it would truncate an existing
        // cache file and store an empty snapshot.
        $this->contents = '';
        $this->log .= "could not read ".$this->url."<br>";
        return false;
    }

    $this->contents = $data;
    $this->savetocache();
    return true;
}
/**
 * Post-process $this->contents and write it to the cache file.
 *
 * After the cache file has been opened, links in the content are made
 * absolute via formaturl() and the content is filtered with a regular
 * expression; the processed text is kept in $this->contents and written to
 * $this->filename.
 *
 * @return bool true on success, false when the file could not be opened or
 *              written (the reason is appended to $this->log).
 */
function savetocache(){
    $this->log .= "savetocache() called<br>";

    // create file pointer
    if (!$fp=@fopen($this->filename,"w")) {
        $this->log .= "could not open ".$this->filename."<br>";
        return false;
    }

    $this->contents = $this->formaturl($this->contents,$this->host);

    // NOTE(review): the original pattern was lost from this file (only the
    // "si" modifiers and an empty replacement survive). Removing <script>
    // blocks is a reconstruction - confirm against the original source.
    $filtered = preg_replace('#<script\b[^>]*>.*?</script>#si', '', $this->contents);
    if ($filtered !== null) {
        // preg_replace() yields null on a PCRE error; keep the content then.
        $this->contents = $filtered;
    }

    // write to file; compare with false so that writing zero bytes
    // (empty content) is not reported as an error
    if (@fwrite($fp,$this->contents) === false) {
        $this->log .= "could not write to ".$this->filename."<br>";
        fclose($fp);
        return false;
    }

    // close file pointer
    fclose($fp);
    return true;
}
/**
 * Store the modification time of the cache file in $this->data_ts.
 *
 * $this->data_ts is set to 0 when the cache file does not exist or its
 * timestamp cannot be read.
 *
 * @return bool true when a timestamp was obtained, false otherwise.
 */
function getfile_ts(){
    $this->log .= "getfile_ts() called<br>";

    // PHP caches stat results per request; drop them so a file written
    // earlier in the same request is seen with its current timestamp.
    clearstatcache();

    if (!file_exists($this->filename)) {
        $this->data_ts = 0;
        $this->log .= $this->filename." does not exist<br>";
        return false;
    }

    $mtime = @filemtime($this->filename);
    if ($mtime === false) {
        // Treat an unreadable timestamp like a missing cache file.
        $this->data_ts = 0;
        $this->log .= "could not read timestamp of ".$this->filename."<br>";
        return false;
    }

    $this->data_ts = $mtime;
    return true;
}
/**
 * Rewrite the src/href attributes of the tags in $l1 so they are absolute.
 *
 * Every tag that carries a quoted src or href attribute is passed to
 * liiiil() together with the base URL, and the tag is replaced by the result.
 *
 * @param string $l1 HTML text to process.
 * @param string $l2 Base URL the links are resolved against.
 * @return string The HTML with the rewritten tags.
 */
function formaturl($l1,$l2){
    // NOTE(review): the tag names of the original pattern were lost from this
    // file; only "src"/"href" with double- or single-quoted values survive.
    // Matching any tag with such an attribute is a reconstruction - confirm.
    $pattern = '/<[a-z][^>]*\s(?:src|href)=(?:"[^"]+"|\'[^\']+\')[^>]*>/i';

    if (preg_match_all($pattern,$l1,$regs)){
        // str_replace() already replaces every occurrence of a tag, so each
        // distinct tag only needs to be processed once.
        foreach(array_unique($regs[0]) as $tag){
            $l1 = str_replace($tag,$this->liiiil($tag,$l2),$l1);
        }
    }
    return $l1;
}
/**
 * Make the href/src value of a single HTML tag absolute.
 *
 * The last whitespace-preceded href= or src= attribute of the tag is used.
 * Values that already carry a scheme (http:, https:, ftp:, mailto:,
 * javascript:, ...), protocol-relative values ("//host/..."), pure fragments
 * ("#top") and empty values are left alone. For every other value the result
 * is built from the scheme, host and port of $l2 plus, for relative values,
 * the directory part of $l2's path. A fragment on a rewritten value is
 * dropped, as in the original implementation.
 *
 * @param string $l1 One complete HTML tag.
 * @param string $l2 Base URL to resolve against.
 * @return string The tag with the rewritten attribute, or $l1 unchanged when
 *                nothing has to be (or can be) rewritten.
 */
function liiiil($l1,$l2){
    // Capture the raw attribute value (quotes included) and its byte offset,
    // so exactly that occurrence is replaced later. Quoted values may
    // contain spaces; unquoted ones end at whitespace, "/>" or ">".
    $pattern = '/.*\s(?:href|src)=("[^"]*"|\'[^\']*\'|[^\s>]+?(?=\s|\/>|>))/is';
    if (!preg_match($pattern, $l1, $regs, PREG_OFFSET_CAPTURE)) {
        return $l1;
    }
    $i2 = $regs[1][0];
    $offset = $regs[1][1];

    // Remember the quoting style so the rewritten value stays quoted.
    $first = substr($i2, 0, 1);
    if (($first === '"' || $first === "'") && strlen($i2) >= 2) {
        $quote = $first;
        $i1 = substr($i2, 1, -1);
    } else {
        $quote = '"';
        $i1 = str_replace(array(chr(34), chr(39)), '', $i2);
    }
    $i1 = (string)$i1;
    if ($i1 === '') {
        return $l1;
    }

    // Base: scheme://host[:port]
    $url_parsed = parse_url($l2);
    if (!is_array($url_parsed)) {
        return $l1;
    }
    $scheme = isset($url_parsed['scheme']) ? $url_parsed['scheme'] : '';
    if ($scheme != '') {
        $scheme = $scheme.'://';
    }
    $host = isset($url_parsed['host']) ? $url_parsed['host'] : '';
    $l3 = $scheme.$host;
    if (strlen($l3) == 0) {
        return $l1;
    }
    if (isset($url_parsed['port'])) {
        $l3 .= ':'.$url_parsed['port'];
    }

    // Directory of the base path without a trailing slash. dirname() gives
    // "/" (or "\" on Windows) for a top-level path, which would otherwise
    // produce "host//file".
    $path = isset($url_parsed['path']) ? dirname($url_parsed['path']) : '';
    $path = rtrim(str_replace('\\', '/', $path), '/');
    if ($path === '.') {
        $path = '';
    }

    // A link that is only a fragment points into the current document.
    if (substr($i1, 0, 1) === '#') {
        return $l1;
    }
    $pos = strpos($i1, '#');
    if ($pos !== false) {
        $i1 = substr($i1, 0, $pos);
    }

    // Determine the kind of URL.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $i1) || substr($i1, 0, 2) === '//') {
        // Already absolute (has a scheme) or protocol-relative: skip.
        return $l1;
    } elseif (substr($i1, 0, 1) === '/') {
        // Absolute path on the same host.
        $i1 = $l3.$i1;
    } elseif (substr($i1, 0, 3) === '../') {
        // Relative path: climb one directory per leading "../".
        while (substr($i1, 0, 3) === '../') {
            $i1 = (string)substr($i1, 3);
            if (strlen($path) > 0) {
                $path = rtrim(str_replace('\\', '/', dirname($path)), '/');
            }
        }
        $i1 = $l3.$path.'/'.$i1;
    } elseif (substr($i1, 0, 2) === './') {
        // Drop the leading "." and keep the "/".
        $i1 = $l3.$path.substr($i1, 1);
    } else {
        $i1 = $l3.$path.'/'.$i1;
    }

    return substr_replace($l1, $quote.$i1.$quote, $offset, strlen($i2));
}
}
?>
用法test.php:
$h->fetch($_GET['url']);
//echo $h->log;
echo $h->contents;
?>
http://www.bkjia.com/phpjc/445124.htmlwww.bkjia.comtruehttp://www.bkjia.com/phpjc/445124.htmltecharticle?php //==================================================== // filename: snap.class.php // summary: 网页快照类 // author: millken(迷路林肯) // lastmodifed:2007-06-29 // co...
