set_time_limit(0);// get测试淘宝详情页抓取$url = http://item.taobao.com/item.htm?id=37530539791;$url = http://item.taobao.com/item.htm?id=14861616067;//curl 伪造useragent$useragent = array( 'mozilla/4.0 (compatible; msie 8.0; windows nt 6.0)', 'mozilla/4.0 (compatible; msie 7.0; windows nt 5.2)', 'mozilla/4.0 (compatible; msie 6.0; windows nt 5.1)', 'mozilla/5.0 (windows; u; windows nt 5.2) gecko/2008070208 firefox/3.0.1', 'opera/9.27 (windows nt 5.2; u; zh-cn)', 'opera/8.0 (macintosh; ppc mac os x; u; en)', 'mozilla/5.0 (windows; u; windows nt 5.2) applewebkit/525.13 (khtml, like gecko) chrome/0.2.149.27 safari/525.13 ', 'mozilla/5.0 (windows; u; windows nt 5.2) applewebkit/525.13 (khtml, like gecko) version/3.1 safari/525.13');header(content-type: text/html; charset=utf-8);$cookiefile = realpath(./)./application/runtime/temp/cookie.txt;//创建一个用于存放cookie信息的临时文件,if (!file_exists($cookiefile)){ $file = @file_put_contents($cookiefile, );}$ch = curl_init();//设置选项,包括urlcurl_setopt($ch, curlopt_url, $url);curl_setopt($ch, curlopt_timeout, $timeout);curl_setopt($ch, curlopt_header, 0);curl_setopt($ch, curlopt_nobody,0);curl_setopt($ch, curlopt_maxredirs, 300);curl_setopt($ch, curlopt_returntransfer, true); //获取数据返回流形式curl_setopt($ch, curlopt_autoreferer, true); //重定向时,自动设置header中的referer:信息curl_setopt($ch, curlopt_followlocation, true); //启用时会将服务器服务器返回的location: 放在header中递归的返回给服务器,使用curlopt_maxredirs可以限定递归返回的数量// 设置ip和useragent/*curl_setopt($ch, curlopt_useragent, 'mozilla/4.0 (compatible; msie 8.0; windows nt 6.1; trident/4.0)');curl_setopt($ch, curlopt_httpheader, array('x-forwarded-for:28.58.88.'.$r, 'client-ip:225.28.58.'.$r)); //构造ip curl_setopt($ch, curlopt_referer, http://www.baidu.com); //构造来路 curl_setopt($ch, curlopt_useragent, array_rand($useragent));*/// 设置代理/*curl_setopt($ch, curlopt_httpproxytunnel, 1);curl_setopt($ch, curlopt_proxy, '218.213.168.131:80');*///curl_setopt($ch, curlopt_proxyuserpwd, 'user:password');// 对于cookie保存curl_setopt($ch, curlopt_cookiesession, true);curl_setopt($ch, curlopt_cookiefile, $cookiefile);//关闭连接时,将服务器端返回的cookie保存在以下文件中curl_setopt($ch, curlopt_cookiejar, $cookiefile);//执行并获取html文档内容for ($i=0;$i http://item.taobao.com/item.htm?id=14861616067
[content_type] => text/html
[http_code] => 200
[header_size] => 197
[request_size] => 156
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0.562
[namelookup_time] => 0
[connect_time] => 0
[pretransfer_time] => 0.015
[size_upload] => 0
[size_download] => 20
[speed_download] => 35
[speed_upload] => 0
[download_content_length] => -1
[upload_content_length] => 0
[starttransfer_time] => 0.562
[redirect_time] => 0
[redirect_url] =>
[primary_ip] => 58.63.255.240
[certinfo] => array
(
)
[primary_port] => 80
[local_ip] => 192.168.1.102
[local_port] => 65328
)
但是die返回结果却一直为空,用第一个url的时候是可以的。(cookie,伪造ip,设置浏览器信息都试过了,第二个url还是不行)。
求个大神指教下,还有什么设置没对吗?
回复讨论(解决方案) $url = http://item.taobao.com/item.htm?id=14861616067;echo curl_get($url);
得到的数据中有
t浦力顿500g 成犬幼犬狗粮泰迪贵宾比熊金毛萨摩耶博美 批发散装-淘宝网
t浦力顿500g 成犬幼犬狗粮泰迪贵宾比熊金毛萨摩耶博美 批发散装
证明访问是没有问题的
function curl_get($durl, $data=array()) { $cookiejar = realpath('cookie.txt'); $t = parse_url($durl); $ch = curl_init(); curl_setopt($ch, curlopt_url,$durl); curl_setopt($ch, curlopt_timeout,5); curl_setopt($ch, curlopt_ssl_verifypeer, 0); curl_setopt($ch, curlopt_useragent, $_server['http_user_agent']); curl_setopt($ch, curlopt_referer, http://$t[host]/); curl_setopt($ch, curlopt_cookiefile, $cookiejar); curl_setopt($ch, curlopt_cookiejar, $cookiejar); curl_setopt($ch, curlopt_returntransfer,1); curl_setopt($ch, curlopt_followlocation, true); if($data) { curl_setopt($ch, curlopt_post, 1); curl_setopt($ch, curlopt_postfields, $data); } $r = curl_exec($ch); curl_close($ch); return $r;}
谢谢,是可以用的,我的不可以是因为 来源模拟 错了吗?
你可以在我的代码中逐个注释掉来判断少了什么
估计是 少了 curlopt_referer
好的,谢谢。