存档

文章标签 ‘tumblr’

tumblr.com视频采集器,可以定位到某位博客地址

2017年5月9日 没有评论

tumblr.com视频采集器,可以定位到某位博客地址,支持将视频地址采集生成到txt文档中。

每10页采集到一个文档中。

<?php
// Serve the progress output as UTF-8 HTML.
// (The published copy used typographic quotes here, which is a parse error.)
header('content-type:text/html; charset=utf-8');
// Disable the execution time limit: each request fetches several remote
// pages and then sleeps before redirecting to the next page.
set_time_limit(0);
/**
 * Fetch a URL with cURL and hand back whatever curl_exec() produced.
 *
 * The transfer is capped at 120 seconds and the response is returned
 * instead of being echoed (CURLOPT_RETURNTRANSFER).
 *
 * @param string $remoteUrl URL to request.
 * @return string|bool Response body, or false when the transfer fails.
 */
function curl_file_get_contents($remoteUrl){
    $handle = curl_init();

    curl_setopt($handle, CURLOPT_URL, $remoteUrl);
    curl_setopt($handle, CURLOPT_TIMEOUT, 120);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
/**
 * Write the collected video URLs to "<page>.txt", one URL per CRLF-terminated line.
 *
 * @param int   $page Page number used as the file name.
 * @param array $urls Video URLs to write.
 * @return bool True when the file was written, false when it could not be opened.
 */
function caiji_write_urls($page, array $urls){
    $stream = fopen($page.".txt", "w+");
    if ($stream === false) {
        return false;
    }
    foreach ($urls as $url) {
        fwrite($stream, $url."\r\n");
    }
    fclose($stream);
    return true;
}

// The scraper state (collected URLs + pages since the last flush) lives in
// the session so it survives the page-to-page redirects emitted below.
session_start();

// To restart a run from scratch, reset the session state once by hand:
//   $_SESSION['urls'] = array(); $_SESSION['urls_time'] = 0;

// Initialise only when the state is genuinely missing. Testing with empty()
// would also reset the page counter on every page that yielded no videos.
if (!isset($_SESSION['urls'], $_SESSION['urls_time']) || !is_array($_SESSION['urls'])) {
    $_SESSION['urls'] = array();
    $_SESSION['urls_time'] = 0;
}

// Total number of blog pages to crawl.
$totalPages = 217;

// A missing "page" parameter starts the crawl at page 0.
$page = isset($_GET['page']) ? intval($_GET['page']) : 0;
if ($page >= $totalPages) {
    // Flush whatever was collected since the last batch so the tail of the
    // crawl is not lost when the page limit is reached.
    if (!empty($_SESSION['urls']) && caiji_write_urls($page, $_SESSION['urls'])) {
        $_SESSION['urls'] = array();
        $_SESSION['urls_time'] = 0;
    }
    die('over');
}

// Fetch the blog listing page; a failed transfer is treated as an empty page.
$content = curl_file_get_contents('http://xxx.tumblr.com/page/'.$page);
if (!is_string($content)) {
    $content = '';
}

// Each video post is embedded as an <iframe>; capture the id segment of its URL.
$iframes = array();
preg_match_all("/<iframe src=\'https\:\/\/www\.tumblr\.com\/video\/xxx\/([^\"]*)\/700\/\' style=/is", $content, $iframes);

foreach ($iframes[1] as $videoId) {
    $playerHtml = file_get_contents('https://www.tumblr.com/video/xxx/'.$videoId.'/700/');
    if ($playerHtml === false) {
        // Player page could not be fetched; skip this video.
        continue;
    }
    // Keep the first mp4 <source> URL of the player page, if there is one.
    $source = array();
    if (preg_match("/<source src=\"([^\"]*)\" type=\"video\/mp4\"><\/video>/is", $playerHtml, $source) === 1) {
        $_SESSION['urls'][] = $source[1];
    }
}

if ($_SESSION['urls_time'] > 8) {
    // Tenth page of the batch: dump the collected URLs to "<page>.txt".
    if (caiji_write_urls($page, $_SESSION['urls'])) {
        $_SESSION['urls'] = array();
        $_SESSION['urls_time'] = 0;
    } else {
        // Keep the URLs in the session so the next page retries the write.
        echo 'could not write '.$page.'.txt, will retry<br />';
    }
    echo '10 times ,sleep 20s<br />';
    sleep(20);
}
else {
    $_SESSION['urls_time'] = $_SESSION['urls_time'] + 1;
    echo 'page '.$page.',sleep 5s';
    sleep(5);
}

// Client-side redirect to the next page keeps the crawl going.
echo "<script type='text/ecmascript'>window.location.href='caiji.php?page=".($page+1)."'</script>";
?>

分类: 张宏良张洪量 标签: ,
css.php