Commit 5f2a4562 authored by clone

1

parent ae5a5c50
......@@ -2,6 +2,13 @@
# https://curl.haxx.se/docs/http-cookies.html
# This file was generated by libcurl! Edit at your own risk.
www.haozu.com FALSE / FALSE 1560838217 lookHouse 464034%2C101612%2C1103785%2C714981%2C1346697%2C1285245%2C555537%2C1107172%2C992988%2C1173150%2C583706%2C1272465%2C150430%2C583678%2C1278875%2C1012250%2C319832%2C1053143%2C583709%2C204470%2C1319631%2C1011854%2C1141554%2C1381407%2C595860%2C900389%2C1265538%2C342069%2C1121322%2C514219%2C1159068
.www.haozu.com TRUE / FALSE 1575785417 1houseView %5B%7B%22viewId%22%3A%221173150%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22178%22%2C%22streetId%22%3A%226081%22%2C%22districtId%22%3A%22191%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233406%7D%2C%7B%22viewId%22%3A%22992988%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233407%7D%2C%7B%22viewId%22%3A%221107172%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22226%22%2C%22streetId%22%3A%226039%22%2C%22districtId%22%3A%22200%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233408%7D%2C%7B%22viewId%22%3A%22555537%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233409%7D%2C%7B%22viewId%22%3A%221285245%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233410%7D%2C%7B%22viewId%22%3A%221346697%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22338%22%2C%22streetId%22%3A%227251%22%2C%22districtId%22%3A%22197%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233411%7D%2C%7B%22viewId%22%3A%22714981%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233412%7D%2C%7B%22viewId%22%3A%221103785%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233413%7D%2C%7B%22viewId%22%3A%22101612%22
%2C%22userId%22%3A0%2C%22circleId%22%3A%22338%22%2C%22streetId%22%3A%227251%22%2C%22districtId%22%3A%22197%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233414%7D%2C%7B%22viewId%22%3A%22464034%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22225%22%2C%22streetId%22%3A%226027%22%2C%22districtId%22%3A%22200%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233415%7D%5D
.baidu.com TRUE / FALSE 0 H_PS_PSSID 1453_21126_29238_28519_29099_28835_29221_29131
www.baidu.com FALSE / FALSE 0 BD_HOME 0
www.baidu.com FALSE / FALSE 0 BDSVRTM 11
.baidu.com TRUE / FALSE 0 delPer 0
.baidu.com TRUE / FALSE 3707813702 PSTM 1560330052
.baidu.com TRUE / FALSE 3707813702 BIDUPSID BDD7053CE9E9ADAE5E8E7F7532C4A7E9
.baidu.com TRUE / FALSE 3707813702 BAIDUID BDD7053CE9E9ADAE5E8E7F7532C4A7E9:FG=1
.haozu.com TRUE / FALSE 1560406217 citydomain sh
.www.haozu.com TRUE / FALSE 1575785417 1houseView %5B%7B%22viewId%22%3A%221173150%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22178%22%2C%22streetId%22%3A%226081%22%2C%22districtId%22%3A%22191%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233406%7D%2C%7B%22viewId%22%3A%22992988%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233407%7D%2C%7B%22viewId%22%3A%221107172%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22226%22%2C%22streetId%22%3A%226039%22%2C%22districtId%22%3A%22200%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233408%7D%2C%7B%22viewId%22%3A%22555537%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233409%7D%2C%7B%22viewId%22%3A%221285245%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233410%7D%2C%7B%22viewId%22%3A%221346697%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22338%22%2C%22streetId%22%3A%227251%22%2C%22districtId%22%3A%22197%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233411%7D%2C%7B%22viewId%22%3A%22714981%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233412%7D%2C%7B%22viewId%22%3A%221103785%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22216%22%2C%22streetId%22%3A%226071%22%2C%22districtId%22%3A%22195%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233413%7D%2C%7B%22viewId%22%3A%22101612%22
%2C%22userId%22%3A0%2C%22circleId%22%3A%22338%22%2C%22streetId%22%3A%227251%22%2C%22districtId%22%3A%22197%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233414%7D%2C%7B%22viewId%22%3A%22464034%22%2C%22userId%22%3A0%2C%22circleId%22%3A%22225%22%2C%22streetId%22%3A%226027%22%2C%22districtId%22%3A%22200%22%2C%22cityId%22%3A%2213%22%2C%22type%22%3A2%2C%22category%22%3A1%2C%22viewTime%22%3A1560233415%7D%5D
www.haozu.com FALSE / FALSE 1560838217 lookHouse 464034%2C101612%2C1103785%2C714981%2C1346697%2C1285245%2C555537%2C1107172%2C992988%2C1173150%2C583706%2C1272465%2C150430%2C583678%2C1278875%2C1012250%2C319832%2C1053143%2C583709%2C204470%2C1319631%2C1011854%2C1141554%2C1381407%2C595860%2C900389%2C1265538%2C342069%2C1121322%2C514219%2C1159068
......@@ -2,8 +2,11 @@
namespace app\search\controller;
use app\extra\RedisExt;
use app\model\OfficeGBuilding;
use app\search\extend\Basic;
use app\search\service\ReptileService;
use think\Cache;
use Think\Exception;
use think\Request;
......@@ -16,11 +19,15 @@ use think\Request;
class Reptile extends Basic
{
private $service_;
private $buildingModel;
private $redis_;
/**
 * Set up the collaborators used by the crawler actions.
 *
 * @param Request|null $request request object, forwarded unchanged to the parent constructor
 */
public function __construct(Request $request = null)
{
    parent::__construct($request);
    $this->service_ = new ReptileService();      // HTTP / proxy helper
    $this->buildingModel = new OfficeGBuilding(); // receives the parsed listings via addOffice()
    $this->redis_ = RedisExt::getRedis();         // used to remember titles that were already handled
}
public function index()
......@@ -28,55 +35,69 @@ class Reptile extends Basic
return view("reptile/index");
}
# 实时爬取米扑代理API接口 (curl)
function getHaoZu1111()
/**
 * Build the URL of one listing page.
 *
 * Pure string construction: no network access and no output.
 *
 * @param int|string $i page number, interpolated after the "o" path prefix
 * @return string listing-page URL
 */
private function getUrl($i)
{
    return "https://www.haozu.com/sh/house-list/o$i/";
}
/**
 * Crawl listing pages 1 through 4999, one page at a time.
 *
 * Each page is delegated to getHaoZuItem().
 *
 * @return void
 */
public function getHaoZu()
{
    for ($i = 1; $i < 5000; $i++) {
        $this->getHaoZuItem($i);
    }
}
public function getHaoZu111()
/**
 * Crawl one listing page and hand every building found on it to the building model.
 *
 * Steps: obtain a proxy, download listing page $i, collect the distinct
 * detail-page addresses it links to, download each detail page, extract the
 * fields and call addOffice(). A title that is already present in Redis is
 * skipped; a new title is recorded in Redis before the record is stored.
 *
 * @param int|string $i listing page number (passed to getUrl())
 * @return void
 */
public function getHaoZuItem($i)
{
    set_time_limit(0); // remove the script execution time limit

    $proxy_uri = $this->service_->getProxyUri();
    if (!$proxy_uri) {
        // getProxyUri() returns false when no proxy could be obtained; try once more.
        $proxy_uri = $this->service_->getProxyUri();
    }
    // NOTE(review): when the retry fails too, the falsy value is still passed to
    // curl_post() below, exactly as before — confirm a proxy-less request is acceptable.

    $result = $this->service_->curl_post($proxy_uri, $this->getUrl($i));
    if (!is_string($result) || $result === '') {
        return; // listing page could not be fetched, nothing to parse
    }

    // Detail pages look like www.haozu.com/<city>/house<id>; dots are escaped so
    // they only match a literal ".".
    if (!preg_match_all('/www\.haozu\.com\/\w+\/house\d+/', $result, $matches)) {
        return;
    }

    foreach (array_unique($matches[0]) as $key) {
        try {
            $body = $this->service_->curl_post($proxy_uri, "https://" . $key . "/");

            $title = $this->getTitle($body);
            if (!is_string($title) || $title === '') {
                continue; // no usable title: cannot de-duplicate, so skip the page
            }
            if ($this->redis_->get($title)) {
                continue; // already handled in an earlier run
            }
            $this->redis_->set($title, 1);

            // presumably a list of text fragments taken from the detail page — TODO confirm against getAddress()
            $value = $this->getAddress($body);
            if (!is_array($value) || !isset($value[10], $value[14])) {
                continue;
            }
            $address = explode('&nbsp;', $value[10]);
            if (!isset($address[1])) {
                continue;
            }

            $params = [];
            $params["title"] = $title;
            $params["address"] = $address[1];
            $params["province"] = "上海市";
            $params["city"] = "上海市";
            // substr() counts bytes: 6 bytes is two characters only if both are 3-byte UTF-8 — assumes a two-character district prefix
            $params["disc"] = substr($value[10], 0, 6) . "区";
            $params["type"] = 1;
            $params["status"] = 0;
            $params["floor_total"] = rtrim($value[14], "层");

            // The introduction is whichever of fragments 16 / 17 mentions a company; 16 wins.
            $params["intro"] = "";
            foreach ([16, 17] as $index) {
                if (isset($value[$index]) && $this->mentionsCompany($value[$index])) {
                    $params["intro"] = $value[$index];
                    break;
                }
            }

            $this->buildingModel->addOffice($params);
        } catch (Exception $e) {
            // Best effort: one broken detail page must not stop the crawl, but leave a trace.
            error_log("Reptile::getHaoZuItem skipped " . $key . ": " . $e->getMessage());
        }
    }
}

/**
 * Tell whether a text fragment contains one of the company keywords.
 *
 * Compares strpos() against false explicitly so that a keyword at the very
 * start of the text (offset 0) counts as a match.
 *
 * @param mixed $text fragment to inspect; cast to string before searching
 * @return bool true when "企业" or "公司" occurs anywhere in the text
 */
private function mentionsCompany($text)
{
    $text = (string)$text;
    return strpos($text, '企业') !== false || strpos($text, '公司') !== false;
}
/**
......@@ -86,9 +107,9 @@ class Reptile extends Basic
*/
private function getTitle($result)
{
try {
preg_match("/(?<=\"h3-title\">)[^<>]+(?=<)/", $result, $match1);
dump($match1);
} catch (Exception $e) {
$match1[0] = "";
}
......
......@@ -3,7 +3,7 @@
namespace app\search\service;
use app\chat\utils\CurlUtil;
use Curl\Curl;
use Think\Cache;
/**
......@@ -14,71 +14,18 @@ use Curl\Curl;
*/
class ReptileService
{
/* public function getHaoZu($url)
{
$curl = new CurlUtil();
$curl->headers = [
"Accept" => "application/json",
"Content-Type" => "application/json;charset=utf-8",
"Accept-Language:zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4",
"Referer:http://proxy.mimvp.com/fetch.php",
"Host:proxy.mimvp.com",
"User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
];
$curl->options = [
"CURLOPT_PROXY" => "182.91.218.200",
"CURLOPT_PROXYPORT" => "27353",
"CURLOPT_SSL_VERIFYPEER" => 0,
"CURLOPT_SSL_VERIFYHOST" => 2,
];
$response = $curl->get($url);
return $response;
}*/
/**
 * GET a URL through an HTTP proxy using the Curl\Curl client.
 *
 * TLS peer and host verification are switched off, as before.
 * NOTE(review): that accepts any certificate — confirm this is intended outside of debugging.
 *
 * @param string $url       address to request
 * @param string $proxyHost proxy server address (defaults to the previously hard-coded host)
 * @param int    $proxyPort proxy server port (defaults to the previously hard-coded port)
 * @return mixed whatever Curl::get() returns for the request
 */
public function getNewCurl($url, $proxyHost = "182.91.218.200", $proxyPort = 27353)
{
    $curl = new Curl();
    $curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
    $curl->setOpt(CURLOPT_SSL_VERIFYHOST, false);
    $curl->setOpt(CURLOPT_PROXY, $proxyHost);     // proxy server address
    $curl->setOpt(CURLOPT_PROXYPORT, $proxyPort); // proxy server port

    // Every entry is a name => value pair: setHeaders() uses the array key as the
    // header name, so bare "Name:value" list entries would be sent as "0: Name:value".
    $headers = [
        "Accept" => "application/json",
        "Content-Type" => "application/json;charset=utf-8",
        "Accept-Language" => "zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4",
        "Connection" => "close",
        "Referer" => "http://proxy.mimvp.com/fetch.php",
        "Cache-Control" => 'max-age=0',
        "Accept-Encoding" => 'gzip,deflate'
    ];
    $curl->setHeaders($headers);

    return $curl->get($url);
}
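/**
 * Request a URL with cURL through the given proxy and return the result.
 *
 * @param $proxy_uri proxy address, passed to CURLOPT_PROXY
 * @param $mimvp_url URL to request, passed to CURLOPT_URL
 * @return bool|string the value of curl_exec(): the response as a string, or false on failure
 */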
public function curl_post($proxy_uri, $mimvp_url)
{
/* $PROXY_USERNAME = "6c15b9a7798a";
$PROXY_PASSEORD = "988863e576";*/
/* $proxy_type = explode('://', $proxy_uri)[0]; // http, https, socks4, socks5
$proxy_ip_port = explode('://', $proxy_uri)[1]; // ip:port
echo "proxy_uri: $proxy_uri ; proxy_type: $proxy_type , proxy_ip_port: $proxy_ip_port ";*/
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $mimvp_url);
curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, false);
curl_setopt($ch, CURLOPT_PROXY, $proxy_uri);
# 设置代理授权
/* curl_setopt($ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "{$PROXY_USERNAME}:{$PROXY_PASSEORD}");*/
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0); // https
......@@ -88,8 +35,25 @@ class ReptileService
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // 返回网页内容*/
$result = curl_exec($ch);
dump($result);
curl_close($ch);
return $result;
}
/**
 * Ask the proxy API for one proxy and return its address.
 *
 * The API is queried with result_format=json; the address is read from
 * result[0]["ip:port"] of the decoded answer.
 *
 * NOTE(review): the order id is embedded in the URL and peer verification is
 * disabled in the options — consider moving both to configuration.
 *
 * @return string|false the "ip:port" value of the first result, or false when the
 *                      answer cannot be decoded or contains no usable result
 */
public function getProxyUri()
{
    $curl = new CurlUtil();
    $curl->headers = [
        "Accept" => "application/json",
        "Content-Type" => "application/json;charset=utf-8",
    ];
    $curl->options = [
        "CURLOPT_SSL_VERIFYPEER" => 0,
        "CURLOPT_SSL_VERIFYHOST" => 2,
    ];
    $url = "https://proxyapi.mimvp.com/api/fetchsecret.php?orderid=867304249961220216&num=1&http_type=3&anonymous=5&result_format=json&tdsourcetag=s_pcqq_aiomsg";

    // Second argument is the boolean true (decode objects as arrays), not the string "true".
    $response = json_decode($curl->get($url), true);

    // json_decode() yields null on invalid JSON, and the API may answer without
    // any result; in both cases report "no proxy" instead of indexing into nothing.
    if (!is_array($response) || !isset($response["result"][0]["ip:port"])) {
        return false;
    }
    return $response["result"][0]["ip:port"];
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment