Commit 7f68fe4e authored by hujun

add

parents
import re
from urllib import request
import json
import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
import datetime
import time
import threading
class DownFile:
    """Download call recordings listed by a paginated JSON API.

    The listing API (``url``) is queried page by page; every returned item is
    expected to carry the keys ``voice_file``, ``call_time`` and ``call_id``.
    Recordings are stored under ``save_path``, progress is appended to
    ``./run.log`` and a resume checkpoint is kept in ``./record.json``.
    """

    # NOTE(security): these credentials were committed in plain text. They are
    # kept only as a backward-compatible fallback; rotate the key and provide
    # the replacement through the ALIBABA_CLOUD_ACCESS_KEY_ID /
    # ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables instead.
    _DEFAULT_ACCESS_KEY_ID = 'LTAIEeZUju3PFZlJ'
    _DEFAULT_ACCESS_KEY_SECRET = 'ljKCItiUp19F7wtzfn225WoBH0Qe8Y'
    _REGION_ID = 'cn-hangzhou'
    _POOL_KEY = "FC100000022056027"
    _STATIC_VOICE_URL = "http://file.tonglianjituan.com/static/voice/"
    _RECORD_FILE = './record.json'
    _LOG_FILE = './run.log'
    # Number of download-URL lookups after which download() pauses for 10s.
    _THROTTLE_EVERY = 50

    def __init__(self, url, call_date_string="call_date", page_string="page_no", save_path="./"):
        """Store the listing URL and the query-parameter names used for paging.

        :param url: listing API URL; it must already contain a query string
            because further parameters are appended with ``&``.
        :param call_date_string: name of the call-date query parameter.
        :param page_string: name of the page-number query parameter.
        :param save_path: prefix prepended verbatim to every local file path,
            so it should end with a path separator.
        """
        self.url = url
        self.save_path = save_path
        self.page_string = page_string
        self.call_date_string = call_date_string

    def ali_yun_sdk(self, call_time, call_id):
        """Ask the Aliyun ``QueryRecordFileDownloadUrl`` action for a download URL.

        :param call_time: value sent as the ``CallTime`` query parameter.
        :param call_id: value sent as the ``CallId`` query parameter.
        :return: the ``DownloadUrl`` of the response when its ``Code`` is
            ``"OK"``, otherwise an empty string.
        """
        key_id = os.environ.get('ALIBABA_CLOUD_ACCESS_KEY_ID')
        key_secret = os.environ.get('ALIBABA_CLOUD_ACCESS_KEY_SECRET')
        if not (key_id and key_secret):
            # Only override when both parts are supplied, never mix sources.
            key_id = self._DEFAULT_ACCESS_KEY_ID
            key_secret = self._DEFAULT_ACCESS_KEY_SECRET
        client = AcsClient(key_id, key_secret, self._REGION_ID)

        # Named api_request so the module-level urllib ``request`` import is
        # not shadowed inside this method.
        api_request = CommonRequest()
        api_request.set_accept_format('json')
        api_request.set_domain('dyplsapi.aliyuncs.com')
        api_request.set_method('POST')
        api_request.set_protocol_type('https')  # https | http
        api_request.set_version('2017-05-25')
        api_request.set_action_name('QueryRecordFileDownloadUrl')
        api_request.add_query_param('RegionId', self._REGION_ID)
        api_request.add_query_param('CallId', call_id)
        api_request.add_query_param('PoolKey', self._POOL_KEY)
        api_request.add_query_param('CallTime', call_time)

        response = client.do_action(api_request)
        result = json.loads(str(response, encoding='utf-8'))
        if result.get('Code') == "OK":
            return result.get('DownloadUrl') or ''
        return ''

    def get_data(self, url):
        """Fetch one page of call records.

        :param url: fully built listing URL.
        :return: the ``data`` member of the JSON response when its ``code``
            is 200, otherwise an empty list.
        """
        with request.urlopen(url) as response:
            payload = json.loads(response.read().decode("utf-8"))
        if payload.get('code') == 200:
            return payload.get('data') or []
        return []

    def download(self, data, is_remote=1):
        """Download the recording of every item in ``data``.

        Files that already exist locally are counted and skipped. Items for
        which no download URL can be obtained are logged and skipped instead
        of re-using the URL of a previous item.

        :param data: iterable of dicts with ``voice_file``, ``call_time`` and
            ``call_id``.
        :param is_remote: truthy to resolve the download URL through
            :meth:`ali_yun_sdk`; falsy to build it from the static file
            server URL and ``voice_file``.
        :return: None; the totals are written to the log.
        """
        num = 0
        old_num = 0
        count_sleep = 0  # URL lookups since the last throttling pause
        for item in data:
            if is_remote and count_sleep == self._THROTTLE_EVERY:
                self.log('sleep', '-----10s-----')
                time.sleep(10)
                count_sleep = 0

            voice_file = item['voice_file']
            url = None
            if voice_file:
                local_file = self.save_path + voice_file
            elif is_remote:
                # Without a stored name the local path can only be derived
                # from the download URL, so resolve the URL first.
                count_sleep += 1
                url = self._resolve_remote_url(item)
                if not url:
                    continue
                local_file = self._local_file_from_url(item, url)
            else:
                self.log('skip_no_voice_file', 'call_id=' + str(item.get('call_id')))
                continue

            if self.check_file(local_file):
                old_num += 1
                continue

            if url is None:
                if is_remote:
                    count_sleep += 1
                    url = self._resolve_remote_url(item)
                    if not url:
                        continue
                else:
                    url = self._STATIC_VOICE_URL + voice_file

            self.check_dir(os.path.dirname(local_file) or '.')
            result = request.urlretrieve(url, local_file)
            num += 1
            self.log('download_file', result)
        self.log("download_num", num)
        self.log("old_num", old_num)

    def _resolve_remote_url(self, item):
        """Return the download URL for ``item`` or ``''``, retrying once after 20s."""
        url = ''
        try:
            url = self.ali_yun_sdk(item['call_time'], item['call_id'])
        except Exception as error:  # retry boundary: any SDK/network failure
            self.log("exception 20s------------", str(error))
            time.sleep(20)
            # Second attempt.
            try:
                url = self.ali_yun_sdk(item['call_time'], item['call_id'])
            except Exception as error:
                self.log(
                    "two exception error-----call_time=" + str(item['call_time'])
                    + '&call_id=' + str(item['call_id']),
                    str(error))
                return ''
        if not url:
            self.log('no_download_url',
                     'call_time=' + str(item['call_time']) + '&call_id=' + str(item['call_id']))
        return url

    def _local_file_from_url(self, item, url):
        """Build ``<save_path><call date>/<file name from url>`` for ``item``.

        NOTE(review): the original branch referenced the builtin ``str`` and
        could never run; this layout is the reconstructed intent (date folder
        plus the URL's file name without its query string) - confirm.
        """
        file_url = url.split("?", 1)[0]
        file_name = file_url.rsplit("/", 1)[-1] or str(item['call_id'])
        # Only the leading YYYY-MM-DD part is parsed, so a call_time that
        # also carries a time of day is accepted.
        time_array = time.strptime(str(item['call_time'])[:10], '%Y-%m-%d')
        local_dir = time.strftime('%Y-%m-%d', time_array)
        return self.save_path + local_dir + "/" + file_name

    def check_file(self, file=None):
        """Return True when ``file`` exists on disk.

        ``file`` is optional only to stay compatible with the argument-less
        stub that used to shadow this method; without a path the result is
        False.
        """
        if file is None:
            return False
        return os.path.exists(file)

    def check_dir(self, dir):
        """Make sure directory ``dir`` exists, creating parents as needed.

        ``exist_ok=True`` keeps this safe when several download threads ask
        for the same directory at the same time.

        :return: always True.
        """
        os.makedirs(dir, exist_ok=True)
        return True

    def _page_url(self, page_no, call_date=None):
        """Append the page number (and optionally the call date) to the listing URL."""
        url = self.url + "&" + self.page_string + "=" + str(page_no)
        if call_date is not None:
            url = url + "&" + self.call_date_string + "=" + str(call_date)
        return url

    def _spawn_download(self, data):
        """Start and return a thread running :meth:`download` on ``data``."""
        worker = threading.Thread(target=self.download, args=(data,))
        worker.start()
        return worker

    def start(self):
        """Download every page of the listing, one thread per page."""
        page_no = 1
        workers = []
        while True:
            data = self.get_data(self._page_url(page_no))
            self.log('download_page', "page_no=" + str(page_no))
            if not data:
                break
            workers.append(self._spawn_download(data))
            page_no += 1
        for worker in workers:
            worker.join()
        self.log('download_successful', self.page_string + "=" + str(page_no))

    def start_num(self, day_num=7):
        """Sequentially download the last ``day_num`` days, oldest day first.

        Progress is checkpointed in ``./record.json`` so an interrupted run
        resumes at the same day and page; the checkpoint is removed once all
        days are done so that the next run starts fresh.

        :param day_num: how many days back to start (ignored when a
            checkpoint exists).
        :return: the page number the next day would start with.
        """
        today = datetime.date.today()
        checkpoint = self.get_last_download(1, day_num)
        page_no = checkpoint.get('page_no', 1)
        day_num = checkpoint.get('day_num', day_num)
        while day_num > 0:
            call_date = today - datetime.timedelta(days=day_num)
            while True:
                data = self.get_data(self._page_url(page_no, call_date))
                self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(call_date))
                # Checkpoint the day currently being processed; saving the
                # already-decremented counter made a resume skip to the next day.
                self.set_last_download(page_no, day_num)
                if not data:
                    break
                self.download(data)
                page_no += 1
            # Reset the page number for the next day.
            page_no = 1
            day_num -= 1
            # Pause 30 seconds after finishing a day.
            time.sleep(30)
        self._clear_last_download()
        return page_no

    def get_last_download(self, page_no, day_num):
        """Load the resume checkpoint.

        :param page_no: default page number when no usable checkpoint exists.
        :param day_num: default day offset when no usable checkpoint exists.
        :return: dict with ``page_no`` and ``day_num``.
        """
        result = {'page_no': page_no, 'day_num': day_num}
        if not os.path.isfile(self._RECORD_FILE):
            return result
        with open(self._RECORD_FILE, 'r') as file:
            record_string = file.read()
        if not record_string:
            return result
        try:
            loaded = json.loads(record_string)
        except ValueError as error:
            # A truncated checkpoint must not block every future run.
            self.log('record_invalid', str(error))
            return result
        return loaded if isinstance(loaded, dict) else result

    def set_last_download(self, page_no, day_num):
        """Write the resume checkpoint and return it as a dict."""
        result = {'page_no': page_no, 'day_num': day_num}
        with open(self._RECORD_FILE, 'w') as file:
            file.write(json.dumps(result))
        return result

    def _clear_last_download(self):
        """Remove the resume checkpoint after a complete run."""
        try:
            os.remove(self._RECORD_FILE)
        except FileNotFoundError:
            pass

    def log(self, title, content):
        """Append a timestamped ``[time] title content`` line to ``./run.log``."""
        time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        line = "[{time}] {a} {b} {c}".format(time=time_str, a=title, b=content, c='\n')
        with open(self._LOG_FILE, 'a') as f:
            f.write(line)

    def start_num_th(self, day_num=7):
        """Download the last ``day_num`` days, one thread per page of each day.

        Unlike :meth:`start_num` this never resumes from the checkpoint; it
        still records progress while running and clears it when finished.
        """
        today = datetime.date.today()
        while day_num > 0:
            call_date = today - datetime.timedelta(days=day_num)
            self.log('download_start', self.call_date_string + "=" + str(call_date))
            page_no = 1
            workers = []
            while True:
                data = self.get_data(self._page_url(page_no, call_date))
                self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(call_date))
                # Record the day currently being processed.
                self.set_last_download(page_no, day_num)
                if not data:
                    break
                workers.append(self._spawn_download(data))
                page_no += 1
            for worker in workers:
                worker.join()
            self.log('download_successful', self.call_date_string + "=" + str(call_date))
            day_num -= 1
            # Pause 30 seconds after finishing a day.
            time.sleep(30)
        self._clear_last_download()
        self.log('download_over', '')
def main():
    """Download yesterday's recordings using one thread per listing page."""
    # Alternative invocations kept for reference:
    #
    # Single day, explicit call_date in the URL:
    # today = datetime.date.today()
    # one_day = datetime.timedelta(days=1)
    # yesterday = today - one_day
    # params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?page_size=3000&source=3&call_date=" + str(yesterday)
    # params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?page_size=100&source=3&call_date=2019-12-1"
    # DownFile(params_url).start()
    #
    # Sequential, resumable run over the last 7 days:
    # DownFile(params_url).start_num(7)
    params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?source=3"
    DownFile(params_url).start_num_th(1)


# Guarded so importing this module does not start a download.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment