Commit 59b9124f authored by hujun's avatar hujun

1

parent 7f68fe4e
...@@ -10,7 +10,7 @@ import threading ...@@ -10,7 +10,7 @@ import threading
class DownFile: class DownFile:
def __init__(self, url, call_date_string="call_date", page_string="page_no", save_path="./"): def __init__(self, url, save_path="./", call_date_string="call_date", page_string="page_no"):
self.url = url self.url = url
self.save_path = save_path self.save_path = save_path
self.page_string = page_string self.page_string = page_string
...@@ -60,57 +60,78 @@ class DownFile: ...@@ -60,57 +60,78 @@ class DownFile:
else: else:
return [] return []
def download(self, data, is_remote=1): def download_aliyun(self, data):
""" """
下载录音 下载录音
:return: :return:
""" """
num = 0 url = ""
download_num = 0
old_num = 0 old_num = 0
count_sleep = 0 count_sleep = 0
for item in data: for item in data:
if is_remote: if count_sleep == 50:
if count_sleep == 50: self.log('sleep', '-----10s-----')
self.log('sleep', '-----10s-----') time.sleep(10)
time.sleep(10) count_sleep = 0
count_sleep = 0
if len(item['voice_file']) > 0:
if len(item['voice_file']) > 0: local_file = self.save_path + item['voice_file']
local_file = self.save_path + item['voice_file'] string_end = item['voice_file'].find('/')
string_end = item['voice_file'].find('/') local_dir = item['voice_file'][0: string_end]
local_dir = item['voice_file'][0: string_end] else:
else: string_end = str.find("?")
string_end = str.find("?") file_url = str[0:string_end]
file_url = str[0:string_end]
time_array = time.strptime(item['call_time'], '%Y-%m-%d')
time_array = time.strptime(item['call_time'], '%Y-%m-%d') local_dir = time.strftime('%Y-%m-%d', time_array)
local_dir = time.strftime('%Y-%m-%d', time_array) local_file = self.save_path + local_dir + re.findall(r".com\D.*", file_url)[0][5:]
local_file = local_dir + re.findall(r".com\D.*", file_url)[0][5:]
if self.check_file(local_file):
if self.check_file(local_file): old_num = old_num + 1
old_num = old_num + 1 continue
continue
try:
count_sleep = count_sleep + 1 # 增加暂停计数
url = self.ali_yun_sdk(item['call_time'], item['call_id'])
is_ok = 0
except Exception as result:
self.log("exception 20s------------", str(result))
time.sleep(20)
is_ok = 1
# 第二次尝试
if is_ok == 1:
try: try:
count_sleep = count_sleep + 1 # 暂停计数 count_sleep = count_sleep + 1 # 增加暂停计数
url = self.ali_yun_sdk(item['call_time'], item['call_id']) url = self.ali_yun_sdk(item['call_time'], item['call_id'])
is_ok = 0
except Exception as result: except Exception as result:
self.log("exception 20s------------", str(result)) self.log(
time.sleep(20) "two exception error-----call_time=" + item['call_time'] + '&call_id=' + item['call_id'],
is_ok = 1 str(result))
# 第二次尝试 if url == '':
if is_ok == 1: continue
try:
url = self.ali_yun_sdk(item['call_time'], item['call_id']) self.check_dir(self.save_path + local_dir)
except Exception as result: result = request.urlretrieve(url, local_file)
self.log( download_num = download_num + 1
"two exception error-----call_time=" + item['call_time'] + '&call_id=' + item['call_id'], self.log('download_file', result)
str(result)) self.log("download_num", download_num)
else: self.log("old_num", old_num)
url = "http://file.tonglianjituan.com/static/voice/" + item['voice_file']
def download_remote(self, data):
num = 0
old_num = 0
for item in data:
url = "http://file.tonglianjituan.com/static/voice/" + item['voice_file']
local_file = self.save_path + item['voice_file']
if self.check_file(local_file):
old_num = old_num + 1
continue
time_array = time.strptime(item['call_time'], '%Y-%m-%d')
local_dir = time.strftime('%Y-%m-%d', time_array)
self.check_dir(self.save_path + local_dir) self.check_dir(self.save_path + local_dir)
result = request.urlretrieve(url, local_file) result = request.urlretrieve(url, local_file)
num = num + 1 num = num + 1
...@@ -151,70 +172,17 @@ class DownFile: ...@@ -151,70 +172,17 @@ class DownFile:
tt.join() tt.join()
self.log('download_successful', self.page_string + "=" + str(page_no)) self.log('download_successful', self.page_string + "=" + str(page_no))
def start_num(self, day_num=7):
today = datetime.date.today()
result = self.get_last_download(1, day_num)
page_no = result['page_no']
day_num = result['day_num']
while day_num > 0:
one_day = datetime.timedelta(days=day_num)
yesterday = today - one_day
day_num = day_num - 1
while True:
url = self.url + "&" + self.page_string + "=" + str(page_no) + "&" + self.call_date_string + "=" + str(
yesterday)
data = self.get_data(url)
self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday))
# 更新下载参数
self.set_last_download(page_no, day_num)
if len(data):
self.download(data)
else:
break
page_no = page_no + 1
# 重置页码
page_no = 1
# 跑完一天停留30秒
time.sleep(30)
return page_no
def get_last_download(self, page_no, day_num):
fileurl = './record.json'
result = {'page_no': page_no, 'day_num': day_num}
if os.path.isfile(fileurl):
file = open(fileurl, 'r')
record_string = file.read()
if len(record_string) > 0:
result = json.loads(record_string)
file.close()
return result
def set_last_download(self, page_no, day_num):
fileurl = './record.json'
result = {'page_no': page_no, 'day_num': day_num}
file = open(fileurl, 'w')
file.write(json.dumps(result))
file.close()
return result
def log(self, title, content): def log(self, title, content):
time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
content2 = "[{time}] {a} {b} {c}".format(time=time_str, a=title, b=content, c='\n') insert_string = "[{time}] {a} {b} {c}".format(time=time_str, a=title, b=content, c='\n')
fileurl = './run.log' date = time.strftime("%Y-%m-%d", time.localtime())
f = open(fileurl, 'a') file_url = "./log_" + date + ".log"
f.write(content2) f = open(file_url, 'a')
f.write(insert_string)
f.close() f.close()
def start_num_th(self, day_num=7): def start_thread(self, day_num=7):
today = datetime.date.today() today = datetime.date.today()
# result = self.get_last_download(1, day_num)
# page_no = result['page_no']
# day_num = result['day_num']
page_no = 1 page_no = 1
while day_num > 0: while day_num > 0:
one_day = datetime.timedelta(days=day_num) one_day = datetime.timedelta(days=day_num)
...@@ -229,10 +197,8 @@ class DownFile: ...@@ -229,10 +197,8 @@ class DownFile:
data = self.get_data(url) data = self.get_data(url)
self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday)) self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday))
# 更新下载参数
self.set_last_download(page_no, day_num)
if len(data): if len(data):
t = threading.Thread(target=self.download, args=(data,)) t = threading.Thread(target=self.download_aliyun, args=(data,))
thread.append(t) thread.append(t)
t.start() t.start()
else: else:
...@@ -248,10 +214,8 @@ class DownFile: ...@@ -248,10 +214,8 @@ class DownFile:
page_no = 1 page_no = 1
# 跑完一天停留30秒 # 跑完一天停留30秒
time.sleep(30) time.sleep(30)
self.log('download_over', '')
def check_file(self): self.log('download_over', '')
a = 1
# today = datetime.date.today() # today = datetime.date.today()
...@@ -262,5 +226,5 @@ class DownFile: ...@@ -262,5 +226,5 @@ class DownFile:
# DownFile(params_url).start() # DownFile(params_url).start()
# #
params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?source=3" params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?source=3"
# DownFile(params_url).start_num(7) save_path = "/data/html/tl_estate/public/static/voice/"
DownFile(params_url).start_num_th(1) DownFile(params_url, save_path).start_thread(1)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment