1

59b9124f · hujun · 7f68fe4e · 59b9124f
Commit 59b9124f authored Dec 10, 2019 by hujun
Hide whitespace changes
Inline Side-by-side

Showing with 71 additions and 107 deletions

dowload.py dowload.py +71 -107

No files found.
--- a/dowload.py
+++ b/dowload.py
@@ -10,7 +10,7 @@ import threading
 class DownFile:
-    def __init__(self, url, call_date_string="call_date", page_string="page_no", save_path="./"):
+    def __init__(self, url, save_path="./", call_date_string="call_date", page_string="page_no"):
        self.url = url
        self.save_path = save_path
        self.page_string = page_string
@@ -60,57 +60,78 @@ class DownFile:
        else:
            return []
-    def download(self, data, is_remote=1):
+    def download_aliyun(self, data):
        """
        下载录音
        :return:
        """
-        num = 0
+        url = ""
+        download_num = 0
        old_num = 0
        count_sleep = 0
        for item in data:
-            if is_remote:
+            if count_sleep == 50:
-                if count_sleep == 50:
+                self.log('sleep', '-----10s-----')
-                    self.log('sleep', '-----10s-----')
+                time.sleep(10)
-                    time.sleep(10)
+                count_sleep = 0
-                    count_sleep = 0
+            if len(item['voice_file']) > 0:
-                if len(item['voice_file']) > 0:
+                local_file = self.save_path + item['voice_file']
-                    local_file = self.save_path + item['voice_file']
+                string_end = item['voice_file'].find('/')
-                    string_end = item['voice_file'].find('/')
+                local_dir = item['voice_file'][0: string_end]
-                    local_dir = item['voice_file'][0: string_end]
+            else:
-                else:
+                string_end = str.find("?")
-                    string_end = str.find("?")
+                file_url = str[0:string_end]
-                    file_url = str[0:string_end]
+                time_array = time.strptime(item['call_time'], '%Y-%m-%d')
-                    time_array = time.strptime(item['call_time'], '%Y-%m-%d')
+                local_dir = time.strftime('%Y-%m-%d', time_array)
-                    local_dir = time.strftime('%Y-%m-%d', time_array)
+                local_file = self.save_path + local_dir + re.findall(r".com\D.*", file_url)[0][5:]
-                    local_file = local_dir + re.findall(r".com\D.*", file_url)[0][5:]
+            if self.check_file(local_file):
-                if self.check_file(local_file):
+                old_num = old_num + 1
-                    old_num = old_num + 1
+                continue
-                    continue
+            try:
+                count_sleep = count_sleep + 1  # 增加暂停计数
+                url = self.ali_yun_sdk(item['call_time'], item['call_id'])
+                is_ok = 0
+            except Exception as result:
+                self.log("exception 20s------------", str(result))
+                time.sleep(20)
+                is_ok = 1
+            # 第二次尝试
+            if is_ok == 1:
                try:
-                    count_sleep = count_sleep + 1  # 暂停计数
+                    count_sleep = count_sleep + 1  # 增加暂停计数
                    url = self.ali_yun_sdk(item['call_time'], item['call_id'])
-                    is_ok = 0
                except Exception as result:
-                    self.log("exception 20s------------", str(result))
+                    self.log(
-                    time.sleep(20)
+                        "two exception error-----call_time=" + item['call_time'] + '&call_id=' + item['call_id'],
-                    is_ok = 1
+                        str(result))
-                # 第二次尝试
+            if url == '':
-                if is_ok == 1:
+                continue
-                    try:
-                        url = self.ali_yun_sdk(item['call_time'], item['call_id'])
+            self.check_dir(self.save_path + local_dir)
-                    except Exception as result:
+            result = request.urlretrieve(url, local_file)
-                        self.log(
+            download_num = download_num + 1
-                            "two exception error-----call_time=" + item['call_time'] + '&call_id=' + item['call_id'],
+            self.log('download_file', result)
-                            str(result))
+            self.log("download_num", download_num)
-            else:
+            self.log("old_num", old_num)
-                url = "http://file.tonglianjituan.com/static/voice/" + item['voice_file']
+    def download_remote(self, data):
+        num = 0
+        old_num = 0
+        for item in data:
+            url = "http://file.tonglianjituan.com/static/voice/" + item['voice_file']
+            local_file = self.save_path + item['voice_file']
+            if self.check_file(local_file):
+                old_num = old_num + 1
+                continue
+            time_array = time.strptime(item['call_time'], '%Y-%m-%d')
+            local_dir = time.strftime('%Y-%m-%d', time_array)
            self.check_dir(self.save_path + local_dir)
            result = request.urlretrieve(url, local_file)
            num = num + 1
@@ -151,70 +172,17 @@ class DownFile:
            tt.join()
        self.log('download_successful', self.page_string + "=" + str(page_no))
-    def start_num(self, day_num=7):
-        today = datetime.date.today()
-        result = self.get_last_download(1, day_num)
-        page_no = result['page_no']
-        day_num = result['day_num']
-        while day_num > 0:
-            one_day = datetime.timedelta(days=day_num)
-            yesterday = today - one_day
-            day_num = day_num - 1
-            while True:
-                url = self.url + "&" + self.page_string + "=" + str(page_no) + "&" + self.call_date_string + "=" + str(
-                    yesterday)
-                data = self.get_data(url)
-                self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday))
-                # 更新下载参数
-                self.set_last_download(page_no, day_num)
-                if len(data):
-                    self.download(data)
-                else:
-                    break
-                page_no = page_no + 1
-            # 重置页码
-            page_no = 1
-            # 跑完一天停留30秒
-            time.sleep(30)
-        return page_no
-    def get_last_download(self, page_no, day_num):
-        fileurl = './record.json'
-        result = {'page_no': page_no, 'day_num': day_num}
-        if os.path.isfile(fileurl):
-            file = open(fileurl, 'r')
-            record_string = file.read()
-            if len(record_string) > 0:
-                result = json.loads(record_string)
-            file.close()
-        return result
-    def set_last_download(self, page_no, day_num):
-        fileurl = './record.json'
-        result = {'page_no': page_no, 'day_num': day_num}
-        file = open(fileurl, 'w')
-        file.write(json.dumps(result))
-        file.close()
-        return result
    def log(self, title, content):
        time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-        content2 = "[{time}] {a} {b} {c}".format(time=time_str, a=title, b=content, c='\n')
+        insert_string = "[{time}] {a} {b} {c}".format(time=time_str, a=title, b=content, c='\n')
-        fileurl = './run.log'
+        date = time.strftime("%Y-%m-%d", time.localtime())
-        f = open(fileurl, 'a')
+        file_url = "./log_" + date + ".log"
-        f.write(content2)
+        f = open(file_url, 'a')
+        f.write(insert_string)
        f.close()
-    def start_num_th(self, day_num=7):
+    def start_thread(self, day_num=7):
        today = datetime.date.today()
-        # result = self.get_last_download(1, day_num)
-        # page_no = result['page_no']
-        # day_num = result['day_num']
        page_no = 1
        while day_num > 0:
            one_day = datetime.timedelta(days=day_num)
@@ -229,10 +197,8 @@ class DownFile:
                data = self.get_data(url)
                self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday))
-                # 更新下载参数
-                self.set_last_download(page_no, day_num)
                if len(data):
-                    t = threading.Thread(target=self.download, args=(data,))
+                    t = threading.Thread(target=self.download_aliyun, args=(data,))
                    thread.append(t)
                    t.start()
                else:
@@ -248,10 +214,8 @@ class DownFile:
            page_no = 1
            # 跑完一天停留30秒
            time.sleep(30)
-        self.log('download_over', '')
-    def check_file(self):
+        self.log('download_over', '')
-        a = 1
 # today = datetime.date.today()
@@ -262,5 +226,5 @@ class DownFile:
 # DownFile(params_url).start()
 #
 params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?source=3"
-# DownFile(params_url).start_num(7)
+save_path = "/data/html/tl_estate/public/static/voice/"
-DownFile(params_url).start_num_th(1)
+DownFile(params_url, save_path).start_thread(1)