Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
download
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
hujun
download
Commits
59b9124f
Commit
59b9124f
authored
Dec 10, 2019
by
hujun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1
parent
7f68fe4e
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
71 additions
and
107 deletions
+71
-107
dowload.py
dowload.py
+71
-107
No files found.
dowload.py
View file @
59b9124f
...
...
@@ -10,7 +10,7 @@ import threading
class
DownFile
:
def
__init__
(
self
,
url
,
call_date_string
=
"call_date"
,
page_string
=
"page_no"
,
save_path
=
"./
"
):
def
__init__
(
self
,
url
,
save_path
=
"./"
,
call_date_string
=
"call_date"
,
page_string
=
"page_no
"
):
self
.
url
=
url
self
.
save_path
=
save_path
self
.
page_string
=
page_string
...
...
@@ -60,57 +60,78 @@ class DownFile:
else
:
return
[]
def
download
(
self
,
data
,
is_remote
=
1
):
def
download
_aliyun
(
self
,
data
):
"""
下载录音
:return:
"""
num
=
0
url
=
""
download_num
=
0
old_num
=
0
count_sleep
=
0
for
item
in
data
:
if
is_remote
:
if
count_sleep
==
50
:
self
.
log
(
'sleep'
,
'-----10s-----'
)
time
.
sleep
(
10
)
count_sleep
=
0
if
len
(
item
[
'voice_file'
])
>
0
:
local_file
=
self
.
save_path
+
item
[
'voice_file'
]
string_end
=
item
[
'voice_file'
]
.
find
(
'/'
)
local_dir
=
item
[
'voice_file'
][
0
:
string_end
]
else
:
string_end
=
str
.
find
(
"?"
)
file_url
=
str
[
0
:
string_end
]
time_array
=
time
.
strptime
(
item
[
'call_time'
],
'
%
Y-
%
m-
%
d'
)
local_dir
=
time
.
strftime
(
'
%
Y-
%
m-
%
d'
,
time_array
)
local_file
=
local_dir
+
re
.
findall
(
r".com\D.*"
,
file_url
)[
0
][
5
:]
if
self
.
check_file
(
local_file
):
old_num
=
old_num
+
1
continue
if
count_sleep
==
50
:
self
.
log
(
'sleep'
,
'-----10s-----'
)
time
.
sleep
(
10
)
count_sleep
=
0
if
len
(
item
[
'voice_file'
])
>
0
:
local_file
=
self
.
save_path
+
item
[
'voice_file'
]
string_end
=
item
[
'voice_file'
]
.
find
(
'/'
)
local_dir
=
item
[
'voice_file'
][
0
:
string_end
]
else
:
string_end
=
str
.
find
(
"?"
)
file_url
=
str
[
0
:
string_end
]
time_array
=
time
.
strptime
(
item
[
'call_time'
],
'
%
Y-
%
m-
%
d'
)
local_dir
=
time
.
strftime
(
'
%
Y-
%
m-
%
d'
,
time_array
)
local_file
=
self
.
save_path
+
local_dir
+
re
.
findall
(
r".com\D.*"
,
file_url
)[
0
][
5
:]
if
self
.
check_file
(
local_file
):
old_num
=
old_num
+
1
continue
try
:
count_sleep
=
count_sleep
+
1
# 增加暂停计数
url
=
self
.
ali_yun_sdk
(
item
[
'call_time'
],
item
[
'call_id'
])
is_ok
=
0
except
Exception
as
result
:
self
.
log
(
"exception 20s------------"
,
str
(
result
))
time
.
sleep
(
20
)
is_ok
=
1
# 第二次尝试
if
is_ok
==
1
:
try
:
count_sleep
=
count_sleep
+
1
# 暂停计数
count_sleep
=
count_sleep
+
1
#
增加
暂停计数
url
=
self
.
ali_yun_sdk
(
item
[
'call_time'
],
item
[
'call_id'
])
is_ok
=
0
except
Exception
as
result
:
self
.
log
(
"exception 20s------------"
,
str
(
result
))
time
.
sleep
(
20
)
is_ok
=
1
self
.
log
(
"two exception error-----call_time="
+
item
[
'call_time'
]
+
'&call_id='
+
item
[
'call_id'
],
str
(
result
))
# 第二次尝试
if
is_ok
==
1
:
try
:
url
=
self
.
ali_yun_sdk
(
item
[
'call_time'
],
item
[
'call_id'
])
except
Exception
as
result
:
self
.
log
(
"two exception error-----call_time="
+
item
[
'call_time'
]
+
'&call_id='
+
item
[
'call_id'
],
str
(
result
))
else
:
url
=
"http://file.tonglianjituan.com/static/voice/"
+
item
[
'voice_file'
]
if
url
==
''
:
continue
self
.
check_dir
(
self
.
save_path
+
local_dir
)
result
=
request
.
urlretrieve
(
url
,
local_file
)
download_num
=
download_num
+
1
self
.
log
(
'download_file'
,
result
)
self
.
log
(
"download_num"
,
download_num
)
self
.
log
(
"old_num"
,
old_num
)
def
download_remote
(
self
,
data
):
num
=
0
old_num
=
0
for
item
in
data
:
url
=
"http://file.tonglianjituan.com/static/voice/"
+
item
[
'voice_file'
]
local_file
=
self
.
save_path
+
item
[
'voice_file'
]
if
self
.
check_file
(
local_file
):
old_num
=
old_num
+
1
continue
time_array
=
time
.
strptime
(
item
[
'call_time'
],
'
%
Y-
%
m-
%
d'
)
local_dir
=
time
.
strftime
(
'
%
Y-
%
m-
%
d'
,
time_array
)
self
.
check_dir
(
self
.
save_path
+
local_dir
)
result
=
request
.
urlretrieve
(
url
,
local_file
)
num
=
num
+
1
...
...
@@ -151,70 +172,17 @@ class DownFile:
tt
.
join
()
self
.
log
(
'download_successful'
,
self
.
page_string
+
"="
+
str
(
page_no
))
def
start_num
(
self
,
day_num
=
7
):
today
=
datetime
.
date
.
today
()
result
=
self
.
get_last_download
(
1
,
day_num
)
page_no
=
result
[
'page_no'
]
day_num
=
result
[
'day_num'
]
while
day_num
>
0
:
one_day
=
datetime
.
timedelta
(
days
=
day_num
)
yesterday
=
today
-
one_day
day_num
=
day_num
-
1
while
True
:
url
=
self
.
url
+
"&"
+
self
.
page_string
+
"="
+
str
(
page_no
)
+
"&"
+
self
.
call_date_string
+
"="
+
str
(
yesterday
)
data
=
self
.
get_data
(
url
)
self
.
log
(
'download_page'
,
"page_no="
+
str
(
page_no
)
+
"&day="
+
str
(
yesterday
))
# 更新下载参数
self
.
set_last_download
(
page_no
,
day_num
)
if
len
(
data
):
self
.
download
(
data
)
else
:
break
page_no
=
page_no
+
1
# 重置页码
page_no
=
1
# 跑完一天停留30秒
time
.
sleep
(
30
)
return
page_no
def
get_last_download
(
self
,
page_no
,
day_num
):
fileurl
=
'./record.json'
result
=
{
'page_no'
:
page_no
,
'day_num'
:
day_num
}
if
os
.
path
.
isfile
(
fileurl
):
file
=
open
(
fileurl
,
'r'
)
record_string
=
file
.
read
()
if
len
(
record_string
)
>
0
:
result
=
json
.
loads
(
record_string
)
file
.
close
()
return
result
def
set_last_download
(
self
,
page_no
,
day_num
):
fileurl
=
'./record.json'
result
=
{
'page_no'
:
page_no
,
'day_num'
:
day_num
}
file
=
open
(
fileurl
,
'w'
)
file
.
write
(
json
.
dumps
(
result
))
file
.
close
()
return
result
def
log
(
self
,
title
,
content
):
time_str
=
time
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
,
time
.
localtime
())
content2
=
"[{time}] {a} {b} {c}"
.
format
(
time
=
time_str
,
a
=
title
,
b
=
content
,
c
=
'
\n
'
)
fileurl
=
'./run.log'
f
=
open
(
fileurl
,
'a'
)
f
.
write
(
content2
)
insert_string
=
"[{time}] {a} {b} {c}"
.
format
(
time
=
time_str
,
a
=
title
,
b
=
content
,
c
=
'
\n
'
)
date
=
time
.
strftime
(
"
%
Y-
%
m-
%
d"
,
time
.
localtime
())
file_url
=
"./log_"
+
date
+
".log"
f
=
open
(
file_url
,
'a'
)
f
.
write
(
insert_string
)
f
.
close
()
def
start_
num_th
(
self
,
day_num
=
7
):
def
start_
thread
(
self
,
day_num
=
7
):
today
=
datetime
.
date
.
today
()
# result = self.get_last_download(1, day_num)
# page_no = result['page_no']
# day_num = result['day_num']
page_no
=
1
while
day_num
>
0
:
one_day
=
datetime
.
timedelta
(
days
=
day_num
)
...
...
@@ -229,10 +197,8 @@ class DownFile:
data
=
self
.
get_data
(
url
)
self
.
log
(
'download_page'
,
"page_no="
+
str
(
page_no
)
+
"&day="
+
str
(
yesterday
))
# 更新下载参数
self
.
set_last_download
(
page_no
,
day_num
)
if
len
(
data
):
t
=
threading
.
Thread
(
target
=
self
.
download
,
args
=
(
data
,))
t
=
threading
.
Thread
(
target
=
self
.
download
_aliyun
,
args
=
(
data
,))
thread
.
append
(
t
)
t
.
start
()
else
:
...
...
@@ -248,10 +214,8 @@ class DownFile:
page_no
=
1
# 跑完一天停留30秒
time
.
sleep
(
30
)
self
.
log
(
'download_over'
,
''
)
def
check_file
(
self
):
a
=
1
self
.
log
(
'download_over'
,
''
)
# today = datetime.date.today()
...
...
@@ -262,5 +226,5 @@ class DownFile:
# DownFile(params_url).start()
#
params_url
=
"https://api.tonglianjituan.com/task/getDownVoiceParamsReport?source=3"
# DownFile(params_url).start_num(7)
DownFile
(
params_url
)
.
start_num_th
(
1
)
save_path
=
"/data/html/tl_estate/public/static/voice/"
DownFile
(
params_url
,
save_path
)
.
start_thread
(
1
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment