Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
download
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
hujun
download
Commits
7f68fe4e
Commit
7f68fe4e
authored
Dec 06, 2019
by
hujun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parents
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
266 additions
and
0 deletions
+266
-0
dowload.py
dowload.py
+266
-0
No files found.
dowload.py
0 → 100644
View file @
7f68fe4e
import datetime
import json
import os
import re
import threading
import time
from urllib import request

from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
class DownFile:
    """Download call-recording files listed by a paginated HTTP API.

    The listing API at ``url`` is queried page by page (and optionally day by
    day); every returned record is expected to carry ``voice_file``,
    ``call_time`` and ``call_id`` keys. Progress is appended to ``./run.log``
    and the last requested page/day is stored in ``./record.json``.
    """

    # NOTE(review): these credentials are committed in source. They are kept
    # only as a fallback so existing deployments keep working; rotate them and
    # supply the real ones through the environment variables read in
    # ``ali_yun_sdk``.
    _DEFAULT_ACCESS_KEY_ID = 'LTAIEeZUju3PFZlJ'
    _DEFAULT_ACCESS_KEY_SECRET = 'ljKCItiUp19F7wtzfn225WoBH0Qe8Y'
    _REGION_ID = 'cn-hangzhou'
    _POOL_KEY = "FC100000022056027"
    _STATIC_VOICE_URL = "http://file.tonglianjituan.com/static/voice/"
    _RECORD_FILE = './record.json'
    _LOG_FILE = './run.log'

    def __init__(self, url, call_date_string="call_date", page_string="page_no", save_path="./"):
        """Store the listing URL, query-parameter names and output prefix.

        :param url: listing API URL; further parameters are appended with "&".
        :param call_date_string: name of the call-date query parameter.
        :param page_string: name of the page-number query parameter.
        :param save_path: prefix prepended (by plain concatenation) to each
            record's ``voice_file`` to form the local path.
        """
        self.url = url
        self.save_path = save_path
        self.page_string = page_string
        self.call_date_string = call_date_string

    def ali_yun_sdk(self, call_time, call_id):
        """Get the download link of one recording from the Aliyun API.

        :param call_time: value sent as the ``CallTime`` query parameter.
        :param call_id: value sent as the ``CallId`` query parameter.
        :return: the response's ``DownloadUrl`` when its ``Code`` is "OK",
            otherwise an empty string.
        """
        access_key_id = os.environ.get(
            'ALIBABA_CLOUD_ACCESS_KEY_ID', self._DEFAULT_ACCESS_KEY_ID)
        access_key_secret = os.environ.get(
            'ALIBABA_CLOUD_ACCESS_KEY_SECRET', self._DEFAULT_ACCESS_KEY_SECRET)
        client = AcsClient(access_key_id, access_key_secret, self._REGION_ID)

        # Named ``api_request`` so it does not shadow the ``urllib.request``
        # module imported at file level.
        api_request = CommonRequest()
        api_request.set_accept_format('json')
        api_request.set_domain('dyplsapi.aliyuncs.com')
        api_request.set_method('POST')
        api_request.set_protocol_type('https')  # https | http
        api_request.set_version('2017-05-25')
        api_request.set_action_name('QueryRecordFileDownloadUrl')
        api_request.add_query_param('RegionId', "cn-hangzhou")
        api_request.add_query_param('CallId', call_id)
        api_request.add_query_param('PoolKey', self._POOL_KEY)
        api_request.add_query_param('CallTime', call_time)

        response = client.do_action(api_request)
        result = json.loads(str(response, encoding='utf-8'))
        if result.get('Code') == "OK":
            return result.get('DownloadUrl', '')
        return ''

    def get_data(self, url):
        """Get one page of call records.

        :param url: fully built listing URL.
        :return: the response's ``data`` member when its ``code`` is 200,
            otherwise an empty list.
        """
        with request.urlopen(url) as response:
            payload = json.loads(response.read().decode("utf-8"))
        if payload['code'] == 200:
            # A null ``data`` member is treated as "no records" so callers
            # can safely take its length.
            return payload['data'] or []
        return []

    def download(self, data, is_remote=1):
        """Download the recording of every record in ``data``.

        Records whose target file already exists are counted and skipped.
        Records without a ``voice_file`` are logged and skipped, as are
        records for which no download URL could be obtained or whose
        download failed.

        :param data: iterable of record dicts with ``voice_file``,
            ``call_time`` and ``call_id`` keys.
        :param is_remote: truthy to resolve the URL through ``ali_yun_sdk``;
            falsy to download from the static voice-file host instead.
        :return: None
        """
        num = 0
        old_num = 0
        count_sleep = 0  # Aliyun requests issued since the last pause
        for item in data:
            if is_remote and count_sleep == 50:
                self.log('sleep', '-----10s-----')
                time.sleep(10)
                count_sleep = 0

            voice_file = item['voice_file']
            if not voice_file:
                # NOTE(review): the previous fallback for this case operated
                # on the builtin ``str`` and always raised; there is no file
                # name to save under, so the record is skipped.
                self.log('skip_no_voice_file', 'call_id=' + str(item.get('call_id')))
                continue

            local_file = self.save_path + voice_file
            local_dir = os.path.dirname(local_file)

            if self.check_file(local_file):
                old_num = old_num + 1
                continue

            if is_remote:
                count_sleep = count_sleep + 1  # pause counter
                url = self._remote_url(item)
                if not url:
                    continue
            else:
                url = self._STATIC_VOICE_URL + voice_file

            if local_dir:
                self.check_dir(local_dir)
            try:
                result = request.urlretrieve(url, local_file)
            except (OSError, ValueError) as error:
                # URLError/HTTPError/ContentTooShortError are OSError
                # subclasses; ValueError covers malformed URLs.
                self.log('download_error', str(url) + ' ' + str(error))
                # Drop a partial file so a later run does not mistake it for
                # a finished download.
                if os.path.exists(local_file):
                    os.remove(local_file)
                continue
            num = num + 1
            self.log('download_file', result)
        self.log("download_num", num)
        self.log("old_num", old_num)

    def _remote_url(self, item):
        """Resolve a record's download URL, retrying once after 20 seconds.

        :return: the URL, or '' when both attempts raised or the API
            returned no URL.
        """
        try:
            url = self.ali_yun_sdk(item['call_time'], item['call_id'])
        except Exception as first_error:  # best-effort boundary around the SDK
            self.log("exception 20s------------", str(first_error))
            time.sleep(20)
            # second attempt
            try:
                url = self.ali_yun_sdk(item['call_time'], item['call_id'])
            except Exception as second_error:
                self.log(
                    "two exception error-----call_time=" + str(item['call_time'])
                    + '&call_id=' + str(item['call_id']),
                    str(second_error))
                return ''
        if not url:
            self.log(
                'no_download_url',
                'call_time=' + str(item['call_time']) + '&call_id=' + str(item['call_id']))
        return url

    def check_file(self, file=None):
        """Return True when ``file`` exists, False otherwise (or if omitted)."""
        if file is None:
            return False
        return os.path.exists(file)

    def check_dir(self, dir):
        """Make sure ``dir`` exists, creating missing parents; return True."""
        if not os.path.exists(dir):
            # exist_ok avoids a crash when another download thread creates
            # the same directory between the check and the call.
            os.makedirs(dir, exist_ok=True)
        return True

    def _page_url(self, page_no, call_date=None):
        """Build the listing URL for a page and, optionally, a call date."""
        url = self.url + "&" + self.page_string + "=" + str(page_no)
        if call_date is not None:
            url = url + "&" + self.call_date_string + "=" + str(call_date)
        return url

    def start(self):
        """Fetch pages until one is empty, downloading each page in a thread."""
        page_no = 1
        threads = []
        while True:
            data = self.get_data(self._page_url(page_no))
            self.log('download_page', "page_no=" + str(page_no))
            if not len(data):
                break
            worker = threading.Thread(target=self.download, args=(data,))
            threads.append(worker)
            worker.start()
            page_no = page_no + 1
        for worker in threads:
            worker.join()
        self.log('download_successful', self.page_string + "=" + str(page_no))

    def start_num(self, day_num=7):
        """Download sequentially, day by day, resuming from ``record.json``.

        :param day_num: how many days back to start from when no saved
            progress exists.
        :return: the page number after the last processed day (reset to 1
            after each day).
        """
        today = datetime.date.today()
        result = self.get_last_download(1, day_num)
        page_no = result['page_no']
        day_num = result['day_num']
        while day_num > 0:
            yesterday = today - datetime.timedelta(days=day_num)
            day_num = day_num - 1
            while True:
                data = self.get_data(self._page_url(page_no, yesterday))
                self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday))
                # update download parameters
                # NOTE(review): day_num was already decremented above, so the
                # saved value is one less than the offset of the day being
                # processed; a resume therefore appears to start at the
                # following day - confirm whether that is intended.
                self.set_last_download(page_no, day_num)
                if not len(data):
                    break
                self.download(data)
                page_no = page_no + 1
            # reset page number
            page_no = 1
            # pause 30 seconds after finishing a day
            time.sleep(30)
        return page_no

    def get_last_download(self, page_no, day_num):
        """Read saved progress from ``./record.json``.

        :param page_no: page number to return when nothing usable is saved.
        :param day_num: day offset to return when nothing usable is saved.
        :return: the decoded record, or ``{'page_no': ..., 'day_num': ...}``
            built from the arguments.
        """
        fileurl = self._RECORD_FILE
        result = {'page_no': page_no, 'day_num': day_num}
        if not os.path.isfile(fileurl):
            return result
        with open(fileurl, 'r') as record:
            record_string = record.read()
        if len(record_string) > 0:
            try:
                result = json.loads(record_string)
            except ValueError:
                # A truncated or corrupt record should not block the run.
                self.log('record_invalid', fileurl)
        return result

    def set_last_download(self, page_no, day_num):
        """Write progress to ``./record.json`` and return the stored dict."""
        result = {'page_no': page_no, 'day_num': day_num}
        with open(self._RECORD_FILE, 'w') as record:
            record.write(json.dumps(result))
        return result

    def log(self, title, content):
        """Append a timestamped ``title content`` line to ``./run.log``."""
        time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        content2 = "[{time}] {a} {b} {c}".format(time=time_str, a=title, b=content, c='\n')
        with open(self._LOG_FILE, 'a') as log_file:
            log_file.write(content2)

    def start_num_th(self, day_num=7):
        """Download day by day, one thread per page, starting ``day_num`` days back.

        All page threads of a day are joined before the next day starts.

        :param day_num: how many days back to start from.
        """
        today = datetime.date.today()
        page_no = 1
        while day_num > 0:
            yesterday = today - datetime.timedelta(days=day_num)
            day_num = day_num - 1
            self.log('download_start', self.call_date_string + "=" + str(yesterday))
            threads = []
            while True:
                data = self.get_data(self._page_url(page_no, yesterday))
                self.log('download_page', "page_no=" + str(page_no) + "&day=" + str(yesterday))
                # update download parameters
                self.set_last_download(page_no, day_num)
                if not len(data):
                    break
                worker = threading.Thread(target=self.download, args=(data,))
                threads.append(worker)
                worker.start()
                page_no = page_no + 1
            for worker in threads:
                worker.join()
            self.log('download_successful', self.call_date_string + "=" + str(yesterday))
            # reset page number
            page_no = 1
            # pause 30 seconds after finishing a day
            time.sleep(30)
        self.log('download_over', '')
# Alternative invocations kept for reference:
#
#   Download a single day (yesterday) through the page-threaded ``start``:
#     today = datetime.date.today()
#     yesterday = today - datetime.timedelta(days=1)
#     params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?page_size=3000&source=3&call_date=" + str(yesterday)
#     params_url = "https://api.tonglianjituan.com/task/getDownVoiceParamsReport?page_size=100&source=3&call_date=2019-12-1"
#     DownFile(params_url).start()
#
#   Sequential, resumable run over the last 7 days:
#     DownFile(params_url).start_num(7)

# Listing endpoint; the page and call-date parameters are appended by DownFile.
params_url = (
    "https://api.tonglianjituan.com/task/getDownVoiceParamsReport"
    "?source=3"
)

# Threaded run starting one day back.
DownFile(params_url).start_num_th(day_num=1)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment