123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- from random import choice
- import json
- from lxml import etree
- import requests
- import re
- import os
- import time
- from pathlib import Path
- import subprocess
- # from pydub import AudioSegment
def get_user_agent():
    """Return a User-Agent header value drawn at random from a fixed pool.

    The pool mixes desktop, mobile and crawler identification strings. A
    couple of entries are listed twice, so they are proportionally more
    likely to be picked.
    """
    pool = (
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
        "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36",
        "Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20",
        "Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
        "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
    )
    return choice(pool)
def _evict_oldest_download(download_dir):
    """Delete the oldest regular file in *download_dir* once it holds more than five."""
    files = [
        os.path.join(download_dir, name) for name in os.listdir(download_dir)
    ]
    files = [path for path in files if os.path.isfile(path)]
    if len(files) <= 5:
        return
    oldest = min(files, key=lambda path: os.stat(path).st_ctime)
    try:
        os.remove(oldest)
    except OSError as e:
        # Best effort: failing to trim the cache must not abort the download.
        print(e)


def _fetch_audio_info(aid, headers):
    """Scrape the video page for its sanitized title and audio stream URLs.

    Makes up to five attempts. Returns ``(title, urls)`` on success, or
    ``None`` when no attempt produced both a title and at least one URL.
    """
    pattern = '<script>window.__playinfo__=(.*?)</script>'
    for _ in range(5):
        try:
            response = requests.get(
                'https://www.bilibili.com/video/' + aid,
                headers=headers,
                timeout=10,
            )
            if response.status_code == 200:
                html = etree.HTML(response.text)
                raw_title = html.xpath('//title[@data-vue-meta="true"]/text()')[0]
                # Same substitutions as before so existing cached files keep
                # matching: '/' cannot appear in a file name, and '"' / '#'
                # are replaced to keep the name easy to quote and to link.
                title = (
                    raw_title.replace('/', '|')
                    .replace('"', "'")
                    .replace('#', 'No.')
                )
                print(title)
                info = json.loads(re.findall(pattern, response.text)[0])
                audio = info['data']['dash']['audio'][0]
                candidates = [audio.get('baseUrl'), audio.get('base_url')]
                candidates.extend(audio.get('backupUrl') or [])
                urls = []
                for candidate in candidates:
                    if candidate and candidate not in urls:
                        urls.append(candidate)
                if urls:
                    return title, urls
        except Exception as e:
            # Scraping boundary: network, HTML-parsing, JSON and schema errors
            # are all treated as "this attempt failed, try again".
            print(e)
        time.sleep(0.5)
    return None


def _convert_to_mp3(m4s_path, mp3_path):
    """Transcode *m4s_path* into a 44.1 kHz stereo 192k MP3 at *mp3_path*.

    Returns True when ffmpeg ran and exited with status 0.
    """
    command = [
        'ffmpeg', '-i', m4s_path, '-vn', '-ar', '44100', '-ac', '2',
        '-ab', '192k', '-f', 'mp3', mp3_path,
    ]
    try:
        # Argument list, no shell: the file name is derived from a scraped
        # page title and must never be interpreted by a shell.
        return subprocess.run(command).returncode == 0
    except OSError as e:
        # ffmpeg is missing or not executable.
        print(e)
        return False


def audio_download(aid):
    """Download the audio track of a Bilibili video and convert it to MP3.

    The video page ``https://www.bilibili.com/video/<aid>`` is scraped for
    the title and the DASH audio URLs. The stream is saved as ``.m4s`` in
    the ``temp`` directory next to this file, transcoded with ffmpeg into
    the ``download`` directory, and the temporary file is removed. Before a
    new file is written, the oldest file in ``download`` is deleted if the
    directory already holds more than five files.

    Args:
        aid: Video identifier appended to the video URL.

    Returns:
        The MP3 file name (relative to the ``download`` directory) when the
        file already existed or was created, otherwise the string ``"FAIL"``.

    Side effects:
        Creates the ``download`` and ``temp`` directories if missing and
        leaves the process working directory set to ``download``.
    """
    base_dir = Path(__file__).resolve().parent
    download_dir = os.path.join(base_dir, 'download')
    temp_dir = os.path.join(base_dir, 'temp')
    os.makedirs(download_dir, exist_ok=True)
    os.makedirs(temp_dir, exist_ok=True)
    # Earlier versions always returned with the working directory set to the
    # download folder; keep that for callers that open the result relatively.
    os.chdir(download_dir)

    headers = {
        'user-agent': get_user_agent(),
        'referer': 'https://www.bilibili.com/'
    }

    info = _fetch_audio_info(aid, headers)
    if info is None:
        return "FAIL"
    title, audio_urls = info

    mp3file = title + '.mp3'
    mp3_path = os.path.join(download_dir, mp3file)
    if os.path.exists(mp3_path):
        print('已存在')
        return mp3file

    # Trim the cache only when a new file is about to be added, so a cache
    # hit can never delete the very file that was requested.
    _evict_oldest_download(download_dir)

    m4s_path = os.path.join(temp_dir, title + '.m4s')
    for audio_url in audio_urls:
        try:
            audio_content = requests.get(
                audio_url, headers=headers, timeout=(10, 60)
            )
        except requests.RequestException as e:
            # One mirror being unreachable should not stop the others.
            print(e)
            continue
        if audio_content.status_code != 200:
            print("FAIL!!!")
            continue

        print('开始下载,下载中...')
        converted = False
        try:
            with open(m4s_path, 'wb') as f:
                f.write(audio_content.content)
            converted = _convert_to_mp3(m4s_path, mp3_path)
        except OSError as e:
            print(e)
        finally:
            if os.path.exists(m4s_path):
                os.remove(m4s_path)

        if converted:
            print(title, '下载完成!')
            return mp3file

        # Drop a partial output so a later call does not mistake it for a
        # finished download.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
        print("FAIL!!!")
    return "FAIL"
def get_urls(bv):
    """List the parts (pages) of a Bilibili video.

    Fetches ``https://www.bilibili.com/video/<bv>/?p=1``, extracts the
    ``window.__INITIAL_STATE__`` JSON embedded in the page and reads
    ``videoData.pages`` from it. Up to three attempts are made, pausing
    half a second after each failure.

    Args:
        bv: Video identifier used to build the page URLs.

    Returns:
        A ``zip`` iterator of ``(page, part, url)`` tuples, one per entry of
        ``videoData.pages``, or the string ``"FAIL"`` if every attempt failed.
        The iterator can be consumed only once.
    """
    headers = {
        'user-agent': get_user_agent(),
        'referer': 'https://www.bilibili.com/'
    }
    url = 'https://www.bilibili.com/video/' + bv + '/?p=1'
    # Raw string: the backslashes are regex escapes, not string escapes.
    state_pattern = r'window\.__INITIAL_STATE__=\{(.*?)\};\(function\(\)'
    for _ in range(3):
        try:
            # A timeout keeps a stalled connection from blocking forever.
            response = requests.get(url=url, headers=headers, timeout=10)
            # The pattern captures the object's contents without its outer
            # braces, so they are restored before parsing.
            state_text = "{" + re.findall(state_pattern, response.text)[0] + "}"
            pages = json.loads(state_text)['videoData']['pages']
            pagelist = [page['page'] for page in pages]
            namelist = [page['part'] for page in pages]
            urllist = [
                'https://www.bilibili.com/video/{}/?p={}'.format(bv, page['page'])
                for page in pages
            ]
            return zip(pagelist, namelist, urllist)
        except (requests.RequestException, LookupError, TypeError, ValueError) as e:
            # Network failure, pattern not found, malformed JSON or an
            # unexpected schema: report it and retry.
            print(e)
            time.sleep(0.5)
    return "FAIL"
|