参考:https://cloud.tencent.com/developer/article/1768680
参考文章中的代码有点问题,需要修改请求头(headers)才能正常运行,修改后的代码如下:
import requests
import re # 正则表达式
import pprint
import json
from moviepy.editor import AudioFileClip, VideoFileClip
from bs4 import BeautifulSoup as bs
# Request headers sent with every download request.
headers = {
    # Anti-hotlinking: tells the server which page the request was
    # navigated from.
    'referer': 'https://www.bilibili.com/a',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.198 Safari/537.36'
    ),
}
def send_request(url, timeout=30):
    """Send a GET request to *url* using the module-level ``headers``.

    :param url: address to fetch.
    :param timeout: seconds to wait for the server (connect / between
        bytes read) before giving up. Without a timeout ``requests`` waits
        indefinitely, so a stalled server would hang the whole script.
    :return: the ``requests`` response object, unmodified (the status code
        is not checked here).
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response
def get_video_data(html_data):
    """Parse the title and the audio/video stream URLs out of a video page.

    :param html_data: HTML text of the video page.
    :return: ``[title, audio_url, video_url]``.
    :raises ValueError: if the title heading or the embedded
        ``window.__playinfo__`` JSON cannot be found in the page.
    :raises KeyError, IndexError: propagated unchanged if the JSON does not
        have the ``data -> dash -> audio/video -> backupUrl`` layout.
    """
    # Title: first <h1> carrying the expected class attribute.
    soup = bs(html_data, 'lxml')
    headings = soup.find_all(
        name='h1', attrs={"class": "video-title special-text-indent"}
    )
    if not headings:
        raise ValueError('video title <h1> not found in page')
    title = headings[0].get_text()

    # The stream description is a JSON object assigned to
    # window.__playinfo__ inside an inline <script>. A raw string keeps the
    # regex escape ``\.`` from being read as a (invalid) string escape.
    match = re.search(
        r'<script>window\.__playinfo__=(.*?)</script>', html_data
    )
    if match is None:
        raise ValueError('window.__playinfo__ JSON not found in page')
    json_data = json.loads(match.group(1))
    pprint.pprint(json_data)

    # First audio entry, first backup URL.
    audio_url = json_data['data']['dash']['audio'][0]['backupUrl'][0]
    print('解析到的音频地址:', audio_url)
    # First video entry, first backup URL.
    video_url = json_data['data']['dash']['video'][0]['backupUrl'][0]
    print('解析到的视频地址:', video_url)

    video_data = [title, audio_url, video_url]
    return video_data
def save_data(file_name, audio_url, video_url):
    """Download both streams and store them as ``<file_name>.mp3`` / ``.mp4``.

    :param file_name: path prefix (without extension) for the two files.
    :param audio_url: address of the audio stream.
    :param video_url: address of the video stream.
    """
    # Fetch both payloads fully before anything is written to disk.
    print('正在请求音频数据')
    audio_bytes = send_request(audio_url).content
    print('正在请求视频数据')
    video_bytes = send_request(video_url).content

    # Write audio first, then video, announcing each file once written.
    outputs = (
        ('.mp3', audio_bytes, '正在保存音频数据'),
        ('.mp4', video_bytes, '正在保存视频数据'),
    )
    for suffix, payload, notice in outputs:
        with open(file_name + suffix, mode='wb') as out:
            out.write(payload)
        print(notice)
def merge_data(video_name, output_path="output.mp4"):
    """Combine ``<video_name>.mp3`` and ``<video_name>.mp4`` into one file.

    Whichever stream is longer is trimmed to the duration of the shorter
    one before the audio is attached to the video.

    :param video_name: path prefix (without extension) shared by the audio
        and video files.
    :param output_path: where the merged file is written. Defaults to
        ``"output.mp4"``, the path that used to be hard-coded.
    """
    print('视频合成开始:', video_name)
    # Context managers make sure the underlying readers are closed even if
    # encoding fails part-way through.
    with AudioFileClip(video_name + '.mp3') as audioclip, \
            VideoFileClip(video_name + '.mp4') as videoclip:
        video_time = videoclip.duration
        audio_time = audioclip.duration

        if video_time > audio_time:
            # Video is longer than audio: cut the video.
            videoclip_new = videoclip.subclip(0, audio_time)
            audioclip_new = audioclip
        else:
            # Audio is longer than (or equal to) video: cut the audio.
            videoclip_new = videoclip
            audioclip_new = audioclip.subclip(0, video_time)

        # Attach the audio track and encode the result.
        video_with_new_audio = videoclip_new.set_audio(audioclip_new)
        video_with_new_audio.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-video.m4a',
            remove_temp=True,
        )
    print('视频合成结束:', video_name)
# Download one Bilibili video: fetch the page, parse it, save both streams
# and merge them.
url = 'https://www.bilibili.com/video/BV1bK421a7qG/?spm_id_from=333.1007.tianma.6-4-22.click'
response = send_request(url)
# Decode with the charset the page itself declares; when the page declares
# none, keep the encoding requests already chose instead of failing on [0].
declared_encodings = requests.utils.get_encodings_from_content(response.text)
if declared_encodings:
    response.encoding = declared_encodings[0]
html_data = response.text
video_data = get_video_data(html_data)
# The title is used as a file name, so replace characters that are not
# allowed in paths (and line breaks) before handing it on.
file_name = re.sub(r'[\\/:*?"<>|\r\n]+', '_', video_data[0]).strip() or 'video'
save_data(file_name, video_data[1], video_data[2])
merge_data(file_name)
效果
小姐姐挺靓,就是左下角有水印,下面想办法去除水印。参考 CSDN 博客文章:《python实战之去除视频水印&字幕》。
import os
import sys
import cv2
import numpy
from moviepy import editor
# Scratch file that receives the processed, silent video written by OpenCV
# before the original audio is attached; it is deleted after each video.
TEMP_VIDEO = 'temp.mp4'
class WatermarkRemover():
    """Remove a watermark or subtitles from every video in a directory.

    The user marks the watermark/subtitle area once in an interactive
    window; bright pixels inside that area are turned into a mask and each
    frame is repaired with OpenCV inpainting. The original audio track is
    then attached to the processed video.
    """

    def __init__(self, video_path, output, threshold: int, kernel_size: int):
        """
        :param video_path: directory containing the input videos
        :param output: directory that receives the processed videos
        :param threshold: threshold used for binary segmentation
        :param kernel_size: side length of the square dilation kernel
        """
        self.threshold = threshold  # threshold used for binary segmentation
        self.kernel_size = kernel_size  # dilation kernel size
        self.video_path = video_path
        self.output = output

    @staticmethod
    def _sample_step(frame_count, samples: int = 5) -> int:
        """Return the frame interval used to sample ``samples`` frames.

        Never returns less than 1, so videos shorter than ``samples``
        frames (or streams reporting a zero/negative frame count) do not
        cause a modulo-by-zero.
        """
        return max(1, int(frame_count) // samples)

    def _output_file(self, name: str, suffix: str) -> str:
        """Build ``<output dir>/<base name><suffix>.mp4`` for input *name*.

        The base name is the file name up to its first dot.
        """
        (_, filename) = os.path.split(name)
        return os.path.join(self.output, filename.split('.')[0] + suffix + '.mp4')

    @staticmethod
    def _open_writer(video):
        """Open a writer on TEMP_VIDEO matching the fps and size of *video*."""
        fps = video.get(cv2.CAP_PROP_FPS)
        size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return cv2.VideoWriter(TEMP_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

    def select_roi(self, img: numpy.ndarray, hint: str) -> list:
        """
        Let the user box the watermark or subtitle; SPACE or ENTER confirms.

        :param img: image to display
        :param hint: title of the selection window
        :return: selected region as ``[x, y, width, height]`` in the
            coordinates of the full-size image
        :raises ValueError: if *img* is None (no frame could be read)
        """
        if img is None:
            raise ValueError('cannot select ROI: no frame was read from the video')
        # Show the image at 70% size, then scale the selection back up.
        COFF = 0.7
        w, h = int(COFF * img.shape[1]), int(COFF * img.shape[0])
        resize_img = cv2.resize(img, (w, h))
        roi = cv2.selectROI(hint, resize_img, False, False)
        cv2.destroyAllWindows()
        watermark_roi = [int(value / COFF) for value in roi[:4]]
        return watermark_roi

    def dilate_mask(self, mask: numpy.ndarray) -> numpy.ndarray:
        """
        Dilate the mask to enlarge the covered area.

        :param mask: mask image
        :return: dilated mask
        """
        kernel = numpy.ones((self.kernel_size, self.kernel_size), numpy.uint8)
        mask = cv2.dilate(mask, kernel)
        return mask

    def generate_single_mask(self, img: numpy.ndarray, roi: list, threshold: int) -> numpy.ndarray:
        """
        Build the mask of a single frame from the selected ROI.

        :param img: single frame (BGR)
        :param roi: selected region as ``[x, y, width, height]``
        :param threshold: binarisation threshold
        :return: mask that is 255 where the grayscale pixel inside the ROI
            is above *threshold* and 0 everywhere else
        """
        # Invalid region: report and terminate the program.
        if len(roi) != 4:
            print('NULL ROI!')
            sys.exit()
        # Copy the grayscale pixels inside the ROI onto a black canvas.
        # Images are indexed [row, column], i.e. [y, x].
        roi_img = numpy.zeros((img.shape[0], img.shape[1]), numpy.uint8)
        row_start, row_end = int(roi[1]), int(roi[1] + roi[3])
        col_start, col_end = int(roi[0]), int(roi[0] + roi[2])
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        roi_img[row_start:row_end, col_start:col_end] = gray[row_start:row_end, col_start:col_end]
        # Threshold segmentation.
        _, mask = cv2.threshold(roi_img, threshold, 255, cv2.THRESH_BINARY)
        return mask

    def generate_watermark_mask(self, video_path: str) -> numpy.ndarray:
        """
        Build the watermark mask from several frames of one video.

        Masks of frames sampled at a regular interval are combined with a
        bitwise AND, so only pixels that are bright in every sampled frame
        remain; the result is dilated.

        :param video_path: path of the video file
        :return: watermark mask
        :raises ValueError: if no frame can be read from the video
        """
        video = cv2.VideoCapture(video_path)
        try:
            success, frame = video.read()
            roi = self.select_roi(frame, 'select watermark ROI')
            mask = numpy.full((frame.shape[0], frame.shape[1]), 255, numpy.uint8)
            step = self._sample_step(video.get(cv2.CAP_PROP_FRAME_COUNT))
            index = 0
            while success:
                if index % step == 0:
                    mask = cv2.bitwise_and(mask, self.generate_single_mask(frame, roi, self.threshold))
                success, frame = video.read()
                index += 1
        finally:
            # Release the capture even when ROI selection or masking fails.
            video.release()
        return self.dilate_mask(mask)

    def generate_subtitle_mask(self, frame: numpy.ndarray, roi: list) -> numpy.ndarray:
        """
        Build the subtitle mask of a single frame from the selected ROI.

        :param frame: single frame
        :param roi: selected region as ``[x, y, width, height]``
        :return: subtitle mask
        """
        # Only the ROI's vertical band (y, height) is used; the band spans
        # the full frame width.
        mask = self.generate_single_mask(frame, [0, roi[1], frame.shape[1], roi[3]], self.threshold)
        return self.dilate_mask(mask)

    def inpaint_image(self, img: numpy.ndarray, mask: numpy.ndarray) -> numpy.ndarray:
        """
        Repair the masked pixels of an image.

        :param img: single frame
        :param mask: mask; non-zero pixels are repaired
        :return: repaired image
        """
        telea = cv2.inpaint(img, mask, 1, cv2.INPAINT_TELEA)
        return telea

    def merge_audio(self, input_path: str, output_path: str, temp_path: str):
        """
        Attach the original audio to the processed video.

        :param input_path: path of the original video (audio source)
        :param output_path: path of the resulting file with audio
        :param temp_path: path of the processed, silent video
        """
        with editor.VideoFileClip(input_path) as video:
            audio = video.audio
            with editor.VideoFileClip(temp_path) as opencv_video:
                clip = opencv_video.set_audio(audio)
                # write_videofile is the current name of the deprecated
                # to_videofile alias.
                clip.write_videofile(output_path)

    def remove_video_watermark(self):
        """
        Remove the watermark from every video in the input directory.

        The mask is generated once, from the first listed video, and reused
        for all the others.
        """
        os.makedirs(self.output, exist_ok=True)

        filenames = [os.path.join(self.video_path, i) for i in os.listdir(self.video_path)]
        mask = None

        for i, name in enumerate(filenames):
            if i == 0:
                # Generate the watermark mask.
                mask = self.generate_watermark_mask(name)

            # Open the source and the temporary output.
            video = cv2.VideoCapture(name)
            video_writer = self._open_writer(video)
            try:
                # Process frame by frame.
                success, frame = video.read()
                while success:
                    frame = self.inpaint_image(frame, mask)
                    video_writer.write(frame)
                    success, frame = video.read()
            finally:
                video.release()
                video_writer.release()

            # Mux the audio back in, then drop the temporary file.
            try:
                self.merge_audio(name, self._output_file(name, '_no_watermark'), TEMP_VIDEO)
            finally:
                if os.path.exists(TEMP_VIDEO):
                    os.remove(TEMP_VIDEO)

    def remove_video_subtitle(self):
        """
        Remove the subtitles from every video in the input directory.

        The subtitle region is selected once, on the first frame of the
        first listed video; a fresh mask is computed for every frame.
        """
        os.makedirs(self.output, exist_ok=True)

        filenames = [os.path.join(self.video_path, i) for i in os.listdir(self.video_path)]
        roi = []

        for i, name in enumerate(filenames):
            # Open the source and the temporary output.
            video = cv2.VideoCapture(name)
            video_writer = self._open_writer(video)
            try:
                # Process frame by frame.
                success, frame = video.read()
                if i == 0:
                    roi = self.select_roi(frame, 'select subtitle ROI')
                while success:
                    mask = self.generate_subtitle_mask(frame, roi)
                    frame = self.inpaint_image(frame, mask)
                    video_writer.write(frame)
                    success, frame = video.read()
            finally:
                video.release()
                video_writer.release()

            # Mux the audio back in, then drop the temporary file. The
            # output goes to this instance's output directory.
            try:
                self.merge_audio(name, self._output_file(name, '_no_sub'), TEMP_VIDEO)
            finally:
                if os.path.exists(TEMP_VIDEO):
                    os.remove(TEMP_VIDEO)
# Remove the watermark from every video in the 'video' directory and write
# the results into the 'output' directory.
video_path, output_path = 'video', 'output'
remover = WatermarkRemover(video_path, output_path, threshold=80, kernel_size=5)
remover.remove_video_watermark()
# Remove subtitles instead:
# remover = WatermarkRemover(video_path,output_path,threshold=80, kernel_size=5)
# remover.remove_video_subtitle()
去水印后的效果一般: