前言
当Git本地仓库里有文件超过50MB,却又确实需要上传的时候,就需要用到下面的方法了。
解决
本代码就是把大文件拆分成若干小文件,然后再上传。
等到拉取下来的时候,可以直接再进行合并,合并成原文件
代码如下,仅供参考:
'''
-*- coding: utf-8 -*-
@File : 文件分割与合并.py
@Author: Shanmh
@Time : 2024/02/26 下午2:37
@Function: 突破git上传限制,把大文件分割成小文件再上传
'''
import os
import shutil
from natsort import natsorted
import os
def get_file_size(path) -> float:
    """Return the size of the file at ``path`` in MB (bytes / 1024 / 1024).

    Args:
        path: Path of the file to measure.

    Returns:
        The file size as a float number of megabytes.

    Raises:
        OSError: Propagated from ``os.path.getsize`` when the file does not
            exist or is inaccessible.
    """
    bytes_per_mb = 1024 * 1024
    return os.path.getsize(path) / bytes_per_mb
def list_files_and_sizes(directory, max_size):
    """Collect the files under ``directory`` that are at least ``max_size`` MB.

    Each matching file is printed together with its size and its path is
    appended to the returned list.

    Args:
        directory: Root directory to scan recursively.
        max_size: Size threshold in MB; files whose size is greater than or
            equal to this value are reported.

    Returns:
        A list of paths of the files that reached the threshold.
    """
    big_list = []
    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk never descends into Git's own storage
        # (splitting and deleting a large pack file would corrupt the
        # repository) nor into directories that already hold split parts.
        dirs[:] = [
            name for name in dirs
            if name != ".git" and not name.endswith(".parts")
        ]
        for file in files:
            file_path = os.path.join(root, file)
            try:
                file_size = get_file_size(file_path)
            except OSError:
                # os.walk also lists dangling symlinks and files that vanish
                # mid-scan; they cannot be measured, so skip them.
                continue
            if file_size >= max_size:
                print(f"{file_path}: {file_size:.2f} MB")
                big_list.append(file_path)
    return big_list
def list_parts(directory):
    """Find every directory under ``directory`` whose name ends in ``.parts``.

    The collected paths are printed before being returned.

    Args:
        directory: Root directory to scan recursively.

    Returns:
        A list of paths of the ``*.parts`` directories that were found.
    """
    print("Find all parts , willing combine the parts:")
    parts_list = []
    for root, dirs, _files in os.walk(directory):
        keep_walking = []
        for name in dirs:
            if name == ".git":
                # Never look inside Git's own storage: a directory there that
                # happens to end in ".parts" must not be merged and deleted.
                continue
            if name.endswith(".parts"):
                parts_list.append(os.path.join(root, name))
                # A parts directory only holds chunk files; do not descend.
                continue
            keep_walking.append(name)
        # In-place assignment is what tells os.walk which subdirs to visit.
        dirs[:] = keep_walking
    print(parts_list)
    return parts_list
def split_file(input_file, chunk_size):
    """Split ``input_file`` into numbered chunk files, then delete the original.

    The chunks are written to the directory ``<input_file>.parts`` as
    ``tmp.part0001``, ``tmp.part0002``, ... Each chunk holds at most
    ``chunk_size`` bytes. The file is streamed one chunk at a time, so memory
    use is bounded by ``chunk_size`` rather than by the file size.

    Args:
        input_file: Path of the file to split.
        chunk_size: Maximum size of each chunk in bytes; must be positive.

    Returns:
        The number of chunk files written.

    Raises:
        ValueError: If ``chunk_size`` is not positive (nothing is touched).
        OSError: If the input cannot be read or a chunk cannot be written;
            the original file is left in place in that case.
    """
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive number of bytes, got {chunk_size!r}"
        )

    parts_dir = f"{input_file}.parts"
    # An interrupted earlier run may have left the directory behind; reuse it
    # but drop stale chunks so they cannot be mixed into a later merge.
    os.makedirs(parts_dir, exist_ok=True)
    for leftover in os.listdir(parts_dir):
        if leftover.startswith("tmp.part"):
            os.remove(os.path.join(parts_dir, leftover))

    num_chunks = 0
    with open(input_file, 'rb') as src:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        chunk_reader = iter(lambda: src.read(chunk_size), b"")
        for num_chunks, chunk in enumerate(chunk_reader, start=1):
            output_file = f'{parts_dir}/tmp.part{num_chunks:04d}'
            with open(output_file, 'wb') as dst:
                dst.write(chunk)
            print(f'Split file: {output_file}')

    # Only reached when every chunk was written successfully.
    os.remove(input_file)
    return num_chunks
def merge_files(output_file):
    """Rebuild the original file from a ``<name>.parts`` directory.

    The chunk files ``tmp.part<N>`` inside ``output_file`` are concatenated in
    ascending numeric order into the path obtained by dropping the trailing
    ``.parts`` from ``output_file``. The parts directory is removed only after
    the merged file has been written completely, so a failure part-way through
    leaves the chunks intact.

    Args:
        output_file: Path of the ``*.parts`` directory (despite the name, this
            is the directory holding the chunks, not the file to create).

    Returns:
        None. A non-empty directory that contains no chunk files is treated as
        unrelated and left untouched.

    Raises:
        ValueError: If ``output_file`` does not end in ``.parts``.
        OSError: If the directory or a chunk cannot be read, or the merged
            file cannot be written.
    """
    suffix = ".parts"
    prefix = "tmp.part"

    # Tolerate a trailing path separator such as "big.bin.parts/".
    parts_dir = output_file.rstrip(os.sep + (os.altsep or ""))
    if not parts_dir.endswith(suffix):
        raise ValueError(f"Not a parts directory: {output_file}")
    # Strip only the trailing suffix; ".parts" may legitimately occur earlier
    # in the path (e.g. in a parent directory name).
    target = parts_dir[:-len(suffix)]

    entries = os.listdir(parts_dir)
    # Merge real chunk files only, so stray files (.DS_Store, Thumbs.db, ...)
    # never end up inside the merged output; order by the numeric index.
    part_names = sorted(
        (
            name for name in entries
            if name.startswith(prefix) and name[len(prefix):].isdecimal()
        ),
        key=lambda name: int(name[len(prefix):]),
    )
    if entries and not part_names:
        # Probably an unrelated directory that merely ends in ".parts".
        print(f'Skip (no part files found): {output_file}')
        return

    with open(target, 'wb') as merged:
        for name in part_names:
            with open(os.path.join(parts_dir, name), 'rb') as chunk_file:
                # Buffered copy: never holds a whole chunk in memory.
                shutil.copyfileobj(chunk_file, merged)

    # Delete the chunks only now that the merged file is complete.
    shutil.rmtree(parts_dir)
    print(f'Merged file: {target}')
if __name__ == '__main__':
    # Script entry point: edit the settings below, then run the file.
    method = 1  # 1: split large files into parts; 2: merge parts back into files
    currect_path = os.getcwd()  # directory to process; defaults to the current one
    max_size = 50  # MB; files at least this large get split
    split_size = 40 * 1024 * 1024  # size of each part in bytes (40 MB)

    if method == 1:
        files_list = list_files_and_sizes(currect_path, max_size)
        for fi in files_list:
            split_file(fi, split_size)
    elif method == 2:
        parts_list = list_parts(currect_path)
        for fi in parts_list:
            merge_files(fi)
    else:
        # Fail loudly instead of silently doing nothing on a mistyped setting.
        raise SystemExit(
            f"Unsupported method {method!r}: use 1 to split or 2 to merge"
        )
注意:此代码中有删除命令(分割完成后会删除原文件,合并完成后会删除分块目录),请先在备份数据上确认本程序能正常运行后,再正式运行。