最近项目需要支持多国语言版本,但是项目之前的代码使用了 Layui 组件,全网找不到 Layui 的多语言包,只能自己动手做:使用 Python 脚本提取组件中所有的中文。
1、到官网下载完整的(未压缩的)源码版本,一定不要用压缩(minified)后的文件,改了容易报错
2、Python遍历文件夹
import io
import os
import re
import hashlib
# Folder that the main guard passes to each_file() for scanning.
# The separators are backslashes, i.e. a Windows-style path; adjust as needed.
filepath = "\\layui\\layui-zh-CN\\modules"
# Walk the given directory and print the path of every file visited.
def each_file(filepath):
    """Gather Chinese strings from all files under *filepath* into language.txt.

    Every file found by ``os.walk`` (except the generated language file
    itself) is printed and passed to ``read_file``. The returned entries are
    concatenated in visit order, duplicates included, and written one per line
    to ``<filepath>/language.txt``. Nothing is written when no entry was found.
    """
    language_name = "language.txt"
    collected = []
    for folder, _subdirs, names in os.walk(filepath):
        for name in names:
            # Skip the language pack so that re-running the script does not
            # keep feeding its own output back into the result.
            if language_name in name:
                continue
            path = os.path.join(folder, name)
            print(path + "\n")
            collected.extend(read_file(path))

    if not collected:
        return

    # The output lives in the scanned root, next to the module files.
    output_path = os.path.join(filepath, language_name)
    with open(output_path, 'w', encoding='utf-8') as out:
        out.write("\n".join(collected))
3、提取中文字符,并保存到 language.txt,保存格式为 Key=Value 模式(键为"文件名_字符串MD5",值为需要翻译的中文),方便替换的时候查找
def read_file(filename):
    """Extract translatable Chinese strings from one source file.

    The file is read as UTF-8, line by line. Each run of characters in the
    range U+4E00..U+9FFF yields one entry, unless it is filtered out:

    * lines whose stripped text starts with ``//`` or ``*`` are skipped;
    * a run that starts at column 0 of the line is skipped;
    * a run preceded on the same line by ``//`` or ``/*`` is skipped.

    Args:
        filename: Path of the file to scan.

    Returns:
        A list of strings of the form ``<basename>_<md5 hex>=<text>``, in the
        order the runs appear in the file. The MD5 of the UTF-8 encoded text
        acts as a key for substituting the translation later.
    """
    chinese_pattern = re.compile('[\u4e00-\u9fff]+')
    file_name = os.path.basename(filename)
    filtered_chinese_chars = []
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            # Whole-line JS comments: "// ..." and the "* ..." lines of a
            # block comment.
            if line.strip().startswith(('//', '*')):
                continue
            # finditer gives the real offset of every run. Searching the line
            # for the matched text would always land on its first occurrence
            # and let a repeated word inside a trailing comment slip through.
            for match in chinese_pattern.finditer(line):
                index_target = match.start()
                if index_target <= 0:
                    continue
                # Anything after a comment opener on this line is treated as
                # comment text.
                preceding_text = line[:index_target]
                if "//" in preceding_text or "/*" in preceding_text:
                    continue
                text = match.group()
                # MD5 tag identifies the string when translations are
                # substituted back.
                md5 = hashlib.md5(text.encode('utf-8')).hexdigest()
                # Entry format: file name, string MD5, text to translate.
                filtered_chinese_chars.append(file_name + "_" + md5 + "=" + text)
    return filtered_chinese_chars
4、运行主程序
if __name__ == "__main__":
    # Script entry point: scan the folder configured in `filepath`.
    each_file(filepath)