基于 Java 的英文翻译字典，附源代码和数据库初始化文件
源码地址
dict_demo: 提取一段英文对话中的英文词汇,输出为英文单词字典形式
解析 JSON 词条
/**
 * Imports dictionary entries from a text file that holds one JSON record per line.
 *
 * <p>Each line is parsed into an {@code EnglishDictVo}, converted to an
 * {@code EnglishDict} and inserted through {@code englishDictMapper} unless a row
 * with the same English word already exists. Blank lines are skipped. Records that
 * cannot be parsed or that lack a required field are counted as errors and skipped,
 * so a single bad line no longer aborts the whole import. The number of inserted
 * rows, the number of rejected records and the elapsed milliseconds are printed to
 * standard output at the end.
 *
 * @param pathname path of the file to import
 */
private void readFile(String pathname) {
    long start = System.currentTimeMillis();
    int count = 0;
    int errcount = 0;
    // Decode explicitly as UTF-8 instead of the platform default charset used by
    // FileReader. NOTE(review): assumes the data file is UTF-8 encoded - confirm.
    try (java.io.Reader reader = new java.io.InputStreamReader(
                 new java.io.FileInputStream(pathname), java.nio.charset.StandardCharsets.UTF_8);
         BufferedReader br = new BufferedReader(reader)) {
        String line;
        while ((line = br.readLine()) != null) {
            if (line.trim().isEmpty()) {
                // Blank lines carry no record; skip them without counting an error.
                continue;
            }
            EnglishDict englishDict = toEnglishDictOrNull(line);
            if (englishDict == null) {
                errcount++;
                continue;
            }
            // The uniqueness check could be dropped to import several vocabulary
            // books and keep one entry per book; for now only the first occurrence
            // of a word is stored.
            EnglishDict existing = englishDictMapper.selectOne(
                    Wrappers.<EnglishDict>lambdaQuery()
                            .eq(EnglishDict::getEnglishWord, englishDict.getEnglishWord())
                            .last("limit 1"));
            if (existing == null) {
                englishDictMapper.insert(englishDict);
                count++;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    long end = System.currentTimeMillis();
    System.out.println("====== 入库 " + count + " 错误" + errcount);
    System.out.println("====== 耗时 " + (end - start));
}

/**
 * Converts one JSON line into an {@code EnglishDict}.
 *
 * @param line a non-blank line read from the import file
 * @return the populated entity, or {@code null} when the line cannot be parsed or
 *         the head word, book id, either pronunciation or the first Chinese
 *         translation is missing
 */
private EnglishDict toEnglishDictOrNull(String line) {
    EnglishDictVo vo;
    try {
        vo = JSONObject.parseObject(line, EnglishDictVo.class);
    } catch (RuntimeException e) {
        // Presumably the JSON library reports malformed input with an unchecked
        // exception; treat that as a rejected record rather than a fatal error.
        return null;
    }
    // Validate every level of the nested structure before dereferencing it.
    if (vo == null
            || vo.getContent() == null
            || vo.getContent().getWord() == null
            || vo.getContent().getWord().getContent() == null) {
        return null;
    }
    if (vo.getContent().getWord().getContent().getTrans() == null
            || vo.getContent().getWord().getContent().getTrans().isEmpty()
            || vo.getContent().getWord().getContent().getTrans().get(0) == null) {
        return null;
    }

    String headWord = vo.getHeadWord();
    String bookId = vo.getBookId();
    String ukphone = vo.getContent().getWord().getContent().getUkphone();
    String usphone = vo.getContent().getWord().getContent().getUsphone();
    String tranCn = vo.getContent().getWord().getContent().getTrans().get(0).getTranCn();
    if (headWord == null || bookId == null || ukphone == null || usphone == null || tranCn == null) {
        return null;
    }

    EnglishDict englishDict = new EnglishDict();
    englishDict.setEnglishWord(headWord);
    englishDict.setBritishSound(ukphone);
    englishDict.setAmericanSound(usphone);
    englishDict.setChineseWord(tranCn);
    englishDict.setTag(bookId);
    // Example sentences are optional; they are stored as a JSON string when present.
    Object sentence = vo.getContent().getWord().getContent().getSentence();
    if (sentence != null) {
        englishDict.setSentence(JSONObject.toJSONString(sentence));
    }
    return englishDict;
}
从内容中提取词汇
/**
 * Extracts the English words contained in a piece of text and looks them up in the
 * dictionary table.
 *
 * <p>The text is split on whitespace and commas. Every token is stripped of
 * non-letter characters and lower-cased, and tokens that end up empty are dropped.
 * The remaining distinct words are matched against {@code EnglishDict.englishWord}
 * through {@code englishDictMapper}.
 *
 * @param contentString the text to analyse; {@code null} or text without any
 *                      letters yields an empty report
 * @return a report holding the number of matched dictionary entries and, for each
 *         match, the English word, its Chinese translation and both pronunciations
 */
public ReportVo extractKeywords(String contentString) {
    ReportVo reportVo = new ReportVo();

    Set<String> queryWords = new HashSet<String>();
    if (contentString != null) {
        // Split on any whitespace (not just spaces) so that words separated by a
        // newline or tab are not fused together once the symbols are removed.
        for (String token : contentString.split("[\\s,]+")) {
            // Remove punctuation, digits and other non-letter symbols.
            String word = token.replaceAll("[^a-zA-Z]", "").toLowerCase(java.util.Locale.ROOT);
            if (!word.isEmpty()) {
                queryWords.add(word);
            }
        }
    }

    if (queryWords.isEmpty()) {
        // Nothing to look up: skip the database round trip entirely.
        reportVo.setWordNum(0);
        reportVo.setWords(Collections.emptyList());
        return reportVo;
    }

    List<EnglishDict> englishDicts = englishDictMapper.selectList(
            Wrappers.<EnglishDict>lambdaQuery()
                    .in(EnglishDict::getEnglishWord, queryWords));
    reportVo.setWordNum(englishDicts.size());

    List<ReportVo.WordVo> words = englishDicts.stream().map(entry -> {
        ReportVo.WordVo wordVo = new ReportVo.WordVo();
        wordVo.setAmericanSound(entry.getAmericanSound());
        wordVo.setBritishSound(entry.getBritishSound());
        wordVo.setChinese(entry.getChineseWord());
        wordVo.setEnglish(entry.getEnglishWord());
        return wordVo;
    }).collect(Collectors.toList());
    reportVo.setWords(words);
    return reportVo;
}
初始化接口如下
一千条耗时5秒
把剩下的也插入