一、前言
最近项目中需要实现这样一个功能:从本地读取CSV文件,以指定行作为标题行,并从指定行开始读取数据,最后将读取到的数据返回给前端。下面具体说明如何通过Java实现。
二、如何实现?
1.引入相关maven依赖
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
</dependency>
2.定义一个工具类CsvUtils。
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.collection.IterUtil;
import cn.hutool.core.text.csv.CsvData;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvRow;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.util.ReUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Utility for loading a CSV stream into a list of header-keyed row maps.
 *
 * <p>The whole stream is buffered in memory so that its bytes can first be scanned to choose
 * between UTF-8 and GBK and then be parsed with Hutool's {@code CsvReader}.
 */
public class CsvUtils {

    /** Prefix of generated column names ({@code column1}, {@code column2}, ...). */
    public static final String FIELD_NAME = "column";

    /**
     * Reads a CSV stream and returns its rows as ordered maps keyed by column name.
     *
     * @param in             CSV content; it is read completely and always closed by this method
     * @param readLine       1-based line number of the first data row; {@code null} defaults to 2
     *                       and values below 1 are treated as 1. When greater than 1, the header
     *                       row itself is returned as the first element of the result (column
     *                       name mapped to the trimmed header cell)
     * @param headerRowIndex 1-based line number of the header row; when it is not positive or lies
     *                       beyond the last row, names {@code column1..columnN} are generated from
     *                       the width of the first row. Blank header cells are replaced by
     *                       {@code column<position>}
     * @param readCount      maximum number of data rows to return (the echoed header row is not
     *                       counted); a negative value means no limit
     * @param splitChar      its first character is used as field separator; {@code null} or an
     *                       empty string falls back to a comma
     * @return the rows in file order, or {@code null} when the file contains no rows
     * @throws RuntimeException if the stream is {@code null}, cannot be read or parsed, or the
     *                          header width differs from the width of the first row; the original
     *                          exception is attached as the cause
     * @throws IOException      if closing a resource fails
     */
    public static List<Map<String, Object>> getCsvFileContent(InputStream in, Long readLine, int headerRowIndex, int readCount, String splitChar) throws IOException {
        CsvReader reader = null;
        try {
            if (in == null) {
                throw new FileStorageRuntimeException("文件读取失败,文件不存在!");
            }
            if (readLine == null) {
                readLine = 2L;
            }

            // Buffer once: the same bytes feed both the charset sniffing and the CSV parser.
            byte[] content = readFully(in);
            boolean isUtf8 = checkUTF8(new ByteArrayInputStream(content));
            // Skip a UTF-8 byte order mark so it does not end up inside the first header name.
            int offset = isUtf8 ? bomLength(content) : 0;

            reader = CsvUtil.getReader();
            reader.setFieldSeparator(StrUtil.isEmpty(splitChar) ? ',' : splitChar.charAt(0));
            reader.setSkipEmptyRows(false);

            CsvData data;
            try (Reader text = new InputStreamReader(
                    new ByteArrayInputStream(content, offset, content.length - offset),
                    Charset.forName(isUtf8 ? "UTF-8" : "GBK"))) {
                data = reader.read(text);
            }

            List<CsvRow> rows = data.getRows();
            if (rows.isEmpty()) {
                // Kept as null (not an empty list) because existing callers may test for it.
                return null;
            }

            List<String> headRowList = resolveHeaderRow(rows, headerRowIndex);
            List<String> headList = fillBlankHeaderNames(headRowList);
            // The width check is made against the first row of the file.
            if (headList.size() != rows.get(0).getRawList().size()) {
                throw new FileStorageRuntimeException("列数量与数据数量不一致");
            }

            List<Map<String, Object>> resList = new ArrayList<>();
            if (readLine > 1) {
                // Echo the header row as the first returned row.
                resList.add(toRowMap(headList, headRowList));
            }

            // Clamp the start index into [0, rows.size()] so odd readLine values cannot overflow
            // or produce a negative index.
            int firstDataIndex = (int) Math.min(Math.max(readLine - 1, 0L), (long) rows.size());
            int dataRowsRead = 0;
            for (int i = firstDataIndex; i < rows.size(); i++) {
                // Limit by the number of rows actually returned, not by the absolute row index.
                if (readCount >= 0 && dataRowsRead >= readCount) {
                    break;
                }
                resList.add(toRowMap(headList, rows.get(i).getRawList()));
                dataRowsRead++;
            }
            return resList;
        } catch (Exception e) {
            throw new RuntimeException("get inputStreamReader failed", e);
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    }

    /** Reads every remaining byte of {@code in} into a byte array. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream buffered = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024 * 10];
        int len;
        while ((len = in.read(chunk)) > -1) {
            buffered.write(chunk, 0, len);
        }
        return buffered.toByteArray();
    }

    /** Returns 3 when {@code content} starts with the UTF-8 byte order mark (EF BB BF), else 0. */
    private static int bomLength(byte[] content) {
        boolean hasBom = content.length >= 3
                && (content[0] & 0xFF) == 0xEF
                && (content[1] & 0xFF) == 0xBB
                && (content[2] & 0xFF) == 0xBF;
        return hasBom ? 3 : 0;
    }

    /** Returns the cells of the configured header row, or generated names when there is none. */
    private static List<String> resolveHeaderRow(List<CsvRow> rows, int headerRowIndex) {
        if (headerRowIndex > 0 && rows.size() > headerRowIndex - 1) {
            return rows.get(headerRowIndex - 1).getRawList();
        }
        List<String> generated = new ArrayList<>();
        int width = rows.get(0).getRawList().size();
        for (int i = 1; i <= width; i++) {
            generated.add(FIELD_NAME + i);
        }
        return generated;
    }

    /** Copies the header cells, replacing blank ones with {@code column<position>}. */
    private static List<String> fillBlankHeaderNames(List<String> headRowList) {
        List<String> names = new ArrayList<>(headRowList.size());
        for (int i = 0; i < headRowList.size(); i++) {
            String fieldName = headRowList.get(i);
            names.add(StrUtil.isBlank(fieldName) ? FIELD_NAME + (i + 1) : fieldName);
        }
        return names;
    }

    /** Pairs column names with the trimmed cells of one row, preserving column order. */
    private static Map<String, Object> toRowMap(List<String> headList, List<String> cells) {
        List<Object> trimmed = new ArrayList<>(cells.size());
        for (String cell : cells) {
            trimmed.add(StrUtil.trim(cell));
        }
        return IterUtil.toMap(headList, trimmed, true);
    }

    /**
     * Checks whether the whole stream is well-formed UTF-8.
     *
     * <p>Every byte is inspected: ASCII bytes are accepted, lead bytes must be followed by the
     * right number of continuation bytes (one to three), and bytes that can never start a UTF-8
     * sequence (0x80-0xC1 and anything above 0xF4) make the check fail. A sequence cut off by the
     * end of the stream also fails. An empty stream counts as UTF-8.
     *
     * @param fis the bytes to inspect; the stream is consumed and cannot be reused afterwards
     * @return {@code true} if the content is valid UTF-8 (or reading failed), otherwise
     *         {@code false}
     */
    public static boolean checkUTF8(InputStream fis) {
        try {
            int curr;
            while ((curr = fis.read()) != -1) {
                int continuationBytes;
                if (curr < 0x80) {
                    // Plain ASCII.
                    continuationBytes = 0;
                } else if (curr < 0xC2) {
                    // Stray continuation byte, or 0xC0/0xC1 which only occur in overlong forms.
                    return false;
                } else if (curr < 0xE0) {
                    continuationBytes = 1;
                } else if (curr < 0xF0) {
                    continuationBytes = 2;
                } else if (curr <= 0xF4) {
                    // Four-byte sequences cover supplementary characters such as emoji.
                    continuationBytes = 3;
                } else {
                    return false;
                }
                for (int i = 0; i < continuationBytes; i++) {
                    // read() yields -1 at end of stream, which also fails this mask test.
                    if ((fis.read() & 0xC0) != 0x80) {
                        return false;
                    }
                }
            }
            return true;
        } catch (IOException ignored) {
            // Best effort: when the bytes cannot be inspected, fall back to assuming UTF-8.
            return true;
        }
    }
}
接着通过main方法调用下。
/**
 * Demo entry point: parses {@code D:\111.csv} with line 1 as header, data starting at line 2,
 * a read count of 50 and a comma separator, then prints the resulting list to standard error.
 */
public static void main(String[] args) throws IOException {
    File csvFile = new File("D:\\111.csv");
    InputStream csvStream = new FileInputStream(csvFile);
    List<Map<String, Object>> parsedRows = getCsvFileContent(csvStream, 2L, 1, 50, ",");
    System.err.println(parsedRows);
}
结果如下:
其中readCount表示返回的数据数量。