一.开发框架:
.NET 6.0
工具:Visual Studio 2022
二.思路:
1.界面上传PDF文件,并保存
2.PDF文件转换为Word文件并保存
3.计算上传文件的SHA256散列值,判断该文件是否已经转换过;如果已经转换过,就直接返回之前转换好的Word文件
三.C#后台包:
1.Spire.PDF转换包(免费版一次最多只能转换10页)
2.mssql数据库连接包
四.C#代码案例:
1.PDF转Word方法:
/// <summary>
/// Converts a PDF file to a Word (DOCX) file.
/// </summary>
/// <remarks>
/// Failures are not propagated: any exception is written to the console and the method
/// returns normally, so callers that need to know whether the conversion succeeded
/// should check that the output file exists afterwards.
/// </remarks>
/// <param name="pdfFilePath">Path of the PDF file to load.</param>
/// <param name="wordFilePath">Path the DOCX file is saved to.</param>
public static void ConvertPdfToWord(string pdfFilePath, string wordFilePath)
{
    try
    {
        PdfDocument pdfDoc = new PdfDocument();
        try
        {
            pdfDoc.LoadFromFile(pdfFilePath);
            pdfDoc.SaveToFile(wordFilePath, FileFormat.DOCX);
        }
        finally
        {
            // Close the document even when loading or saving throws, so it is never left open.
            pdfDoc.Close();
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error converting PDF to Word: " + ex.Message);
    }
}
2.获取文件散列值方法(两种):
a.根据上传文件,获取散列值
/// <summary>
/// Computes the SHA-256 hash of an uploaded file's content.
/// </summary>
/// <param name="file">The uploaded file whose stream is hashed.</param>
/// <returns>
/// The hash as an uppercase hexadecimal string without separators, or <c>null</c> when
/// hashing fails (the error message is written to the console).
/// </returns>
public string CalculateSHA256Hash(IFormFile file)
{
    try
    {
        using var hasher = SHA256.Create();
        using var upload = file.OpenReadStream();

        byte[] digest = hasher.ComputeHash(upload);

        // Uppercase hex with no separators.
        return Convert.ToHexString(digest);
    }
    catch (Exception error)
    {
        Console.WriteLine("Error calculating SHA256 hash: " + error.Message);
        return null;
    }
}
b.根据文件路径,获取散列值
/// <summary>
/// Computes the SHA-256 hash of the file at the given path.
/// </summary>
/// <param name="filePath">Path of the file to hash.</param>
/// <returns>
/// The hash as an uppercase hexadecimal string without separators, or <c>null</c> when the
/// file cannot be opened or hashed (the error message is written to the console).
/// </returns>
public string CalculateSHA256Hash(string filePath)
{
    try
    {
        // Both the stream and the hash algorithm are disposable; release both deterministically.
        using (FileStream stream = System.IO.File.OpenRead(filePath))
        using (SHA256 sha = SHA256.Create())
        {
            byte[] hash = sha.ComputeHash(stream);
            return BitConverter.ToString(hash).Replace("-", String.Empty);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error calculating SHA256 hash: " + ex.Message);
        return null;
    }
}
3.上传PDF文件,转化为Word文件方法:
/// <summary>
/// Handles a PDF upload: saves the file, converts it to Word, and records the conversion.
/// If a record with the same SHA-256 hash and size already exists, the previously
/// converted Word file path is returned instead of converting again.
/// </summary>
/// <returns>
/// A JSON result with <c>code</c> and <c>msg</c>: <c>code = 0</c> with <c>data</c> set to the
/// Word file path on success, <c>code = -1</c> when no file or a non-PDF file was uploaded,
/// and <c>code = -2</c> when hashing, conversion, or recording fails.
/// </returns>
public ActionResult UploadFile()
{
    // Request.Form throws for requests that are not form posts, so reject those up front.
    if (!HttpContext.Request.HasFormContentType)
    {
        return Json(new { code = -1, msg = "请上传文件!" });
    }

    var files = HttpContext.Request.Form.Files;
    if (files == null || files.Count <= 0)
    {
        return Json(new { code = -1, msg = "请上传文件!" });
    }

    var file = files[0];
    // MIME types are case-insensitive. NOTE: the content type is supplied by the client,
    // so this is only a basic filter, not proof that the content really is a PDF.
    if (!string.Equals(file.ContentType, "application/pdf", StringComparison.OrdinalIgnoreCase))
    {
        return Json(new { code = -1, msg = "不是PDF文件!" });
    }

    // CalculateSHA256Hash returns null on failure; never query or store a null hash.
    var sha256Hash = CalculateSHA256Hash(file);
    if (sha256Hash == null)
    {
        return Json(new { code = -2, msg = "出错了!" });
    }
    long fileSize = file.Length;

    // If a record with the same hash and size already exists, return its converted file path.
    if (new FileConversionBll().ExistsSHA256Hash(sha256Hash, fileSize))
    {
        var model = new FileConversionBll().GetFileBySHA256HashAndSize(sha256Hash, fileSize);
        // The record may no longer be retrievable; in that case fall through and convert again.
        if (model != null)
        {
            return Json(new { code = 0, msg = "", data = model.WordFilePath });
        }
    }

    // A timestamp alone can collide when two uploads arrive in the same millisecond,
    // which would make FileMode.Create overwrite the other upload; add a GUID suffix.
    string fileName = DateTime.Now.ToString("yyyyMMddHHmmssfff") + "_" + Guid.NewGuid().ToString("N");

    string pdfFilePath = Path.Combine("这里是你上传PDF文件保存的文件路径", fileName + ".pdf");
    using (var fileStream = new FileStream(pdfFilePath, FileMode.Create))
    {
        file.CopyTo(fileStream);
    }

    string wordFilePath = Path.Combine("这里是你需要保存转化后Word文件的文件路径", fileName + ".docx");
    ConvertPdfToWord(pdfFilePath, wordFilePath);

    // ConvertPdfToWord logs and swallows its errors, so confirm the output really exists
    // before recording the conversion and reporting success.
    if (!System.IO.File.Exists(wordFilePath))
    {
        return Json(new { code = -2, msg = "出错了!" });
    }

    // FileConversion is a custom class; the record is stored in the database so later
    // uploads of the same file can reuse this conversion.
    var res = new FileConversionBll().AddFileConversion(
        new FileConversion()
        {
            PdfFilePath = pdfFilePath,
            WordFilePath = wordFilePath,
            PdfSHA256Hash = sha256Hash,
            FileSize = fileSize
        });
    if (res)
    {
        return Json(new { code = 0, msg = "", data = wordFilePath });
    }

    return Json(new { code = -2, msg = "出错了!" });
}
五.效果图:
由于使用的是免费版Spire.PDF,这里的PDF文件转换后得到的Word文件只有10页。
上传保存的文件(源文件54页):
转化后的文件(只转化了10页):