callmeyan 32e85c62c0 优化日志记录、资源管理及功能支持
- 引入 log4net 库,统一日志记录方式,提升可维护性。
- 优化异常处理,增加详细日志记录,增强代码健壮性。
- 调整资源文件引用,新增图标资源,删除无用资源。
- 优化文档事件处理逻辑,改进面板显示与隐藏逻辑。
- 增加对 WPS 环境的支持,动态调整功能行为。
- 禁用部分功能(如常识性检测、客服、升级和帮助)。
- 删除冗余代码,清理注释,统一代码风格。
- 更新程序集版本至 2.2.5,改进调试与生产环境配置。
2025-05-08 13:57:12 +08:00

319 lines
12 KiB
C#

using AIProofread.core;
using Microsoft.Office.Interop.Word;
using Newtonsoft.Json;
using NPOI;
using NPOI.POIFS.FileSystem;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
namespace AIProofread
{
public class Tools
{
// Separator strings (CRLF, CR+FF, CR, LF, FF), presumably intended for splitting text into
// paragraphs. The two-character sequences are listed before their single-character parts.
// NOTE(review): no member of this class visible here references this field — confirm whether it is still needed.
private static readonly string[] paragSplitor = new string[] { "\r\n", "\r\f", "\r", "\n", "\f" };
/// <summary>
/// Collects the text of a Word document in two forms: the whole-document text taken from the
/// Word object model, and a per-paragraph list obtained by parsing a temporary copy of the
/// file with NPOI.
/// </summary>
/// <param name="doc">
/// The Word document to read. It is saved as a side effect of creating the temporary copy
/// (see <see cref="GetReadDocumentFilePath"/>).
/// </param>
/// <returns>
/// A dictionary with two entries: <c>"list"</c> (the <c>List&lt;DocumentText&gt;</c> filled by NPOI)
/// and <c>"text"</c> (the string returned by <c>doc.Range().Text</c>).
/// </returns>
public static Dictionary<string, object> GetAllText(Microsoft.Office.Interop.Word.Document doc)
{
    // Create a temporary copy of the document for NPOI to read.
    string docPath = Tools.GetReadDocumentFilePath(doc);
    try
    {
        Debug.WriteLine($"GetAllText Start ==> {DateTime.Now.ToLongTimeString()}");

        // Full text of the document as Word reports it.
        string allText = doc.Range().Text;
        List<DocumentText> list = new List<DocumentText>();

        try
        {
            ReadTextByNPOI(docPath, list);
        }
        catch (POIXMLException ex)
        {
            // A failure inside NPOI's numbering handling is retried after removing the
            // numbering part from the package.
            if (ex.StackTrace != null && ex.StackTrace.Contains("NPOI.XWPF.UserModel.XWPFNumbering"))
            {
                RemoveNumbersReadTextByNPOI(docPath, list);
            }
            else
            {
                // Still best-effort (the caller gets whatever was collected), but the
                // failure is no longer invisible.
                Debug.WriteLine($"GetAllText: NPOI could not read the document: {ex}");
            }
        }

        return new Dictionary<string, object>
        {
            { "list", list },
            { "text", allText }
        };
    }
    finally
    {
        // Always remove the temporary copy, including when reading throws. A failed delete
        // must not replace the real result or mask the original exception.
        try
        {
            File.Delete(docPath);
        }
        catch (IOException deleteError)
        {
            Debug.WriteLine($"GetAllText: could not delete temporary file {docPath}: {deleteError.Message}");
        }
        catch (UnauthorizedAccessException deleteError)
        {
            Debug.WriteLine($"GetAllText: could not delete temporary file {docPath}: {deleteError.Message}");
        }
    }
}
/// <summary>
/// Fallback reader: deletes the <c>word/numbering.xml</c> entry from the .docx package at
/// <paramref name="filePath"/> (modifying that file in place) and then reads it again with NPOI.
/// </summary>
/// <param name="filePath">Path of the .docx file to patch and read.</param>
/// <param name="list">Receives the paragraphs; it is cleared first.</param>
private static void RemoveNumbersReadTextByNPOI(string filePath, List<DocumentText> list)
{
    // Discard anything already in the list before reading again.
    list.Clear();

    // ZipArchiveMode.Update requires a stream that can read, write and seek. File.OpenWrite
    // yields a write-only stream, which makes the ZipArchive constructor throw, so the
    // file is opened explicitly with ReadWrite access.
    using (var fs = new FileStream(filePath, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
    using (var archive = new ZipArchive(fs, ZipArchiveMode.Update))
    {
        // Remove the numbering part if the package has one.
        archive.GetEntry("word/numbering.xml")?.Delete();
    }

    ReadTextByNPOI(filePath, list);
}
/// <summary>
/// Parses the .docx file at <paramref name="filePath"/> with NPOI and appends its paragraph
/// texts to <paramref name="list"/>, each tagged with a running paragraph number that starts at 1.
/// </summary>
/// <remarks>
/// Numbering rules implemented here:
/// a body paragraph consumes one number and is added only when it is not blank;
/// every paragraph inside a table cell consumes one number and is always added, and each
/// table row consumes one extra number;
/// a structured document tag (treated as a table of contents) adds nothing to the list but
/// advances the number by its count of line-separated entries.
/// The character U+0002 is removed from every text that is added.
/// </remarks>
/// <param name="filePath">Path of the .docx file to read.</param>
/// <param name="list">Collection the paragraphs are appended to.</param>
private static void ReadTextByNPOI(string filePath, List<DocumentText> list)
{
    // "\r\n" must come before "\r" and "\n": when several separators match at the same
    // position, String.Split uses the first one in the array, so the previous order counted
    // every CRLF as two breaks.
    string[] lineBreaks = { "\r\n", "\n", "\r", "\f" };

    using (var stream = File.OpenRead(filePath))
    {
        XWPFDocument ndoc = new XWPFDocument(stream);
        int paragraphNumber = 1;

        foreach (var bodyElement in ndoc.BodyElements)
        {
            if (bodyElement is XWPFParagraph p)
            {
                // Ordinary paragraph: blank ones are skipped but still counted.
                var text = p.ParagraphText.Replace("\u0002", "");
                if (text.Trim().Length > 0)
                {
                    list.Add(new DocumentText(text, paragraphNumber));
                }
                paragraphNumber++;
            }
            else if (bodyElement is XWPFTable table)
            {
                foreach (var row in table.Rows)
                {
                    foreach (var cell in row.GetTableCells())
                    {
                        foreach (var pc in cell.Paragraphs)
                        {
                            list.Add(new DocumentText(pc.ParagraphText.Replace("\u0002", ""), paragraphNumber));
                            paragraphNumber++;
                        }
                    }
                    // One extra number per row — presumably to mirror the end-of-row mark that
                    // Word counts as a paragraph; TODO confirm against the VSTO numbering.
                    paragraphNumber++;
                }
            }
            else if (bodyElement is XWPFSDT sdt)
            {
                // Table of contents: skipped for now, but its entries still advance the counter.
                string tocText = sdt.Content.Text;
                var tocEntries = tocText.Split(lineBreaks, StringSplitOptions.None);
                paragraphNumber += tocEntries.Length;
            }
        }
    }
}
/// <summary>
/// Saves <paramref name="doc"/> and produces a temporary file that can be parsed as .docx:
/// a byte copy of the saved file, converted to .docx through a background Word instance when
/// the original extension ends with "doc".
/// </summary>
/// <param name="doc">The Word document to copy. <c>doc.Save()</c> is called first so the file on disk is current.</param>
/// <returns>Path of the temporary file. The caller is responsible for deleting it.</returns>
public static string GetReadDocumentFilePath(Microsoft.Office.Interop.Word.Document doc)
{
    // Save first so the file on disk contains the latest content.
    doc.Save();

    string docPath = doc.FullName;
    string ext = Path.GetExtension(docPath);
    string tmpFile = Path.GetTempFileName();
    string convertedFile = null;

    Debug.WriteLine($"GetReadDocumentFilePath Start ==> {DateTime.Now.ToLongTimeString()}");
    Debug.WriteLine($"GetReadDocumentFilePath => {docPath}");

    try
    {
        // Copy the bytes while allowing other handles to keep the file open for read/write.
        // CopyTo loops until end of stream, unlike a single Read call, which may return
        // fewer bytes than requested.
        using (var source = new FileStream(docPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        using (var target = new FileStream(tmpFile, FileMode.Create, FileAccess.Write))
        {
            source.CopyTo(target);
        }

        // Legacy .doc files must be converted to .docx before they can be parsed.
        if (ext.EndsWith("doc", StringComparison.OrdinalIgnoreCase))
        {
            // Keep the converted file in the temp directory instead of next to the user's
            // document, so no existing file beside the original can be overwritten and a
            // read-only document folder does not break the conversion.
            convertedFile = tmpFile + ".docx";
            ConvertDocToDocx(tmpFile, convertedFile);

            // The raw copy is no longer needed.
            File.Delete(tmpFile);
            tmpFile = convertedFile;
        }
    }
    catch
    {
        // Do not leave temporary files behind when copying or converting fails.
        DeleteFileQuietly(tmpFile);
        if (convertedFile != null)
        {
            DeleteFileQuietly(convertedFile);
        }
        throw;
    }

    Debug.WriteLine($"GetReadDocumentFilePath =>=> {tmpFile}");
    Debug.WriteLine($"GetReadDocumentFilePath End ==> {DateTime.Now.ToLongTimeString()}");
    return tmpFile;
}

/// <summary>
/// Opens <paramref name="sourcePath"/> read-only in a separate, invisible Word instance and
/// saves it as a .docx file at <paramref name="targetPath"/>. The document is closed and the
/// Word instance is quit even when opening or saving fails.
/// </summary>
private static void ConvertDocToDocx(string sourcePath, string targetPath)
{
    object missing = System.Reflection.Missing.Value;
    object saveOption = WdSaveOptions.wdDoNotSaveChanges;

    Application app = new Application();
    try
    {
        var tmpDoc = app.Documents.OpenNoRepairDialog(
            FileName: sourcePath,
            AddToRecentFiles: false,
            ReadOnly: true,
            Visible: false
        );
        try
        {
            tmpDoc.SaveAs2(
                FileName: targetPath,
                FileFormat: WdSaveFormat.wdFormatXMLDocument,
                AddToRecentFiles: false
            );
        }
        finally
        {
            tmpDoc.Close(ref saveOption, ref missing, ref missing);
        }
    }
    finally
    {
        // Without this the background Word process would stay alive after a failure.
        app.Quit(ref saveOption, ref missing, ref missing);
    }
}

/// <summary>
/// Deletes a file during failure cleanup. I/O and access errors are only traced so that they
/// cannot mask the exception that triggered the cleanup.
/// </summary>
private static void DeleteFileQuietly(string path)
{
    try
    {
        File.Delete(path);
    }
    catch (IOException ex)
    {
        Debug.WriteLine($"GetReadDocumentFilePath: could not delete {path}: {ex.Message}");
    }
    catch (UnauthorizedAccessException ex)
    {
        Debug.WriteLine($"GetReadDocumentFilePath: could not delete {path}: {ex.Message}");
    }
}
/// <summary>
/// Collects the document text using only the Word object model: the whole-document text plus
/// a list of paragraphs numbered from 1 in document order.
/// </summary>
/// <remarks>
/// A paragraph is counted but not added when its text is blank, when its text ends with
/// <c>"\r\a"</c> (the sequence Word uses to terminate a table cell), or when its range
/// contains a table. The character U+0002 is removed from every text that is added.
/// </remarks>
/// <param name="doc">The Word document to read.</param>
/// <returns>
/// A dictionary with two entries: <c>"list"</c> (a <c>List&lt;DocumentText&gt;</c>) and
/// <c>"text"</c> (the string returned by <c>doc.Range().Text</c>).
/// </returns>
public static Dictionary<string, object> GetAllTextByVSTO(Microsoft.Office.Interop.Word.Document doc)
{
    // Full text of the document as Word reports it.
    string allText = doc.Range().Text;
    List<DocumentText> list = new List<DocumentText>();
    Paragraphs paragraphs = doc.Paragraphs;

    try
    {
        int paragraphNumber = 0;
        foreach (Paragraph p in paragraphs)
        {
            paragraphNumber++;
            Range r = null;
            try
            {
                // Fetch the range once and reuse it; every p.Range call creates another COM wrapper.
                r = p.Range;
                string text = r.Text;

                // Ordinal comparison: the marker consists of control characters, which a
                // culture-sensitive comparison is not guaranteed to match literally.
                if (string.IsNullOrWhiteSpace(text)
                    || text.EndsWith("\r\a", StringComparison.Ordinal)
                    || r.Tables.Count > 0)
                {
                    continue;
                }

                list.Add(new DocumentText(text.Replace("\u0002", ""), paragraphNumber));
            }
            finally
            {
                // Runs for skipped paragraphs too; previously "continue" bypassed the release.
                if (r != null)
                {
                    Marshal.ReleaseComObject(r);
                }
                Marshal.ReleaseComObject(p);
            }
        }
    }
    finally
    {
        Marshal.ReleaseComObject(paragraphs);
    }

    return new Dictionary<string, object>
    {
        { "list", list },
        { "text", allText }
    };
}
/// <summary>
/// Returns the non-blank paragraphs of the active document whose 1-based paragraph numbers lie
/// in the inclusive range <paramref name="start"/>..<paramref name="end"/>.
/// </summary>
/// <remarks>
/// Paragraphs whose range contains a table, an endnote or a footnote, or whose list type is
/// anything other than <c>wdListNoNumbering</c>, are skipped.
/// </remarks>
/// <param name="start">First paragraph number; values below 1 are raised to 1.</param>
/// <param name="end">Last paragraph number; values above the paragraph count are lowered to it.</param>
/// <returns>The matching paragraphs; empty when <paramref name="start"/> is past the last paragraph.</returns>
public static List<DocumentText> GetTextListByParagraphRange(int start, int end)
{
    List<DocumentText> list = new List<DocumentText>();
    var doc = Globals.ThisAddIn.Application.ActiveDocument;
    Paragraphs paragraphs = doc.Paragraphs;

    try
    {
        int total = paragraphs.Count;
        if (start > total)
        {
            return list;
        }

        start = Math.Max(start, 1);
        end = Math.Min(end, total);

        for (int paragraphNumber = start; paragraphNumber <= end; paragraphNumber++)
        {
            Paragraph p = paragraphs[paragraphNumber];
            Range r = null;
            try
            {
                // Fetch the range once and reuse it for both the checks and the text.
                r = p.Range;
                if (r.Tables.Count > 0
                    || r.Endnotes.Count > 0
                    || r.Footnotes.Count > 0
                    || r.ListFormat.ListType != WdListType.wdListNoNumbering)
                {
                    continue;
                }

                string text = r.Text;
                if (!string.IsNullOrWhiteSpace(text))
                {
                    list.Add(new DocumentText(text, paragraphNumber));
                }
            }
            finally
            {
                // Release the per-paragraph COM wrappers, as GetAllTextByVSTO does.
                if (r != null)
                {
                    Marshal.ReleaseComObject(r);
                }
                Marshal.ReleaseComObject(p);
            }
        }

        return list;
    }
    finally
    {
        Marshal.ReleaseComObject(paragraphs);
    }
}
/// <summary>
/// Serializes <paramref name="data"/> to an indented JSON string with Json.NET.
/// </summary>
/// <param name="data">The object to serialize.</param>
/// <returns>The indented JSON text.</returns>
public static string GetJSONString(object data)
    => JsonConvert.SerializeObject(data, Formatting.Indented);
/// <summary>
/// Returns the device identifier stored in <c>deviceId.txt</c> under
/// <c>Config.APP_DATA_PATH</c>, creating and storing a new GUID when no usable identifier exists.
/// </summary>
/// <returns>The stored identifier, or a newly generated GUID string.</returns>
public static string GetDeviceId()
{
    string devicePath = Path.Combine(Config.APP_DATA_PATH, "deviceId.txt");

    // Reuse the stored identifier when there is one.
    if (File.Exists(devicePath))
    {
        // Trim stray whitespace/newlines and treat a blank file as missing, so an empty
        // identifier is never returned.
        string stored = File.ReadAllText(devicePath).Trim();
        if (stored.Length > 0)
        {
            return stored;
        }
    }

    // Guid.ToString() already uses lowercase hexadecimal digits.
    string deviceId = Guid.NewGuid().ToString();

    // Make sure the target directory exists; File.WriteAllText does not create it.
    string directory = Path.GetDirectoryName(devicePath);
    if (!string.IsNullOrEmpty(directory))
    {
        Directory.CreateDirectory(directory);
    }

    // Persist the identifier as plain text.
    File.WriteAllText(devicePath, deviceId);
    return deviceId;
}
}
}