perf: 📜️更新版本;对于特殊内容文档兼容

This commit is contained in:
LittleBoy 2025-03-30 14:28:18 +08:00
parent 2b3e330841
commit 200f415ac6
5 changed files with 90 additions and 59 deletions

Binary file not shown.

View File

@ -427,7 +427,7 @@ namespace AIProofread
{
Logger.Log(ex);
data["code"] = 5;
data["message"] = "文档格式有误,请另存文档后再进行校对";
data["message"] = "文档格式有误或内容异常,请另存文档后再进行校对";
}
catch (Exception ex)
{

View File

@ -36,7 +36,7 @@ namespace AIProofread
public class Config
{
public static readonly string APP_NAME = "AI校对王";
public static readonly string APP_VERSION = "2.2.2";
public static readonly string APP_VERSION = "2.2.3";
public static bool IS_WPS = false;
public static bool UpgradeForcedNotice = false;
public static readonly string APP_BASE_DIR = AppDomain.CurrentDomain.BaseDirectory;

View File

@ -7,10 +7,10 @@ using System.Security;
// 控制。更改这些特性值可修改
// 与程序集关联的信息。
[assembly: AssemblyTitle("AI校对王")]
[assembly: AssemblyDescription("AI校对王 2.2.1")]
[assembly: AssemblyDescription("AI校对王 2.2.3")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("果麦文化传媒股份有限公司")]
[assembly: AssemblyProduct("AI校对王 2.2.1")]
[assembly: AssemblyProduct("AI校对王 2.2.3")]
[assembly: AssemblyCopyright("Copyright © 果麦文化传媒股份有限公司 2025")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
@ -34,5 +34,5 @@ using System.Security;
// 方法是按如下所示使用“*”:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("2.0")]
[assembly: AssemblyFileVersion("2.2.1.0")]
[assembly: AssemblyFileVersion("2.2.3.0")]

View File

@ -8,6 +8,7 @@ using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
@ -41,22 +42,34 @@ namespace AIProofread
List<DocumentText> list = new List<DocumentText>();
//DocumentReader.ReadByVSTO(doc, Globals.ThisAddIn.Application, list);
using (FileStream stream = new FileStream(docPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
try
{
ReadTextByNPOI(stream, list);
//try
//{
//}
//catch (POIXMLException npoiError)
//{
// Logger.Log("GetAllText Error", npoiError);
// // Logger.Log("GetAllText", e);
// // return GetAllTextByVSTO(doc);
// // 读取文档
// DocumentReader.ReadByVSTO(doc, Globals.ThisAddIn.Application, list);
//}
ReadTextByNPOI(docPath, list);
}
catch (POIXMLException ex)
{
// 编号有误,移除编号再读取
if (ex.StackTrace.Contains("NPOI.XWPF.UserModel.XWPFNumbering"))
{
RemoveNumbersReadTextByNPOI(docPath, list);
}
}
//using (FileStream stream = new FileStream(docPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
//{
// //try
// //{
// //}
// //catch (POIXMLException npoiError)
// //{
// // Logger.Log("GetAllText Error", npoiError);
// // // Logger.Log("GetAllText", e);
// // // return GetAllTextByVSTO(doc);
// // // 读取文档
// // DocumentReader.ReadByVSTO(doc, Globals.ThisAddIn.Application, list);
// //}
//}
var map = new Dictionary<string, object>
{
{ "list", list },
@ -67,55 +80,73 @@ namespace AIProofread
return map;
}
private static void ReadTextByNPOI(FileStream stream, List<DocumentText> list)
/// <summary>
/// Fallback reader: deletes the <c>word/numbering.xml</c> part from the document
/// package at <paramref name="filePath"/> and then reads the text again through
/// <see cref="ReadTextByNPOI(string, List{DocumentText})"/>.
/// </summary>
/// <param name="filePath">Path of the .docx package to patch and re-read.</param>
/// <param name="list">Receives the extracted text; it is cleared first.</param>
/// <remarks>
/// NOTE(review): the package is rewritten in place. If <paramref name="filePath"/> is the
/// user's original document rather than a working copy, its numbering definitions are
/// removed permanently - confirm against the caller.
/// </remarks>
private static void RemoveNumbersReadTextByNPOI(string filePath, List<DocumentText> list)
{
    // Drop whatever an earlier, failed read attempt may already have collected.
    list.Clear();

    // ZipArchiveMode.Update requires a stream that can read, write and seek.
    // File.OpenWrite yields a write-only stream, which makes the ZipArchive
    // constructor throw ArgumentException, so the file is opened read/write here.
    // FileShare.None keeps the exclusive access File.OpenWrite used.
    using (var fs = new FileStream(filePath, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
    using (var archive = new ZipArchive(fs, ZipArchiveMode.Update))
    {
        // Remove the numbering part if present; a package without it is left untouched.
        ZipArchiveEntry numberingEntry = archive.GetEntry("word/numbering.xml");
        numberingEntry?.Delete();
    }

    // The archive (and with it the file stream) is disposed above, so the
    // updated package is flushed to disk before it is opened again.
    ReadTextByNPOI(filePath, list);
}
/// <summary>
/// Opens the document at <paramref name="filePath"/> with NPOI and appends its text,
/// paragraph by paragraph, to <paramref name="list"/>. Every entry carries a running
/// paragraph number that starts at 1.
/// </summary>
/// <param name="filePath">Path of the .docx file to read.</param>
/// <param name="list">Collection the extracted <c>DocumentText</c> items are added to.</param>
private static void ReadTextByNPOI(string filePath, List<DocumentText> list)
{
    // Separators used to count the lines of a content-control (table of contents) block.
    var tocSeparators = new string[] { "\n", "\r", "\r\n", "\f" };

    using (FileStream input = File.OpenRead(filePath))
    {
        var document = new XWPFDocument(input);
        // Unused by the loop below; kept so the property is still evaluated as before.
        var paragraphs = document.Paragraphs;
        int number = 1;

        foreach (var element in document.BodyElements)
        {
            switch (element)
            {
                // Ordinary paragraph: blank ones are not collected but still consume a number.
                case XWPFParagraph paragraph:
                {
                    string content = paragraph.ParagraphText.Replace("\u0002", "");
                    if (content.Trim().Length != 0)
                    {
                        list.Add(new DocumentText(content, number));
                    }
                    number++;
                    break;
                }

                // Table: every paragraph of every cell is collected (blank ones included).
                case XWPFTable table:
                {
                    foreach (var tableRow in table.Rows)
                    {
                        foreach (var tableCell in tableRow.GetTableCells())
                        {
                            foreach (var cellParagraph in tableCell.Paragraphs)
                            {
                                string cellContent = cellParagraph.ParagraphText.Replace("\u0002", "");
                                list.Add(new DocumentText(cellContent, number));
                                number++;
                            }
                        }
                        // One additional number is consumed per row.
                        // NOTE(review): presumably mirrors an extra per-row segment in the
                        // VSTO paragraph numbering - confirm.
                        number++;
                    }
                    break;
                }

                // Structured document tag (table of contents): its lines are skipped for
                // now, but the numbering advances by the number of split entries.
                case XWPFSDT toc:
                {
                    string tocContent = toc.Content.Text;
                    string[] tocLines = tocContent.Split(tocSeparators, StringSplitOptions.None);
                    number += tocLines.Length;
                    break;
                }
            }
        }
    }
}