perf: 📜️更新版本;对于特殊内容文档兼容

2025-03-30 14:28:18 +08:00 · 2025-03-30 14:28:18 +08:00 · 200f415ac6
commit 200f415ac6
parent 2b3e330841
5 changed files with 90 additions and 59 deletions
--- a/.vs/AIProofread/v17/.suo
+++ b/.vs/AIProofread/v17/.suo
--- a/AIProofread/Bridge.cs
+++ b/AIProofread/Bridge.cs
@ -427,7 +427,7 @@ namespace AIProofread
                {
                    Logger.Log(ex);
                    data["code"] = 5;
-                    data["message"] = "文档格式有误，请另存文档后再进行校对";
+                    data["message"] = "文档格式有误或内容异常，请另存文档后再进行校对";
                }
                catch (Exception ex)
                {
--- a/AIProofread/Config.cs
+++ b/AIProofread/Config.cs
@ -36,7 +36,7 @@ namespace AIProofread
    public class Config
    {
        public static readonly string APP_NAME = "AI校对王";
-        public static readonly string APP_VERSION = "2.2.2";
+        public static readonly string APP_VERSION = "2.2.3";
        public static bool IS_WPS = false;
        public static bool UpgradeForcedNotice = false;
        public static readonly string APP_BASE_DIR = AppDomain.CurrentDomain.BaseDirectory;
--- a/AIProofread/Properties/AssemblyInfo.cs
+++ b/AIProofread/Properties/AssemblyInfo.cs
@ -7,10 +7,10 @@ using System.Security;
 // 控制。更改这些特性值可修改
 // 与程序集关联的信息。
 [assembly: AssemblyTitle("AI校对王")]
-[assembly: AssemblyDescription("AI校对王 2.2.1")]
+[assembly: AssemblyDescription("AI校对王 2.2.3")]
 [assembly: AssemblyConfiguration("")]
 [assembly: AssemblyCompany("果麦文化传媒股份有限公司")]
-[assembly: AssemblyProduct("AI校对王 2.2.1")]
+[assembly: AssemblyProduct("AI校对王 2.2.3")]
 [assembly: AssemblyCopyright("Copyright © 果麦文化传媒股份有限公司 2025")]
 [assembly: AssemblyTrademark("")]
 [assembly: AssemblyCulture("")]
@ -34,5 +34,5 @@ using System.Security;
 // 方法是按如下所示使用“*”: :
 // [assembly: AssemblyVersion("1.0.*")]
 [assembly: AssemblyVersion("2.0")]
-[assembly: AssemblyFileVersion("2.2.1.0")]
+[assembly: AssemblyFileVersion("2.2.3.0")]

--- a/AIProofread/core/Tools.cs
+++ b/AIProofread/core/Tools.cs
@ -8,6 +8,7 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
+using System.IO.Compression;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography;

@ -41,22 +42,34 @@ namespace AIProofread
            List<DocumentText> list = new List<DocumentText>();

            //DocumentReader.ReadByVSTO(doc, Globals.ThisAddIn.Application, list);
-            using (FileStream stream = new FileStream(docPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
+            try
            {
-                ReadTextByNPOI(stream, list);
-                //try
-                //{
-                    
-                //}
-                //catch (POIXMLException npoiError)
-                //{
-                //    Logger.Log("GetAllText Error", npoiError);
-                //    // Logger.Log("GetAllText", e);
-                //    // return GetAllTextByVSTO(doc);
-                //    // 读取文档
-                //    DocumentReader.ReadByVSTO(doc, Globals.ThisAddIn.Application, list);
-                //}
+                ReadTextByNPOI(docPath, list);
            }
+            catch (POIXMLException ex)
+            {
+                // 编号有误,移除编号再读取
+                if (ex.StackTrace.Contains("NPOI.XWPF.UserModel.XWPFNumbering"))
+                {
+                    RemoveNumbersReadTextByNPOI(docPath, list);
+                }
+            }
+            //using (FileStream stream = new FileStream(docPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
+            //{
+
+            //    //try
+            //    //{
+
+            //    //}
+            //    //catch (POIXMLException npoiError)
+            //    //{
+            //    //    Logger.Log("GetAllText Error", npoiError);
+            //    //    // Logger.Log("GetAllText", e);
+            //    //    // return GetAllTextByVSTO(doc);
+            //    //    // 读取文档
+            //    //    DocumentReader.ReadByVSTO(doc, Globals.ThisAddIn.Application, list);
+            //    //}
+            //}
            var map = new Dictionary<string, object>
            {
                { "list", list },
@ -67,55 +80,73 @@ namespace AIProofread
            return map;
        }

-        private static void ReadTextByNPOI(FileStream stream, List<DocumentText> list)
+        private static void RemoveNumbersReadTextByNPOI(string filePath, List<DocumentText> list)
        {
-            XWPFDocument ndoc = new XWPFDocument(stream);
-            var paragraphs = ndoc.Paragraphs;
-            int paragraphNumber = 1;
-            foreach (var bodyElement in ndoc.BodyElements)
+            list.Clear();
+            // Fallback read: strip the numbering part from the .docx, then read again. ZipArchiveMode.Update needs a readable, writable, seekable stream (File.OpenWrite is write-only).
+            using (var fs = new FileStream(filePath, FileMode.Open, FileAccess.ReadWrite))
+            using (ZipArchive archive = new ZipArchive(fs, ZipArchiveMode.Update))
            {
-                // normal paragraph
-                if (bodyElement is XWPFParagraph p)
+                // Delete word/numbering.xml when present; NOTE(review): this rewrites the file at filePath in place.
+                archive.GetEntry("word/numbering.xml")?.Delete();
+            }
+            // Both streams are closed at this point, so the file can be reopened for the retry.
+            ReadTextByNPOI(filePath, list);
+        }
+
+        private static void ReadTextByNPOI(string filePath, List<DocumentText> list)
+        {
+            // Walks the document's body elements in order, appending paragraph and table-cell text to list with a running paragraph number; the file is opened read-only.
+            using (var stream = File.OpenRead(filePath))
+            {
+                XWPFDocument ndoc = new XWPFDocument(stream);
+                var paragraphs = ndoc.Paragraphs;
+                int paragraphNumber = 1;
+                foreach (var bodyElement in ndoc.BodyElements)
                {
-                    // 处理普通段落
-                   var text = p.ParagraphText.Replace("\u0002", "");
-                    if(text.Trim().Length > 0)
+                    // Normal paragraph: added only when it has non-whitespace text, but always counted
+                    if (bodyElement is XWPFParagraph p)
                    {
-                        list.Add(new DocumentText(text, paragraphNumber));
-                    }
-                    paragraphNumber++;
-                }
-                // table -- vsto对于每个单元格的分段也会有
-                else if (bodyElement is XWPFTable table)
-                {
-                    foreach (var row in table.Rows)
-                    {
-                        foreach (var cell in row.GetTableCells())
+                        // Handle a normal paragraph: strip U+0002 characters before the emptiness check
+                        var text = p.ParagraphText.Replace("\u0002", "");
+                        if (text.Trim().Length > 0)
                        {
-                            foreach (var pc in cell.Paragraphs)
-                            {
-                                list.Add(new DocumentText(pc.ParagraphText.Replace("\u0002", ""), paragraphNumber));
-                                paragraphNumber++;
-                            }
+                            list.Add(new DocumentText(text, paragraphNumber));
                        }
-                        //list.Add(string.Empty);
                        paragraphNumber++;
                    }
-                }
-                // 目录处理
-                else if (bodyElement is XWPFSDT sdt)
-                {
-                    string tocText = sdt.Content.Text;
+                    // Table -- VSTO also has a paragraph break for each cell; every cell paragraph is added (even when empty) and each row advances the counter once more
+                    else if (bodyElement is XWPFTable table)
+                    {
+                        foreach (var row in table.Rows)
+                        {
+                            foreach (var cell in row.GetTableCells())
+                            {
+                                foreach (var pc in cell.Paragraphs)
+                                {
+                                    list.Add(new DocumentText(pc.ParagraphText.Replace("\u0002", ""), paragraphNumber));
+                                    paragraphNumber++;
+                                }
+                            }
+                            //list.Add(string.Empty);
+                            paragraphNumber++;
+                        }
+                    }
+                    // Table-of-contents handling
+                    else if (bodyElement is XWPFSDT sdt)
+                    {
+                        string tocText = sdt.Content.Text;

-                    // 如果需要，可以进一步解析目录项// 例如，按换行符拆分目录内容
-                    var tocEntries = tocText.Split(new string[] { "\n", "\r", "\r\n", "\f" }, StringSplitOptions.None);
-                    paragraphNumber+= tocEntries.Length;
-                    // 暂时跳过目录
-                    //foreach (string entry in tocEntries)
-                    //{
-                    //    list.Add(new DocumentText(entry.Replace("\u0002", ""), paragraphNumber));
-                    //    paragraphNumber++;
-                    //}
+                        // If needed, the TOC entries can be parsed further, e.g. by splitting the TOC text on line breaks
+                        var tocEntries = tocText.Split(new string[] { "\n", "\r", "\r\n", "\f" }, StringSplitOptions.None);
+                        paragraphNumber += tocEntries.Length;
+                        // Skip the TOC for now: its entries only advance paragraphNumber and are not added to list
+                        //foreach (string entry in tocEntries)
+                        //{
+                        //    list.Add(new DocumentText(entry.Replace("\u0002", ""), paragraphNumber));
+                        //    paragraphNumber++;
+                        //}
+                    }
                }
            }
        }