diff --git a/AIProofread/Bridge.cs b/AIProofread/Bridge.cs
index a4b96ce..b75f266 100644
--- a/AIProofread/Bridge.cs
+++ b/AIProofread/Bridge.cs
@@ -1,4 +1,5 @@
 using AIProofread.Controls;
+using AIProofread.Util;
 using Microsoft.Office.Interop.Word;
 using Microsoft.Office.Tools.Word;
 using Microsoft.Web.WebView2.Core;
@@ -11,8 +12,10 @@ using System.Drawing;
 using System.IO;
 using System.Runtime.InteropServices;
 using System.Text.RegularExpressions;
+using System.Threading.Tasks;
 using UtilLib;
 using Document = Microsoft.Office.Interop.Word.Document;
+using Task = System.Threading.Tasks.Task;
 
 namespace AIProofread
 {
@@ -149,21 +152,159 @@ namespace AIProofread
         // 获取文档所有文本数据
         public Dictionary<string, object> getAllText()
         {
-            return Tools.GetAllText();
+            return Tools.GetAllText(Globals.ThisAddIn.Application.ActiveDocument);
         }
 
         public string getDocumentData()
         {
             Dictionary<string, object> data = new Dictionary<string, object>();
-            var name = Globals.ThisAddIn.Application.ActiveDocument.Name;
             var doc = Globals.ThisAddIn.Application.ActiveDocument;
-            data.Add("name", name);
+            data.Add("name", doc.Name);
             data.Add("fullName", doc.FullName);
             data.Add("wordsCount", doc.Words.Count);
             data.Add("charactersCount", doc.Characters.Count);
-            data.Add("content", Tools.GetAllText());
+
+            // A document that has never been saved has no file on disk to copy,
+            // so read its text straight from the live document instead.
+            if (string.IsNullOrEmpty(doc.Path))
+            {
+                data.Add("content", Tools.GetAllText(doc));
+                return Tools.GetJSONString(data);
+            }
+
+            object missing = System.Reflection.Missing.Value;
+            object saveOption = WdSaveOptions.wdDoNotSaveChanges;
+            Application app = null;
+            Document tmpDoc = null;
+            // Copy the file and read the copy in a separate Word instance.
+            string path = CreateTempDocumentFile(doc);
+            try
+            {
+                app = new Application();
+                tmpDoc = app.Documents.Open(path, false, true, false);
+                data.Add("content", Tools.GetAllText(tmpDoc));
+            }
+            finally
+            {
+                // Always close the copy, quit the helper Word instance and
+                // delete the temp file, even when reading the text failed.
+                if (tmpDoc != null)
+                {
+                    tmpDoc.Close(ref saveOption, ref missing, ref missing);
+                    Marshal.ReleaseComObject(tmpDoc);
+                }
+                if (app != null)
+                {
+                    app.Quit(ref saveOption, ref missing, ref missing);
+                    Marshal.ReleaseComObject(app);
+                }
+                // Explicit collection kept from the original change (presumably to drop remaining COM wrappers).
+                GC.Collect();
+                File.Delete(path);
+            }
             return Tools.GetJSONString(data);
         }
+
+        /// <summary>
+        /// Copies the document's file on disk into a new temp file and returns the temp file's path.
+        /// </summary>
+        private string CreateTempDocumentFile(Document doc)
+        {
+            // FileShare.ReadWrite: do not fail because another handle
+            // (presumably Word's own) already has the document open.
+            using (var source = new FileStream(doc.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
+            {
+                var tmpFile = Path.GetTempFileName();
+                using (var target = new FileStream(tmpFile, FileMode.Create, FileAccess.Write))
+                {
+                    // CopyTo reads to the end of the source; a single Read call
+                    // is allowed to return fewer bytes than requested.
+                    source.CopyTo(target);
+                }
+                return tmpFile;
+            }
+        }
+
+        /// <summary>
+        /// Returns the entries collected by Tools.GetTextListByParagraphRange as a JSON string.
+        /// </summary>
+        public string getParagraphTextByRange(int start, int end)
+        {
+            var list = Tools.GetTextListByParagraphRange(start, end);
+            return Tools.GetJSONString(list);
+        }
+
+        /// <summary>
+        /// Returns three splits of the active document's text as JSON: by Word separator
+        /// characters, by line after normalisation, and paragraph by paragraph.
+        /// </summary>
+        public string getAllParagraphs()
+        {
+            var doc = Globals.ThisAddIn.Application.ActiveDocument;
+            var rangeText = doc.Content.Text;
+            var trimText = HostHelper.ReplaceSpecialChars(rangeText, isReplaceMultSpaceLine: true);
+
+            string[] separator = { "\r\a", "\a", "\r", "\v", "\f" };
+            string[] originCut = rangeText.Split(separator, StringSplitOptions.None);
+            string[] trimCut = trimText.Split('\n');
+
+            var paragraphs = doc.Paragraphs;
+            int total = paragraphs.Count;
+            List<string> paragraphCut = new List<string>(total);
+            for (int i = 1; i <= total; i++)
+            {
+                paragraphCut.Add(GetParagraphText(paragraphs[i]));
+            }
+
+            Dictionary<string, object> data = new Dictionary<string, object>
+            {
+                { "origin_cut", originCut },
+                { "trim_cut", trimCut },
+                { "paragraph_cut", paragraphCut.ToArray() },
+            };
+            return Tools.GetJSONString(data);
+        }
+
+        /// <summary>Returns the text of the paragraph's range.</summary>
+        private string GetParagraphText(Paragraph paragraph)
+        {
+            return GetRangeText(paragraph.Range);
+        }
+
+        /// <summary>Returns the raw text of the range.</summary>
+        private string GetRangeText(Range range)
+        {
+            return range.Text;
+        }
+
+        /// <summary>
+        /// Despite the "Sync" suffix this returns immediately: the paragraph text is
+        /// collected on a thread-pool task and passed to SendMessageToWeb as a
+        /// "getParagraphTextByRange" message.
+        /// </summary>
+        public void getParagraphTextByRangeSync(int start, int end)
+        {
+            Task.Run(() =>
+            {
+                try
+                {
+                    var list = Tools.GetTextListByParagraphRange(start, end);
+                    Globals.ThisAddIn.SendMessageToWeb("getParagraphTextByRange", Tools.GetJSONString(list));
+                }
+                catch (Exception ex)
+                {
+                    // Nothing awaits this task, so log the failure instead of losing it.
+                    Logger.Log(ex.ToString());
+                }
+            });
+        }
+
+        /// <summary>Returns the number of paragraphs in the active document.</summary>
+        public int getTotalParagraphNumber()
+        {
+            return Globals.ThisAddIn.Application.ActiveDocument.Paragraphs.Count;
+        }
+
         /// <summary>
         /// 读取文档原始文件并转换成base64
         /// </summary>
@@ -482,18 +623,24 @@ namespace AIProofread
                 int index = 0;
                 foreach (var item in correct.Diffs)
                 {
                     var mark = AddBookmark(item, index, correct.Sentence_offset, correct.Insert_len, correct.Paragraph_num);
                     if (item.tag != "i") index++;
 
-                    var msg = new Dictionary<string, object>{
-                        {"message",mark == null ? "没有找到标记对象":"标记对象" + mark.Name },
-                        { "origin",item }
-                    };
                     if (mark != null)
                     {
                         marks.Add(item.id, new ProofreadItem(item, mark));
                     }
+                    else
+                    {
+                        var msg = new Dictionary<string, object>{
+                            {"message","没有找到标记对象" },
+                            { "origin",item },
+                            { "origin_correct",correct },
+                            { "new_text",correct.New_text },
+                            { "paragraph_num",correct.Paragraph_num },
+                        };
+                        Logger.Log(JsonConvert.SerializeObject(msg));
+                    }
-                    Logger.LogToWeb(JsonConvert.SerializeObject(msg));
                 }
             }
         }
diff --git a/AIProofread/Util/HostHelper.cs b/AIProofread/Util/HostHelper.cs
new file mode 100644
index 0000000..d46ffcf
--- /dev/null
+++ b/AIProofread/Util/HostHelper.cs
@@ -0,0 +1,71 @@
+using System;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+namespace AIProofread.Util
+{
+    /// <summary>
+    /// Helpers for normalising control characters in document text.
+    /// </summary>
+    public class HostHelper
+    {
+        // Built once and shared instead of being re-created on every call.
+        private static readonly Regex LineBreakChars = new Regex("[\r\f\a\v]");
+
+        private static readonly char[] InvisibleChars = { '\r', '\a', '\n', '\f', '\v' };
+
+        private static readonly char[] InvisibleCharsAndTab = { '\r', '\a', '\n', '\f', '\v', '\t' };
+
+        // U+0000..U+001F plus space and '/'.
+        private static readonly char[] NonContentChars =
+            Enumerable.Range(0, 32).Select(code => (char)code).Concat(new[] { ' ', '/' }).ToArray();
+
+        /// <summary>
+        /// Replaces every CR, FF, BEL and VT with "\n" and every U+001E with "-".
+        /// </summary>
+        /// <param name="text">Text to normalise; null or empty is returned unchanged.</param>
+        /// <param name="isReplaceMultSpaceLine">
+        /// When true, empty lines are dropped; a single trailing "\n" is kept if the text ended with one.
+        /// </param>
+        public static string ReplaceSpecialChars(string text, bool isReplaceMultSpaceLine = false)
+        {
+            if (string.IsNullOrEmpty(text))
+            {
+                return text;
+            }
+
+            text = LineBreakChars.Replace(text, "\n").Replace("\u001e", "-");
+            if (!isReplaceMultSpaceLine)
+            {
+                return text;
+            }
+
+            bool endsWithNewLine = text[text.Length - 1] == '\n';
+            text = string.Join("\n", text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries));
+            return endsWithNewLine ? text + "\n" : text;
+        }
+
+        /// <summary>
+        /// Trims CR, BEL, LF, FF and VT (plus tabs when <paramref name="containTable"/> is true)
+        /// from both ends of the text. Null or empty input yields an empty string.
+        /// </summary>
+        public static string RemoveInvisibleCharas(string text, bool containTable = false)
+        {
+            if (string.IsNullOrEmpty(text))
+            {
+                return string.Empty;
+            }
+
+            return text.Trim(containTable ? InvisibleCharsAndTab : InvisibleChars);
+        }
+
+        /// <summary>
+        /// Returns true when something is left after trimming control characters,
+        /// spaces and '/' from both ends of the text. Null or empty input yields false.
+        /// </summary>
+        public static bool HasValidCharacters(string text)
+        {
+            return !string.IsNullOrEmpty(text) && text.Trim(NonContentChars).Length > 0;
+        }
+    }
+}
diff --git a/AIProofread/core/Tools.cs b/AIProofread/core/Tools.cs
index be5f589..9b38d59 100644
--- a/AIProofread/core/Tools.cs
+++ b/AIProofread/core/Tools.cs
@@ -1,72 +1,51 @@
-using AIProofread.Controls;
-using Microsoft.Office.Interop.Word;
+using Microsoft.Office.Interop.Word;
 using Newtonsoft.Json;
 using System;
 using System.Collections.Generic;
-using System.Linq;
+using System.Diagnostics;
 using System.Security.Cryptography;
-using System.Security.Policy;
-using System.Text;
-using System.Windows.Forms;
-using static System.Net.Mime.MediaTypeNames;
 
 namespace AIProofread
 {
     public class Tools
     {
         private static readonly string[] paragSplitor = new string[] { "\r", "\n", "\r\n" };
-        public static Dictionary<string, object> GetAllText()
+        /// <summary>
+        /// Collects the text of every non-blank paragraph of <paramref name="doc"/> that is not part of a table.
+        /// </summary>
+        public static Dictionary<string, object> GetAllText(Document doc)
         {
-            var doc = Globals.ThisAddIn.Application.ActiveDocument;
             // 获取当前文档所有文本
             string allText = doc.Range().Text;
             List<DocumentText> list = new List<DocumentText>();
             //
             // 开始分割
             MD5 md5 = new MD5CryptoServiceProvider();
-            //if (allText != null && allText.Trim().Length > 0)
-            //{
-
-            //    List<string> lines = allText.Split(paragSplitor, StringSplitOptions.None).ToList();//StringUtil.CutTextToSentences(allText);
-            //    foreach (string text in lines)
-            //    {
-            //        if(text.Trim().Length > 0)
-            //        {
-            //            byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
-            //            list.Add(new DocumentText(hash, text + "\n"));
-            //        }
-            //        else
-            //        {
-
-            //            list.Add(new DocumentText(text + "\n"));
-            //        }
-            //    }
-            //}
-
             Paragraphs paragraphs = doc.Paragraphs;
-            int total = paragraphs.Count;
+
             //FormLoading frm = new FormLoading();
             //frm.Show();
-            for (int paragraphNumber = 1; paragraphNumber <= total; paragraphNumber++)
+            int paragraphNumber = 0;
+            foreach (Paragraph p in paragraphs)
             {
-                Paragraph p = paragraphs[paragraphNumber];
+                paragraphNumber++;
+                if (paragraphNumber % 20 == 0)
+                {
+                    Debug.WriteLine("process paragraphNumber{0}", paragraphNumber);
+                }
                 Range r = p.Range;
-                if (r.ListFormat.ListType == WdListType.wdListPictureBullet
-                    || r.Tables.Count > 0
-                    || p.Range.Text.Trim().Length == 0)
+                // Reuse the range fetched above instead of asking Word for it again.
+                string text = r.Text;
+                // Skip blank paragraphs, text ending in "\r\a" (presumably Word's end-of-cell marker) and ranges that report tables.
+                if (string.IsNullOrWhiteSpace(text)
+                    || text.EndsWith("\r\a", StringComparison.Ordinal)
+                    || r.Tables.Count > 0)
                 {
                     continue;
                 }
-                string text = p.Range.Text;
-                //Logger.LogToWeb(string.Format("get paragraph {0}", paragraphNumber));
-                //frm.SetLoadingText(text);
-                if (text.Trim().Length > 0)
-                {
-                    //byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
-                    list.Add(new DocumentText(text,paragraphNumber));
-                }
-
+                // Strip U+0002 placeholder characters (presumably footnote/endnote reference marks; confirm).
+                list.Add(new DocumentText(text.Replace("\u0002", ""), paragraphNumber));
             }
             //frm.Close();
             var map = new Dictionary<string, object>
             {
@@ -76,6 +55,49 @@ namespace AIProofread
             return map;
         }
 
+        /// <summary>
+        /// Collects the text of paragraphs start..end (1-based, inclusive) of the active document,
+        /// skipping blank paragraphs and ranges that report tables, footnotes, endnotes or a list type.
+        /// </summary>
+        /// <param name="start">First paragraph number; values below 1 are treated as 1.</param>
+        /// <param name="end">Last paragraph number; values above the paragraph count are clamped to it.</param>
+        /// <returns>One entry per kept paragraph; empty when start is past the last paragraph.</returns>
+        public static List<DocumentText> GetTextListByParagraphRange(int start, int end)
+        {
+            List<DocumentText> list = new List<DocumentText>();
+            var doc = Globals.ThisAddIn.Application.ActiveDocument;
+            Paragraphs paragraphs = doc.Paragraphs;
+            int total = paragraphs.Count;
+            if (start > total)
+            {
+                return list;
+            }
+
+            start = Math.Max(start, 1);
+            end = Math.Min(end, total);
+
+            for (int paragraphNumber = start; paragraphNumber <= end; paragraphNumber++)
+            {
+                Range r = paragraphs[paragraphNumber].Range;
+                if (r.Tables.Count > 0
+                    || r.Endnotes.Count > 0
+                    || r.Footnotes.Count > 0
+                    || r.ListFormat.ListType != WdListType.wdListNoNumbering)
+                {
+                    continue;
+                }
+
+                // Reuse the range fetched above instead of asking Word for it again.
+                string text = r.Text;
+                if (!string.IsNullOrWhiteSpace(text))
+                {
+                    list.Add(new DocumentText(text, paragraphNumber));
+                }
+            }
+
+            return list;
+        }
+
         public static string GetJSONString(object data)
         {
             return JsonConvert.SerializeObject(data, Formatting.Indented);