// Reconstructed from git patch ebce23c6c4f7f1884fa42d6c3aeca46d39197027
// From: callmeyan    Date: Sat, 3 Feb 2024 12:04:55 +0800    Subject: [PATCH] add core
// The patch created two files, merged here into one compilation unit:
//   AIProofread/core/DocumentText.cs
//   AIProofread/core/Tools.cs

using Microsoft.Office.Interop.Word;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using static System.Net.Mime.MediaTypeNames;

namespace WordAddInTest2024
{
    // ---- AIProofread/core/DocumentText.cs ----

    /// <summary>
    /// A piece of document text together with a string form of its hash.
    /// </summary>
    public class DocumentText
    {
        /// <summary>
        /// Hash of <see cref="Text"/> as produced by <see cref="BitConverter.ToString(byte[])"/>
        /// (hexadecimal byte pairs separated by hyphens).
        /// </summary>
        public string Hash { get; set; }

        /// <summary>The text this entry represents.</summary>
        public string Text { get; set; }

        /// <summary>Creates an empty instance; both properties are left unset.</summary>
        public DocumentText() { }

        /// <summary>Creates an instance from raw hash bytes and the text they belong to.</summary>
        /// <param name="hash">Raw hash bytes; converted to a string with <see cref="BitConverter.ToString(byte[])"/>.</param>
        /// <param name="text">The text that was hashed.</param>
        /// <exception cref="ArgumentNullException"><paramref name="hash"/> is null.</exception>
        public DocumentText(byte[] hash, string text)
        {
            Hash = BitConverter.ToString(hash);
            Text = text;
        }
    }

    // ---- AIProofread/core/Tools.cs ----

    /// <summary>
    /// Static helpers for splitting the active Word document into sentences and serializing the result.
    /// </summary>
    public class Tools
    {
        /// <summary>
        /// Splits a paragraph into sentences by inserting line breaks after sentence-ending
        /// punctuation and then splitting on those breaks.
        /// </summary>
        /// <param name="para">Paragraph text; may be null or empty.</param>
        /// <returns>The non-empty sentences in order; an empty list for null or empty input.</returns>
        public static List<string> CutSentences(string para)
        {
            if (string.IsNullOrEmpty(para))
            {
                return new List<string>();
            }

            // NOTE(review): the character classes below list '?' twice ("?" and "\?"), which suggests
            // the fullwidth forms of '!' and '?' were originally intended - confirm against the
            // upstream file. The patterns are kept exactly as they appear in the patch.

            // Break after a sentence-ending mark unless a closing quote follows it.
            para = Regex.Replace(para, @"([。!?\?])([^”’])", "$1\n$2");
            // Break after six consecutive ASCII dots unless a closing quote follows.
            para = Regex.Replace(para, @"(\.{6})([^”’])", "$1\n$2");
            // Break after two consecutive ellipsis characters unless a closing quote follows.
            para = Regex.Replace(para, @"(\…{2})([^”’])", "$1\n$2");
            // Break after "sentence-ending mark + closing quote" unless more punctuation follows.
            para = Regex.Replace(para, @"([。!?\?][”’])([^,。!?\?])", "$1\n$2");

            // RemoveEmptyEntries also discards any trailing empty piece, so no explicit trim is needed.
            return para.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList();
        }

        /// <summary>
        /// Reads the full text of the active document, splits it into lines and then into
        /// sentences, and returns the sentences with their MD5 hashes as a JSON array.
        /// </summary>
        /// <remarks>
        /// The last sentence of every line gets a trailing "\n" appended before hashing so that
        /// line boundaries are preserved in the output. Hashes are computed over the UTF-8 bytes
        /// of the text so they do not depend on the machine's default code page.
        /// </remarks>
        /// <returns>Indented JSON for a list of <see cref="DocumentText"/>; "[]" when the document has no text.</returns>
        public static string GetAllText()
        {
            // Fetch all text of the current document.
            string allText = Globals.ThisAddIn.Application.ActiveDocument.Range().Text;
            var list = new List<DocumentText>();

            if (!string.IsNullOrEmpty(allText))
            {
                // Split into lines on any of the three line-break forms, dropping empty lines.
                string[] separators = { "\r\n", "\r", "\n" };
                string[] lines = allText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                using (MD5 md5 = MD5.Create())
                {
                    foreach (string line in lines)
                    {
                        List<string> sentences = CutSentences(line);
                        for (int i = 0; i < sentences.Count; i++)
                        {
                            bool isLastInLine = i + 1 == sentences.Count;
                            string text = sentences[i] + (isLastInLine ? "\n" : "");
                            byte[] hash = md5.ComputeHash(Encoding.UTF8.GetBytes(text));
                            list.Add(new DocumentText(hash, text));
                        }
                    }
                }
            }

            return GetJSONString(list);
        }

        /// <summary>Serializes <paramref name="data"/> to indented JSON using Json.NET.</summary>
        /// <param name="data">The object to serialize.</param>
        /// <returns>The indented JSON text.</returns>
        public static string GetJSONString(object data) =>
            JsonConvert.SerializeObject(data, Formatting.Indented);
    }
}