using Microsoft.Office.Interop.Word;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using static System.Net.Mime.MediaTypeNames;

namespace WordAddInTest2024
{
    /// <summary>
    /// Static helpers for splitting text into sentences and exporting the
    /// active Word document's text as JSON.
    /// </summary>
    public class Tools
    {
        /// <summary>
        /// Splits a paragraph into sentences.
        /// </summary>
        /// <remarks>
        /// A break is inserted after a sentence terminator, a six-dot run or a
        /// double ellipsis character, unless a closing quotation mark follows;
        /// in that case the break is placed after the closing quote instead
        /// (unless more punctuation follows the quote).
        /// </remarks>
        /// <param name="para">The text to split. May be null or empty.</param>
        /// <returns>
        /// The non-empty sentences in their original order; an empty list when
        /// <paramref name="para"/> is null or empty.
        /// </returns>
        public static List<string> CutSentences(string para)
        {
            if (string.IsNullOrEmpty(para))
            {
                return new List<string>();
            }

            // Terminator followed by anything other than a closing quote.
            para = Regex.Replace(para, @"([。!?\?])([^”’])", "$1\n$2");
            // Six consecutive dots followed by anything other than a closing quote.
            para = Regex.Replace(para, @"(\.{6})([^”’])", "$1\n$2");
            // Two consecutive ellipsis characters followed by anything other than a closing quote.
            para = Regex.Replace(para, @"(\…{2})([^”’])", "$1\n$2");
            // Terminator plus closing quote, not followed by further punctuation.
            para = Regex.Replace(para, @"([。!?\?][”’])([^,。!?\?])", "$1\n$2");

            // RemoveEmptyEntries also discards the empty piece a trailing '\n' would create.
            return para.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList();
        }

        /// <summary>
        /// Reads the text of the active document, splits it into lines and then
        /// into sentences, and returns the result as indented JSON.
        /// </summary>
        /// <remarks>
        /// Each sentence becomes one <c>DocumentText</c> built from the MD5 hash
        /// of its UTF-8 bytes and the sentence text. The last sentence of every
        /// line carries a trailing <c>"\n"</c> (included in the hashed text) so
        /// that line boundaries can be recovered by the consumer.
        /// </remarks>
        /// <returns>
        /// A JSON array of the collected entries; an empty array when the
        /// document text is null or empty.
        /// </returns>
        public static string GetAllText()
        {
            // Get all text of the current document.
            string allText = Globals.ThisAddIn.Application.ActiveDocument.Range().Text;
            List<DocumentText> list = new List<DocumentText>();

            if (!string.IsNullOrEmpty(allText))
            {
                // Split into lines on any newline convention, dropping empty lines.
                string[] separators = { "\r\n", "\r", "\n" };
                string[] lines = allText.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                using (MD5 md5 = MD5.Create())
                {
                    foreach (string line in lines)
                    {
                        List<string> sentences = CutSentences(line);
                        for (int i = 0; i < sentences.Count; i++)
                        {
                            bool isLastInLine = i == sentences.Count - 1;
                            string text = isLastInLine ? sentences[i] + "\n" : sentences[i];

                            // UTF-8 keeps the hash independent of the machine's ANSI code page.
                            byte[] hash = md5.ComputeHash(Encoding.UTF8.GetBytes(text));
                            list.Add(new DocumentText(hash, text));
                        }
                    }
                }
            }

            return GetJSONString(list);
        }

        /// <summary>
        /// Serializes an object to indented JSON using Json.NET.
        /// </summary>
        /// <param name="data">The object to serialize.</param>
        /// <returns>The indented JSON representation of <paramref name="data"/>.</returns>
        public static string GetJSONString(object data)
        {
            return JsonConvert.SerializeObject(data, Formatting.Indented);
        }
    }
}