后台处理文档
This commit is contained in:
parent
097ec94001
commit
9dbe1d5bf4
@ -1,4 +1,5 @@
|
|||||||
using AIProofread.Controls;
|
using AIProofread.Controls;
|
||||||
|
using AIProofread.Util;
|
||||||
using Microsoft.Office.Interop.Word;
|
using Microsoft.Office.Interop.Word;
|
||||||
using Microsoft.Office.Tools.Word;
|
using Microsoft.Office.Tools.Word;
|
||||||
using Microsoft.Web.WebView2.Core;
|
using Microsoft.Web.WebView2.Core;
|
||||||
@ -11,8 +12,10 @@ using System.Drawing;
|
|||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
using System.Threading.Tasks;
|
||||||
using UtilLib;
|
using UtilLib;
|
||||||
using Document = Microsoft.Office.Interop.Word.Document;
|
using Document = Microsoft.Office.Interop.Word.Document;
|
||||||
|
using Task = System.Threading.Tasks.Task;
|
||||||
|
|
||||||
namespace AIProofread
|
namespace AIProofread
|
||||||
{
|
{
|
||||||
@ -149,21 +152,114 @@ namespace AIProofread
|
|||||||
/// <summary>
/// Collects the text data of the document that is currently active in Word.
/// </summary>
/// <returns>The dictionary produced by <c>Tools.GetAllText</c> for the active document.</returns>
public Dictionary<string, object> getAllText()
{
    var activeDocument = Globals.ThisAddIn.Application.ActiveDocument;
    return Tools.GetAllText(activeDocument);
}
|
||||||
/// <summary>
/// Builds a JSON description of the active document: its name, full path,
/// word and character counts, and its text content.
/// </summary>
/// <remarks>
/// The content is not read from the active document directly. The file is copied
/// to a temporary path, opened read-only in a separate Word <c>Application</c>
/// instance, and the text is extracted from that copy. The temporary document,
/// the extra Word instance and the temporary file are released in
/// <c>finally</c> blocks so they are cleaned up even when opening or reading fails.
/// </remarks>
/// <returns>The JSON string produced by <c>Tools.GetJSONString</c>.</returns>
public string getDocumentData()
{
    Dictionary<string, object> data = new Dictionary<string, object>();
    var doc = Globals.ThisAddIn.Application.ActiveDocument;
    data.Add("name", doc.Name);
    data.Add("fullName", doc.FullName);
    data.Add("wordsCount", doc.Words.Count);
    data.Add("charactersCount", doc.Characters.Count);

    object missing = System.Reflection.Missing.Value;
    object saveOption = WdSaveOptions.wdDoNotSaveChanges;

    // Create the temporary copy first: if this fails, no Word instance has been started yet.
    string path = CreateTempDocumentFile(doc);
    try
    {
        Application app = new Application();
        try
        {
            // Arguments: file name, ConfirmConversions = false, ReadOnly = true, AddToRecentFiles = false.
            Document tmpDoc = app.Documents.Open(path, false, true, false);
            try
            {
                data.Add("content", Tools.GetAllText(tmpDoc));
            }
            finally
            {
                // Close without saving and drop the COM reference.
                tmpDoc.Close(ref saveOption, ref missing, ref missing);
                Marshal.ReleaseComObject(tmpDoc);
            }
        }
        finally
        {
            // Always shut the extra Word instance down, otherwise it stays alive in the background.
            app.Quit(ref saveOption, ref missing, ref missing);
            Marshal.ReleaseComObject(app);
        }
    }
    finally
    {
        // Kept from the original implementation: an explicit collection before deleting the file.
        // NOTE(review): presumably this releases COM wrappers that were not released explicitly
        // (e.g. app.Documents) - confirm whether it is still required.
        GC.Collect();
        File.Delete(path);
    }

    return Tools.GetJSONString(data);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Copies the file behind <paramref name="doc"/> to a newly created temporary file.
/// </summary>
/// <param name="doc">Document whose <c>FullName</c> is used as the path of the file to copy.</param>
/// <returns>The path of the temporary copy. The caller is responsible for deleting it.</returns>
private string CreateTempDocumentFile(Document doc)
{
    // FileShare.ReadWrite allows the read to succeed while another handle has the file
    // open for writing - presumably Word itself; confirm.
    using (FileStream source = new FileStream(doc.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        // The temp file is created only after the source opened successfully,
        // so a failed open does not leave an empty temp file behind.
        string tmpFile = Path.GetTempFileName();
        using (FileStream target = new FileStream(tmpFile, FileMode.Create, FileAccess.Write))
        {
            // CopyTo loops until the end of the stream; a single Read call is not
            // guaranteed to return all requested bytes.
            source.CopyTo(target);
        }

        return tmpFile;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Returns, as JSON, the paragraph texts that <c>Tools.GetTextListByParagraphRange</c>
/// yields for the given paragraph numbers.
/// </summary>
/// <param name="start">First paragraph number requested.</param>
/// <param name="end">Last paragraph number requested.</param>
/// <returns>The JSON string produced by <c>Tools.GetJSONString</c>.</returns>
public string getParagraphTextByRange(int start, int end)
{
    return Tools.GetJSONString(Tools.GetTextListByParagraphRange(start, end));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns, as JSON, three different splits of the active document's text:
/// <c>origin_cut</c> (raw content split on Word's break characters),
/// <c>trim_cut</c> (content normalised by <c>HostHelper.ReplaceSpecialChars</c>, split on '\n')
/// and <c>paragraph_cut</c> (the text of every entry in <c>doc.Paragraphs</c>).
/// </summary>
/// <returns>The JSON string produced by <c>Tools.GetJSONString</c>.</returns>
public string getAllParagraphs()
{
    var activeDocument = Globals.ThisAddIn.Application.ActiveDocument;
    var rawText = activeDocument.Content.Text;
    var normalisedText = HostHelper.ReplaceSpecialChars(rawText, isReplaceMultSpaceLine: true);

    // Split the untouched content on every break character sequence.
    string[] originCut = rawText.Split(
        new string[5] { "\r\a", "\a", "\r", "\v", "\f" },
        StringSplitOptions.None);

    // The normalised text only contains '\n' as a line separator.
    string[] trimCut = normalisedText.Split('\n');

    // Word collections are 1-based, hence the index shift.
    var allParagraphs = activeDocument.Paragraphs;
    int paragraphCount = allParagraphs.Count;
    string[] paragraphCut = new string[paragraphCount];
    for (int slot = 0; slot < paragraphCount; slot++)
    {
        paragraphCut[slot] = GetParagraphText(allParagraphs[slot + 1]);
    }

    var payload = new Dictionary<string, object>();
    payload.Add("origin_cut", originCut);
    payload.Add("trim_cut", trimCut);
    payload.Add("paragraph_cut", paragraphCut);
    return Tools.GetJSONString(payload);
}
|
||||||
|
/// <summary>
/// Returns the text of a paragraph by delegating to <c>GetRangeText</c> with the paragraph's range.
/// </summary>
/// <param name="paragraph">Paragraph to read.</param>
/// <returns>The text of <c>paragraph.Range</c>.</returns>
private string GetParagraphText(Paragraph paragraph)
{
    var paragraphRange = paragraph.Range;
    return GetRangeText(paragraphRange);
}
|
||||||
|
/// <summary>
/// Returns the <c>Text</c> of the given range unchanged.
/// </summary>
/// <param name="range">Range to read.</param>
/// <returns>The value of <c>range.Text</c>.</returns>
private string GetRangeText(Range range)
{
    string content = range.Text;
    return content;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Queues the extraction of paragraph texts on a background task and returns immediately.
/// </summary>
/// <remarks>
/// Despite the "Sync" suffix, the work is started with <c>Task.Run</c>. The result is not
/// returned to the caller; it is delivered through <c>Globals.ThisAddIn.SendMessageToWeb</c>
/// under the message name <c>getParagraphTextByRange</c>. Failures inside the task are
/// logged, because nobody awaits the task and they would otherwise go unnoticed.
/// </remarks>
/// <param name="start">First paragraph number requested.</param>
/// <param name="end">Last paragraph number requested.</param>
public void getParagraphTextByRangeSync(int start, int end)
{
    Task.Run(() =>
    {
        try
        {
            var list = Tools.GetTextListByParagraphRange(start, end);
            Globals.ThisAddIn.SendMessageToWeb("getParagraphTextByRange", Tools.GetJSONString(list));
        }
        catch (Exception ex)
        {
            // Fire-and-forget: without this the exception would be lost with the unobserved task.
            Logger.Log(ex.ToString());
        }
    });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the number of entries in the active document's <c>Paragraphs</c> collection.
/// </summary>
/// <returns>The value of <c>Paragraphs.Count</c> for the active document.</returns>
public int getTotalParagraphNumber()
{
    var activeDocument = Globals.ThisAddIn.Application.ActiveDocument;
    return activeDocument.Paragraphs.Count;
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 读取文档原始文件并转换成base64
|
/// 读取文档原始文件并转换成base64
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@ -482,18 +578,28 @@ namespace AIProofread
|
|||||||
int index = 0;
|
int index = 0;
|
||||||
foreach (var item in correct.Diffs)
|
foreach (var item in correct.Diffs)
|
||||||
{
|
{
|
||||||
|
if (item.idx == 3330)
|
||||||
|
{
|
||||||
|
Console.WriteLine("xx");
|
||||||
|
}
|
||||||
var mark = AddBookmark(item, index, correct.Sentence_offset, correct.Insert_len, correct.Paragraph_num);
|
var mark = AddBookmark(item, index, correct.Sentence_offset, correct.Insert_len, correct.Paragraph_num);
|
||||||
if (item.tag != "i") index++;
|
if (item.tag != "i") index++;
|
||||||
var msg = new Dictionary<string, object>{
|
|
||||||
{"message",mark == null ? "没有找到标记对象":"标记对象" + mark.Name },
|
|
||||||
{ "origin",item }
|
|
||||||
};
|
|
||||||
if (mark != null)
|
if (mark != null)
|
||||||
{
|
{
|
||||||
marks.Add(item.id, new ProofreadItem(item, mark));
|
marks.Add(item.id, new ProofreadItem(item, mark));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
var msg = new Dictionary<string, object>{
|
||||||
|
{"message","没有找到标记对象" },
|
||||||
|
{ "origin",item },
|
||||||
|
{ "origin_correct",correct },
|
||||||
|
{ "new_text",correct.New_text },
|
||||||
|
{ "paragraph_num",correct.Paragraph_num },
|
||||||
|
};
|
||||||
|
Logger.Log(JsonConvert.SerializeObject(msg));
|
||||||
|
}
|
||||||
|
|
||||||
Logger.LogToWeb(JsonConvert.SerializeObject(msg));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
52
AIProofread/Util/HostHelper.cs
Normal file
52
AIProofread/Util/HostHelper.cs
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
namespace AIProofread.Util
|
||||||
|
{
|
||||||
|
/// <summary>
/// String helpers for text taken from a Word document: normalising break characters,
/// trimming invisible characters and detecting whether any real content is present.
/// </summary>
public class HostHelper
{
    // Carriage return, form feed, bell and vertical tab; each single occurrence becomes '\n'.
    private static readonly Regex BreakChars = new Regex("[\r\f\a\v]");

    private static readonly char[] NewLineSeparator = { '\n' };

    private static readonly char[] InvisibleChars = { '\r', '\a', '\n', '\f', '\v' };

    private static readonly char[] InvisibleCharsAndTab = { '\r', '\a', '\n', '\f', '\v', '\t' };

    // All control characters U+0000..U+001F plus space and '/'.
    private static readonly char[] NonContentChars =
    {
        '\0', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\a', '\b', '\t',
        '\n', '\v', '\f', '\r', '\u000e', '\u000f', '\u0010', '\u0011', '\u0012', '\u0013',
        '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001a', '\u001b', '\u001c', '\u001d',
        '\u001e', '\u001f', ' ', '/'
    };

    /// <summary>
    /// Replaces every '\r', '\f', '\a' and '\v' with '\n' and every U+001E with '-'.
    /// </summary>
    /// <param name="text">Text to normalise; null or empty input is returned unchanged.</param>
    /// <param name="isReplaceMultSpaceLine">
    /// When true, empty lines are removed (consecutive '\n' collapse into one);
    /// a trailing '\n' is kept if the normalised text ended with one.
    /// </param>
    /// <returns>The normalised text.</returns>
    public static string ReplaceSpecialChars(string text, bool isReplaceMultSpaceLine = false)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        text = BreakChars.Replace(text, "\n").Replace("\u001e", "-");
        if (isReplaceMultSpaceLine)
        {
            // Both replacements are one-for-one, so the text is still non-empty here.
            bool endsWithNewLine = text[text.Length - 1] == '\n';
            text = string.Join("\n", text.Split(NewLineSeparator, StringSplitOptions.RemoveEmptyEntries));
            if (endsWithNewLine)
            {
                text += "\n";
            }
        }

        return text;
    }

    /// <summary>
    /// Trims '\r', '\a', '\n', '\f' and '\v' from both ends of the text.
    /// </summary>
    /// <param name="text">Text to trim.</param>
    /// <param name="containTable">When true, '\t' is trimmed as well.</param>
    /// <returns>The trimmed text, or an empty string for null or empty input.</returns>
    public static string RemoveInvisibleCharas(string text, bool containTable = false)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        return text.Trim(containTable ? InvisibleCharsAndTab : InvisibleChars);
    }

    /// <summary>
    /// Tells whether anything is left after trimming control characters (U+0000..U+001F),
    /// spaces and '/' from both ends of the text.
    /// </summary>
    /// <param name="text">Text to inspect; null is treated as having no content.</param>
    /// <returns>True when at least one character remains after trimming.</returns>
    public static bool HasValidCharacters(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        return text.Trim(NonContentChars).Length > 0;
    }
}
|
||||||
|
}
|
@ -1,72 +1,56 @@
|
|||||||
using AIProofread.Controls;
|
using Microsoft.Office.Interop.Word;
|
||||||
using Microsoft.Office.Interop.Word;
|
|
||||||
using Newtonsoft.Json;
|
using Newtonsoft.Json;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Diagnostics;
|
||||||
using System.Security.Cryptography;
|
using System.Security.Cryptography;
|
||||||
using System.Security.Policy;
|
|
||||||
using System.Text;
|
|
||||||
using System.Windows.Forms;
|
|
||||||
using static System.Net.Mime.MediaTypeNames;
|
|
||||||
|
|
||||||
namespace AIProofread
|
namespace AIProofread
|
||||||
{
|
{
|
||||||
public class Tools
|
public class Tools
|
||||||
{
|
{
|
||||||
private static readonly string[] paragSplitor = new string[] { "\r", "\n", "\r\n" };
|
private static readonly string[] paragSplitor = new string[] { "\r", "\n", "\r\n" };
|
||||||
public static Dictionary<string, object> GetAllText()
|
public static Dictionary<string, object> GetAllText(Document doc)
|
||||||
{
|
{
|
||||||
var doc = Globals.ThisAddIn.Application.ActiveDocument;
|
|
||||||
// 获取当前文档所有文本
|
// 获取当前文档所有文本
|
||||||
string allText = doc.Range().Text;
|
string allText = doc.Range().Text;
|
||||||
List<DocumentText> list = new List<DocumentText>();
|
List<DocumentText> list = new List<DocumentText>();
|
||||||
// // 开始分割
|
// // 开始分割
|
||||||
MD5 md5 = new MD5CryptoServiceProvider();
|
MD5 md5 = new MD5CryptoServiceProvider();
|
||||||
|
|
||||||
//if (allText != null && allText.Trim().Length > 0)
|
|
||||||
//{
|
|
||||||
|
|
||||||
// List<string> lines = allText.Split(paragSplitor, StringSplitOptions.None).ToList();//StringUtil.CutTextToSentences(allText);
|
|
||||||
// foreach (string text in lines)
|
|
||||||
// {
|
|
||||||
// if(text.Trim().Length > 0)
|
|
||||||
// {
|
|
||||||
// byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
|
|
||||||
// list.Add(new DocumentText(hash, text + "\n"));
|
|
||||||
// }
|
|
||||||
// else
|
|
||||||
// {
|
|
||||||
|
|
||||||
// list.Add(new DocumentText(text + "\n"));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
Paragraphs paragraphs = doc.Paragraphs;
|
Paragraphs paragraphs = doc.Paragraphs;
|
||||||
int total = paragraphs.Count;
|
|
||||||
//FormLoading frm = new FormLoading();
|
//FormLoading frm = new FormLoading();
|
||||||
//frm.Show();
|
//frm.Show();
|
||||||
for (int paragraphNumber = 1; paragraphNumber <= total; paragraphNumber++)
|
int paragraphNumber = 0;
|
||||||
|
foreach (Paragraph p in paragraphs)
|
||||||
{
|
{
|
||||||
Paragraph p = paragraphs[paragraphNumber];
|
paragraphNumber++;
|
||||||
|
if (paragraphNumber % 20 == 0)
|
||||||
|
{
|
||||||
|
Debug.WriteLine("process paragraphNumber{0}", paragraphNumber);
|
||||||
|
}
|
||||||
Range r = p.Range;
|
Range r = p.Range;
|
||||||
if (r.ListFormat.ListType == WdListType.wdListPictureBullet
|
string text = p.Range.Text;
|
||||||
|| r.Tables.Count > 0
|
if (text.Trim().Length == 0 || text.EndsWith("\r\a") || r.Tables.Count > 0)
|
||||||
|| p.Range.Text.Trim().Length == 0)
|
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
string text = p.Range.Text;
|
list.Add(new DocumentText(text.Replace("\u0002", ""), paragraphNumber));
|
||||||
//Logger.LogToWeb(string.Format("get paragraph {0}", paragraphNumber));
|
|
||||||
//frm.SetLoadingText(text);
|
|
||||||
if (text.Trim().Length > 0)
|
|
||||||
{
|
|
||||||
//byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
|
|
||||||
list.Add(new DocumentText(text,paragraphNumber));
|
|
||||||
}
|
}
|
||||||
|
//for (; paragraphNumber <= total; paragraphNumber++)
|
||||||
|
//{
|
||||||
|
|
||||||
}
|
// Paragraph p = paragraphs[paragraphNumber];
|
||||||
|
|
||||||
|
// //Logger.LogToWeb(string.Format("get paragraph {0}", paragraphNumber));
|
||||||
|
// //frm.SetLoadingText(text);
|
||||||
|
// //if (text.Trim().Length > 0)
|
||||||
|
// //{
|
||||||
|
// // //byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
|
||||||
|
|
||||||
|
// //}
|
||||||
|
//}
|
||||||
//frm.Close();
|
//frm.Close();
|
||||||
var map = new Dictionary<string, object>
|
var map = new Dictionary<string, object>
|
||||||
{
|
{
|
||||||
@ -76,6 +60,36 @@ namespace AIProofread
|
|||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Collects the text of the active document's paragraphs whose numbers lie between
/// <paramref name="start"/> and <paramref name="end"/> (both inclusive, 1-based).
/// </summary>
/// <remarks>
/// The range is clamped to the existing paragraphs. Paragraphs whose range contains a table,
/// an endnote or a footnote, that belong to a list, or whose text is blank are left out.
/// </remarks>
/// <param name="start">First paragraph number; values below 1 are raised to 1.</param>
/// <param name="end">Last paragraph number; values above the paragraph count are lowered to it.</param>
/// <returns>
/// One <c>DocumentText</c> per accepted paragraph, built from its text and paragraph number;
/// empty when <paramref name="start"/> is beyond the last paragraph.
/// </returns>
public static List<DocumentText> GetTextListByParagraphRange(int start, int end)
{
    var collected = new List<DocumentText>();
    Paragraphs allParagraphs = Globals.ThisAddIn.Application.ActiveDocument.Paragraphs;
    int paragraphCount = allParagraphs.Count;
    if (start > paragraphCount)
    {
        return collected;
    }

    int first = Math.Max(start, 1);
    int last = Math.Min(end, paragraphCount);
    for (int number = first; number <= last; number++)
    {
        Paragraph paragraph = allParagraphs[number];
        Range paragraphRange = paragraph.Range;

        // Only plain body paragraphs are kept.
        bool isPlainParagraph = paragraphRange.Tables.Count <= 0
            && paragraphRange.Endnotes.Count <= 0
            && paragraphRange.Footnotes.Count <= 0
            && paragraphRange.ListFormat.ListType == WdListType.wdListNoNumbering;
        if (!isPlainParagraph)
        {
            continue;
        }

        string content = paragraph.Range.Text;
        if (content.Trim().Length > 0)
        {
            collected.Add(new DocumentText(content, number));
        }
    }

    return collected;
}
|
||||||
|
|
||||||
public static string GetJSONString(object data)
|
public static string GetJSONString(object data)
|
||||||
{
|
{
|
||||||
return JsonConvert.SerializeObject(data, Formatting.Indented);
|
return JsonConvert.SerializeObject(data, Formatting.Indented);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user