后台处理文档
This commit is contained in:
parent
097ec94001
commit
9dbe1d5bf4
@ -1,4 +1,5 @@
|
||||
using AIProofread.Controls;
|
||||
using AIProofread.Util;
|
||||
using Microsoft.Office.Interop.Word;
|
||||
using Microsoft.Office.Tools.Word;
|
||||
using Microsoft.Web.WebView2.Core;
|
||||
@ -11,8 +12,10 @@ using System.Drawing;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using UtilLib;
|
||||
using Document = Microsoft.Office.Interop.Word.Document;
|
||||
using Task = System.Threading.Tasks.Task;
|
||||
|
||||
namespace AIProofread
|
||||
{
|
||||
@ -149,21 +152,114 @@ namespace AIProofread
|
||||
/// <summary>
/// Returns the text data of the currently active Word document.
/// </summary>
/// <returns>The dictionary produced by <c>Tools.GetAllText</c> for the active document.</returns>
public Dictionary<string, object> getAllText()
{
    // Tools.GetAllText takes the document explicitly; pass the active one.
    return Tools.GetAllText(Globals.ThisAddIn.Application.ActiveDocument);
}
|
||||
/// <summary>
/// Collects metadata and the text content of the active document and returns it as JSON.
/// </summary>
/// <remarks>
/// The content is not read from the live document. The document's file is copied to a
/// temporary file (see <c>CreateTempDocumentFile</c>), which is opened read-only in a
/// separate Word instance and passed to <c>Tools.GetAllText</c>.
/// The temporary document, the extra Word instance and the temporary file are always
/// cleaned up, even when reading the content fails.
/// </remarks>
/// <returns>
/// JSON string with the keys <c>name</c>, <c>fullName</c>, <c>wordsCount</c>,
/// <c>charactersCount</c> and <c>content</c>.
/// </returns>
public string getDocumentData()
{
    var doc = Globals.ThisAddIn.Application.ActiveDocument;

    // Each key is added exactly once; Dictionary.Add throws on duplicate keys.
    Dictionary<string, object> data = new Dictionary<string, object>
    {
        { "name", doc.Name },
        { "fullName", doc.FullName },
        { "wordsCount", doc.Words.Count },
        { "charactersCount", doc.Characters.Count },
    };

    object missing = System.Reflection.Missing.Value;
    object saveOption = WdSaveOptions.wdDoNotSaveChanges;

    // Create the temporary copy first so that a failure here does not leave
    // a second Word process running.
    string path = CreateTempDocumentFile(doc);
    try
    {
        Application app = new Application();
        try
        {
            // Arguments: FileName, ConfirmConversions = false, ReadOnly = true, AddToRecentFiles = false.
            Document tmpDoc = app.Documents.Open(path, false, true, false);
            try
            {
                data.Add("content", Tools.GetAllText(tmpDoc));
            }
            finally
            {
                // Close without saving and release the COM wrapper.
                tmpDoc.Close(ref saveOption, ref missing, ref missing);
                Marshal.ReleaseComObject(tmpDoc);
            }
        }
        finally
        {
            app.Quit(ref saveOption, ref missing, ref missing);
            Marshal.ReleaseComObject(app);
            // Intermediate COM wrappers (e.g. app.Documents, ranges touched while reading)
            // are not released explicitly, so force a collection to let them go.
            GC.Collect();
        }
    }
    finally
    {
        File.Delete(path);
    }

    return Tools.GetJSONString(data);
}
|
||||
|
||||
/// <summary>
/// Copies the file behind <paramref name="doc"/> to a newly created temporary file.
/// </summary>
/// <param name="doc">Document whose <c>FullName</c> is the path of the file to copy.</param>
/// <returns>Path of the temporary copy. The caller is responsible for deleting it.</returns>
private string CreateTempDocumentFile(Document doc)
{
    // FileShare.ReadWrite allows reading the file while another process
    // (presumably Word itself) still has it open.
    using (var source = new FileStream(doc.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            using (var target = new FileStream(tmpFile, FileMode.Create, FileAccess.Write))
            {
                // CopyTo loops until end of stream; a single Read call is not
                // guaranteed to return the whole file.
                source.CopyTo(target);
            }
        }
        catch
        {
            // Do not leave an orphaned temp file behind when the copy fails.
            File.Delete(tmpFile);
            throw;
        }

        return tmpFile;
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Returns, as JSON, the text entries that <c>Tools.GetTextListByParagraphRange</c>
/// yields for the given paragraph range.
/// </summary>
/// <param name="start">First paragraph number of the range.</param>
/// <param name="end">Last paragraph number of the range.</param>
/// <returns>JSON string produced by <c>Tools.GetJSONString</c>.</returns>
public string getParagraphTextByRange(int start, int end)
{
    return Tools.GetJSONString(Tools.GetTextListByParagraphRange(start, end));
}
|
||||
|
||||
/// <summary>
/// Returns, as JSON, three different paragraph splits of the active document.
/// </summary>
/// <returns>
/// JSON string with the keys <c>origin_cut</c> (raw content text split on Word's
/// break characters), <c>trim_cut</c> (content text after
/// <c>HostHelper.ReplaceSpecialChars</c>, split on line feeds) and
/// <c>paragraph_cut</c> (text of each entry in the document's paragraph collection).
/// </returns>
public string getAllParagraphs()
{
    var activeDoc = Globals.ThisAddIn.Application.ActiveDocument;

    // Split 1: the raw content text, cut on the break characters.
    var rawText = activeDoc.Content.Text;
    string[] originCut = rawText.Split(
        new[] { "\r\a", "\a", "\r", "\v", "\f" },
        StringSplitOptions.None);

    // Split 2: the normalized text, cut on line feeds.
    var normalizedText = HostHelper.ReplaceSpecialChars(rawText, isReplaceMultSpaceLine: true);
    string[] trimCut = normalizedText.Split('\n');

    // Split 3: one entry per paragraph; the paragraph collection is indexed from 1.
    var paragraphs = activeDoc.Paragraphs;
    int count = paragraphs.Count;
    string[] paragraphCut = new string[count];
    for (int n = 0; n < count; n++)
    {
        paragraphCut[n] = GetParagraphText(paragraphs[n + 1]);
    }

    var data = new Dictionary<string, object>();
    data.Add("origin_cut", originCut);
    data.Add("trim_cut", trimCut);
    data.Add("paragraph_cut", paragraphCut);
    return Tools.GetJSONString(data);
}
|
||||
/// <summary>
/// Returns the text of the given paragraph's range.
/// </summary>
/// <param name="paragraph">Paragraph to read.</param>
/// <returns>The result of <c>GetRangeText</c> for the paragraph's range.</returns>
private string GetParagraphText(Paragraph paragraph)
{
    Range paragraphRange = paragraph.Range;
    return GetRangeText(paragraphRange);
}
|
||||
/// <summary>
/// Returns the <c>Text</c> of the given range unchanged.
/// </summary>
/// <param name="range">Range to read.</param>
/// <returns>The range's text.</returns>
private string GetRangeText(Range range)
{
    string content = range.Text;
    return content;
}
|
||||
|
||||
|
||||
/// <summary>
/// Reads the text of the given paragraph range on a background task and sends the
/// result to the web view as a <c>getParagraphTextByRange</c> message.
/// </summary>
/// <remarks>
/// The method returns immediately; the result is delivered only through
/// <c>SendMessageToWeb</c>. Because nobody awaits the task, failures are logged here,
/// otherwise they would be lost silently.
/// </remarks>
/// <param name="start">First paragraph number of the range.</param>
/// <param name="end">Last paragraph number of the range.</param>
public void getParagraphTextByRangeSync(int start, int end)
{
    Task.Run(() =>
    {
        try
        {
            var list = Tools.GetTextListByParagraphRange(start, end);
            Globals.ThisAddIn.SendMessageToWeb("getParagraphTextByRange", Tools.GetJSONString(list));
        }
        catch (Exception ex)
        {
            // Fire-and-forget: record the failure instead of dropping it.
            Logger.Log(ex.ToString());
        }
    });
}
|
||||
|
||||
/// <summary>
/// Returns the number of paragraphs in the active document.
/// </summary>
/// <returns>The <c>Count</c> of the active document's paragraph collection.</returns>
public int getTotalParagraphNumber()
{
    var activeDoc = Globals.ThisAddIn.Application.ActiveDocument;
    return activeDoc.Paragraphs.Count;
}
|
||||
|
||||
/// <summary>
|
||||
/// 读取文档原始文件并转换成base64
|
||||
/// </summary>
|
||||
@ -482,18 +578,28 @@ namespace AIProofread
|
||||
int index = 0;
|
||||
foreach (var item in correct.Diffs)
|
||||
{
|
||||
if (item.idx == 3330)
|
||||
{
|
||||
Console.WriteLine("xx");
|
||||
}
|
||||
var mark = AddBookmark(item, index, correct.Sentence_offset, correct.Insert_len, correct.Paragraph_num);
|
||||
if (item.tag != "i") index++;
|
||||
var msg = new Dictionary<string, object>{
|
||||
{"message",mark == null ? "没有找到标记对象":"标记对象" + mark.Name },
|
||||
{ "origin",item }
|
||||
};
|
||||
if (mark != null)
|
||||
{
|
||||
marks.Add(item.id, new ProofreadItem(item, mark));
|
||||
}
|
||||
else
|
||||
{
|
||||
var msg = new Dictionary<string, object>{
|
||||
{"message","没有找到标记对象" },
|
||||
{ "origin",item },
|
||||
{ "origin_correct",correct },
|
||||
{ "new_text",correct.New_text },
|
||||
{ "paragraph_num",correct.Paragraph_num },
|
||||
};
|
||||
Logger.Log(JsonConvert.SerializeObject(msg));
|
||||
}
|
||||
|
||||
Logger.LogToWeb(JsonConvert.SerializeObject(msg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
52
AIProofread/Util/HostHelper.cs
Normal file
52
AIProofread/Util/HostHelper.cs
Normal file
@ -0,0 +1,52 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AIProofread.Util
|
||||
{
|
||||
/// <summary>
/// String helpers for normalizing text that contains control characters
/// (carriage return, form feed, bell, vertical tab, ...).
/// </summary>
public class HostHelper
{
    // Matches a single carriage return, form feed, bell or vertical tab character.
    // Cached: Regex instances are immutable and safe to share between calls.
    private static readonly Regex BreakCharRegex = new Regex("[\r\f\a\v]{1}");

    private static readonly char[] LineFeed = { '\n' };

    private static readonly char[] InvisibleChars = { '\r', '\a', '\n', '\f', '\v' };

    private static readonly char[] InvisibleCharsAndTab = { '\r', '\a', '\n', '\f', '\v', '\t' };

    // All control characters U+0000..U+001F plus space and '/'.
    private static readonly char[] InsignificantChars =
    {
        '\0', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\a', '\b', '\t',
        '\n', '\v', '\f', '\r', '\u000e', '\u000f', '\u0010', '\u0011', '\u0012', '\u0013',
        '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001a', '\u001b', '\u001c', '\u001d',
        '\u001e', '\u001f', ' ', '/'
    };

    /// <summary>
    /// Replaces every carriage return, form feed, bell and vertical tab with a line feed
    /// and every U+001E character with <c>-</c>.
    /// </summary>
    /// <param name="text">Text to normalize; <c>null</c> or empty is returned unchanged.</param>
    /// <param name="isReplaceMultSpaceLine">
    /// When <c>true</c>, empty lines are removed (runs of line feeds collapse to one);
    /// a trailing line feed is kept if the normalized text ended with one.
    /// </param>
    /// <returns>The normalized text.</returns>
    public static string ReplaceSpecialChars(string text, bool isReplaceMultSpaceLine = false)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // Both replacements are one-for-one, so the text is still non-empty afterwards.
        text = BreakCharRegex.Replace(text, "\n").Replace("\u001e", "-");
        if (!isReplaceMultSpaceLine)
        {
            return text;
        }

        bool endsWithLineFeed = text[text.Length - 1] == '\n';
        text = string.Join("\n", text.Split(LineFeed, StringSplitOptions.RemoveEmptyEntries));
        return endsWithLineFeed ? text + "\n" : text;
    }

    /// <summary>
    /// Trims carriage return, bell, line feed, form feed and vertical tab characters
    /// from both ends of the text.
    /// </summary>
    /// <param name="text">Text to trim.</param>
    /// <param name="containTable">When <c>true</c>, tab characters are trimmed as well.</param>
    /// <returns>The trimmed text, or an empty string for <c>null</c> or empty input.</returns>
    public static string RemoveInvisibleCharas(string text, bool containTable = false)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        return text.Trim(containTable ? InvisibleCharsAndTab : InvisibleChars);
    }

    /// <summary>
    /// Tells whether the text contains at least one character other than the control
    /// characters U+0000..U+001F, space and <c>/</c>.
    /// </summary>
    /// <param name="text">Text to inspect.</param>
    /// <returns>
    /// <c>true</c> if such a character exists; <c>false</c> otherwise, including for
    /// <c>null</c> or empty input.
    /// </returns>
    public static bool HasValidCharacters(string text)
    {
        // Guard against null instead of failing with a NullReferenceException.
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        return text.Trim(InsignificantChars).Length > 0;
    }
}
|
||||
}
|
@ -1,72 +1,56 @@
|
||||
using AIProofread.Controls;
|
||||
using Microsoft.Office.Interop.Word;
|
||||
using Microsoft.Office.Interop.Word;
|
||||
using Newtonsoft.Json;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Diagnostics;
|
||||
using System.Security.Cryptography;
|
||||
using System.Security.Policy;
|
||||
using System.Text;
|
||||
using System.Windows.Forms;
|
||||
using static System.Net.Mime.MediaTypeNames;
|
||||
|
||||
namespace AIProofread
|
||||
{
|
||||
public class Tools
|
||||
{
|
||||
private static readonly string[] paragSplitor = new string[] { "\r", "\n", "\r\n" };
|
||||
public static Dictionary<string, object> GetAllText()
|
||||
public static Dictionary<string, object> GetAllText(Document doc)
|
||||
{
|
||||
var doc = Globals.ThisAddIn.Application.ActiveDocument;
|
||||
// 获取当前文档所有文本
|
||||
string allText = doc.Range().Text;
|
||||
List<DocumentText> list = new List<DocumentText>();
|
||||
// // 开始分割
|
||||
MD5 md5 = new MD5CryptoServiceProvider();
|
||||
|
||||
//if (allText != null && allText.Trim().Length > 0)
|
||||
//{
|
||||
|
||||
// List<string> lines = allText.Split(paragSplitor, StringSplitOptions.None).ToList();//StringUtil.CutTextToSentences(allText);
|
||||
// foreach (string text in lines)
|
||||
// {
|
||||
// if(text.Trim().Length > 0)
|
||||
// {
|
||||
// byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
|
||||
// list.Add(new DocumentText(hash, text + "\n"));
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
|
||||
// list.Add(new DocumentText(text + "\n"));
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
Paragraphs paragraphs = doc.Paragraphs;
|
||||
int total = paragraphs.Count;
|
||||
|
||||
//FormLoading frm = new FormLoading();
|
||||
//frm.Show();
|
||||
for (int paragraphNumber = 1; paragraphNumber <= total; paragraphNumber++)
|
||||
int paragraphNumber = 0;
|
||||
foreach (Paragraph p in paragraphs)
|
||||
{
|
||||
Paragraph p = paragraphs[paragraphNumber];
|
||||
paragraphNumber++;
|
||||
if (paragraphNumber % 20 == 0)
|
||||
{
|
||||
Debug.WriteLine("process paragraphNumber{0}", paragraphNumber);
|
||||
}
|
||||
Range r = p.Range;
|
||||
if (r.ListFormat.ListType == WdListType.wdListPictureBullet
|
||||
|| r.Tables.Count > 0
|
||||
|| p.Range.Text.Trim().Length == 0)
|
||||
string text = p.Range.Text;
|
||||
if (text.Trim().Length == 0 || text.EndsWith("\r\a") || r.Tables.Count > 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
string text = p.Range.Text;
|
||||
//Logger.LogToWeb(string.Format("get paragraph {0}", paragraphNumber));
|
||||
//frm.SetLoadingText(text);
|
||||
if (text.Trim().Length > 0)
|
||||
{
|
||||
//byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
|
||||
list.Add(new DocumentText(text,paragraphNumber));
|
||||
}
|
||||
|
||||
list.Add(new DocumentText(text.Replace("\u0002", ""), paragraphNumber));
|
||||
}
|
||||
//for (; paragraphNumber <= total; paragraphNumber++)
|
||||
//{
|
||||
|
||||
// Paragraph p = paragraphs[paragraphNumber];
|
||||
|
||||
// //Logger.LogToWeb(string.Format("get paragraph {0}", paragraphNumber));
|
||||
// //frm.SetLoadingText(text);
|
||||
// //if (text.Trim().Length > 0)
|
||||
// //{
|
||||
// // //byte[] hash = md5.ComputeHash(Encoding.Default.GetBytes(text));
|
||||
|
||||
// //}
|
||||
//}
|
||||
//frm.Close();
|
||||
var map = new Dictionary<string, object>
|
||||
{
|
||||
@ -76,6 +60,36 @@ namespace AIProofread
|
||||
return map;
|
||||
}
|
||||
|
||||
/// <summary>
/// Collects the text of the active document's paragraphs whose numbers lie in
/// [<paramref name="start"/>, <paramref name="end"/>].
/// </summary>
/// <remarks>
/// The range is clamped to 1..paragraph count. A paragraph is skipped when its range
/// contains a table, an endnote or a footnote, when it belongs to a list, or when its
/// text is blank.
/// </remarks>
/// <param name="start">First paragraph number (1-based); values below 1 are treated as 1.</param>
/// <param name="end">Last paragraph number; values above the paragraph count are clamped.</param>
/// <returns>
/// One <c>DocumentText</c> per accepted paragraph, built from its text and paragraph number;
/// empty when <paramref name="start"/> is beyond the last paragraph.
/// </returns>
public static List<DocumentText> GetTextListByParagraphRange(int start, int end)
{
    var result = new List<DocumentText>();
    Paragraphs all = Globals.ThisAddIn.Application.ActiveDocument.Paragraphs;
    int count = all.Count;
    if (start > count)
    {
        return result;
    }

    int first = Math.Max(start, 1);
    int last = Math.Min(end, count);

    for (int number = first; number <= last; number++)
    {
        Paragraph current = all[number];
        Range range = current.Range;

        // Only plain body paragraphs are collected.
        bool isPlain = range.Tables.Count <= 0
            && range.Endnotes.Count <= 0
            && range.Footnotes.Count <= 0
            && range.ListFormat.ListType == WdListType.wdListNoNumbering;
        if (!isPlain)
        {
            continue;
        }

        string text = current.Range.Text;
        if (text.Trim().Length == 0)
        {
            continue;
        }

        result.Add(new DocumentText(text, number));
    }

    return result;
}
|
||||
|
||||
public static string GetJSONString(object data)
|
||||
{
|
||||
return JsonConvert.SerializeObject(data, Formatting.Indented);
|
||||
|
Loading…
x
Reference in New Issue
Block a user