1. 在 `Config.cs` 中新增多个环境常量字符串:
   - 新增 `ALL_IN_ONE`、`PRE` 和新的 `PROD` 常量。
   - 删除旧的 `PROD` 常量,原值为 `"http://aijdw1.goldmye.com/"`。
2. 调整 `WEB_PATH` 的值:
   - `#if DEBUG` 下改为 `AppServer.PRE`。
   - 非 `DEBUG` 下保持为 `AppServer.PROD`。
3. 在 `Tools.cs` 中注释掉两行 `Debug.WriteLine` 调试代码,减少不必要的调试输出。
319 lines
12 KiB
C#
319 lines
12 KiB
C#
using AIProofread.core;
|
|
using Microsoft.Office.Interop.Word;
|
|
using Newtonsoft.Json;
|
|
using NPOI;
|
|
using NPOI.POIFS.FileSystem;
|
|
using NPOI.XWPF.UserModel;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.IO.Compression;
|
|
using System.Runtime.InteropServices;
|
|
using System.Security.Cryptography;
|
|
|
|
namespace AIProofread
|
|
{
|
|
public class Tools
|
|
{
|
|
// Paragraph separator strings: CR LF, CR FF, CR, LF and FF.
// NOTE(review): not referenced by any method visible in this class - presumably
// meant as the separator list for String.Split; confirm before removing.
private static readonly string[] paragSplitor = new string[] { "\r\n", "\r\f", "\r", "\n", "\f" };
|
|
|
|
/// <summary>
/// Reads the text of a Word document paragraph by paragraph using NPOI.
/// </summary>
/// <remarks>
/// The document is first written to a separate file by
/// <see cref="GetReadDocumentFilePath"/>. That file is always deleted before this
/// method returns, including when reading fails with an exception.
/// </remarks>
/// <param name="doc">The open Word document to read.</param>
/// <returns>
/// A dictionary with two entries: <c>"list"</c> holds the list of
/// <c>DocumentText</c> items produced by NPOI, and <c>"text"</c> holds the
/// value of <c>doc.Range().Text</c>.
/// </returns>
public static Dictionary<string, object> GetAllText(Microsoft.Office.Interop.Word.Document doc)
{
    // Read from a separate copy of the document rather than the file Word has open.
    string docPath = Tools.GetReadDocumentFilePath(doc);
    try
    {
        // Concatenate the timestamp: Debug.WriteLine(string, string) would treat
        // the second argument as a category, not as part of the message.
        Debug.WriteLine("GetAllText Start ==> " + DateTime.Now.ToLongTimeString());

        // Full text of the document as reported by Word.
        string allText = doc.Range().Text;
        List<DocumentText> list = new List<DocumentText>();

        try
        {
            ReadTextByNPOI(docPath, list);
        }
        catch (POIXMLException ex)
        {
            // A broken numbering part makes NPOI fail; retry with numbering removed.
            if (ex.StackTrace != null && ex.StackTrace.Contains("NPOI.XWPF.UserModel.XWPFNumbering"))
            {
                RemoveNumbersReadTextByNPOI(docPath, list);
            }
            else
            {
                // Still best effort (whatever was read so far is returned),
                // but leave a trace instead of swallowing the error silently.
                Debug.WriteLine("GetAllText: NPOI could not read the document: " + ex);
            }
        }

        return new Dictionary<string, object>
        {
            { "list", list },
            { "text", allText }
        };
    }
    finally
    {
        // Remove the temporary copy on every exit path.
        File.Delete(docPath);
    }
}
|
|
|
|
/// <summary>
/// Fallback reader: deletes the <c>word/numbering.xml</c> entry from the .docx
/// package at <paramref name="filePath"/> and then reads the file again with
/// <c>ReadTextByNPOI</c>.
/// </summary>
/// <param name="filePath">Path of the .docx file; the file is modified in place.</param>
/// <param name="list">Receives the paragraphs; cleared before the second read.</param>
private static void RemoveNumbersReadTextByNPOI(string filePath, List<DocumentText> list)
{
    // Drop anything a previous, failed read attempt may have added.
    list.Clear();

    // ZipArchiveMode.Update requires a stream that can read, write and seek.
    // File.OpenWrite returns a write-only stream, which the ZipArchive
    // constructor rejects with ArgumentException, so open with ReadWrite access.
    using (var fs = new FileStream(filePath, FileMode.Open, FileAccess.ReadWrite))
    using (var archive = new ZipArchive(fs, ZipArchiveMode.Update))
    {
        // GetEntry returns null when the package has no numbering part.
        archive.GetEntry("word/numbering.xml")?.Delete();
    }

    ReadTextByNPOI(filePath, list);
}
|
|
|
|
/// <summary>
/// Opens the .docx file at <paramref name="filePath"/> with NPOI, walks its body
/// elements in order and appends the text it finds to <paramref name="list"/>,
/// tagging every item with a running, 1-based paragraph number.
/// </summary>
/// <param name="filePath">Path of the .docx file to read.</param>
/// <param name="list">Receives one <c>DocumentText</c> per collected paragraph.</param>
private static void ReadTextByNPOI(string filePath, List<DocumentText> list)
{
    using (var stream = File.OpenRead(filePath))
    {
        XWPFDocument ndoc = new XWPFDocument(stream);
        int paragraphNumber = 1;

        foreach (var bodyElement in ndoc.BodyElements)
        {
            if (bodyElement is XWPFParagraph p)
            {
                // Ordinary paragraph: U+0002 characters are stripped, and blank
                // paragraphs are counted but not added to the list.
                var text = p.ParagraphText.Replace("\u0002", "");
                if (text.Trim().Length > 0)
                {
                    list.Add(new DocumentText(text, paragraphNumber));
                }
                paragraphNumber++;
            }
            else if (bodyElement is XWPFTable table)
            {
                // Table: every cell paragraph is added, blank ones included
                // (the original note says VSTO also has a paragraph for each cell).
                foreach (var row in table.Rows)
                {
                    foreach (var cell in row.GetTableCells())
                    {
                        foreach (var pc in cell.Paragraphs)
                        {
                            list.Add(new DocumentText(pc.ParagraphText.Replace("\u0002", ""), paragraphNumber));
                            paragraphNumber++;
                        }
                    }
                    // One extra number per row.
                    // NOTE(review): presumably matches the end-of-row mark that
                    // Word counts as a paragraph - confirm against VSTO numbering.
                    paragraphNumber++;
                }
            }
            else if (bodyElement is XWPFSDT sdt)
            {
                // Structured document tag, treated here as the table of contents:
                // its entries are skipped for now, but the paragraph counter still
                // advances by the number of pieces the content splits into.
                string tocText = sdt.Content.Text;
                var tocEntries = tocText.Split(new string[] { "\n", "\r", "\r\n", "\f" }, StringSplitOptions.None);
                paragraphNumber += tocEntries.Length;
            }
        }
    }
}
|
|
|
|
/// <summary>
/// Saves <paramref name="doc"/>, copies its file to the temp directory and
/// returns the path of that copy. When the document's extension ends in "doc"
/// the copy is converted to .docx with a separate, invisible Word instance and
/// the path of the converted file is returned instead.
/// </summary>
/// <param name="doc">The open Word document; it is saved as a side effect.</param>
/// <returns>Path of a temporary file that the caller is expected to delete.</returns>
public static string GetReadDocumentFilePath(Microsoft.Office.Interop.Word.Document doc)
{
    // Save first so the file on disk holds the latest content.
    doc.Save();

    string docPath = doc.FullName;
    string ext = Path.GetExtension(docPath);
    string tmpFile = Path.GetTempFileName();

    try
    {
        // FileShare.ReadWrite: presumably required because Word still has the file open.
        using (var source = new FileStream(docPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        using (var target = new FileStream(tmpFile, FileMode.Create, FileAccess.Write))
        {
            // CopyTo reads until end of stream; a single Read call is allowed to
            // return fewer bytes than requested and would truncate the copy.
            source.CopyTo(target);
        }
    }
    catch
    {
        // Do not leave the empty temp file behind when the copy fails.
        File.Delete(tmpFile);
        throw;
    }

    if (ext.EndsWith("doc", StringComparison.OrdinalIgnoreCase))
    {
        // Keep the converted file in the temp directory as well, so nothing is
        // created (or overwritten) next to the user's own document.
        string convertedFile = Path.ChangeExtension(tmpFile, ".docx");
        try
        {
            ConvertDocToDocx(tmpFile, convertedFile);
        }
        finally
        {
            // The raw copy is no longer needed, whether or not conversion worked.
            File.Delete(tmpFile);
        }
        tmpFile = convertedFile;
    }

    // Interpolate explicitly: Debug.WriteLine(string, string) is the
    // (message, category) overload and never formats "{0}".
    Debug.WriteLine($"GetReadDocumentFilePath =>=> {tmpFile}");
    Debug.WriteLine($"GetReadDocumentFilePath End ==> {DateTime.Now.ToLongTimeString()}");
    return tmpFile;
}

// Opens sourcePath read-only in a new Word instance, saves it as a .docx at
// targetPath, and always closes the document and quits that instance.
private static void ConvertDocToDocx(string sourcePath, string targetPath)
{
    object missing = System.Reflection.Missing.Value;
    object saveOption = WdSaveOptions.wdDoNotSaveChanges;

    Application app = new Application();
    Microsoft.Office.Interop.Word.Document tmpDoc = null;
    try
    {
        tmpDoc = app.Documents.OpenNoRepairDialog(
            FileName: sourcePath,
            AddToRecentFiles: false,
            ReadOnly: true,
            Visible: false
        );
        tmpDoc.SaveAs2(
            FileName: targetPath,
            FileFormat: WdSaveFormat.wdFormatXMLDocument,
            AddToRecentFiles: false
        );
    }
    finally
    {
        try
        {
            if (tmpDoc != null)
            {
                tmpDoc.Close(ref saveOption, ref missing, ref missing);
                Marshal.ReleaseComObject(tmpDoc);
            }
        }
        finally
        {
            // Quit even if closing the document failed, otherwise the hidden
            // Word process stays alive and keeps the temp file locked.
            app.Quit(ref saveOption, ref missing, ref missing);
            Marshal.ReleaseComObject(app);
        }
    }
}
|
|
|
|
/// <summary>
/// Reads the text of a Word document through the Word object model, one
/// paragraph at a time.
/// </summary>
/// <param name="doc">The open Word document to read.</param>
/// <returns>
/// A dictionary with two entries: <c>"list"</c> holds one <c>DocumentText</c>
/// per kept paragraph (numbered from 1 in document order), and <c>"text"</c>
/// holds the value of <c>doc.Range().Text</c>.
/// </returns>
public static Dictionary<string, object> GetAllTextByVSTO(Microsoft.Office.Interop.Word.Document doc)
{
    // Full text of the document as reported by Word.
    string allText = doc.Range().Text;
    List<DocumentText> list = new List<DocumentText>();

    Paragraphs paragraphs = doc.Paragraphs;
    try
    {
        int paragraphNumber = 0;
        foreach (Paragraph p in paragraphs)
        {
            paragraphNumber++;
            Range r = p.Range;
            try
            {
                string text = r.Text;

                // Skip blank paragraphs, text ending in "\r\a" (presumably Word's
                // end-of-cell marker) and anything located inside a table.
                bool skip = text.Trim().Length == 0
                    || text.EndsWith("\r\a", StringComparison.Ordinal)
                    || r.Tables.Count > 0;
                if (!skip)
                {
                    list.Add(new DocumentText(text.Replace("\u0002", ""), paragraphNumber));
                }
            }
            finally
            {
                // Release the COM wrappers for skipped paragraphs too; the
                // early "continue" used to bypass the release.
                Marshal.ReleaseComObject(r);
                Marshal.ReleaseComObject(p);
            }
        }
    }
    finally
    {
        Marshal.ReleaseComObject(paragraphs);
    }

    return new Dictionary<string, object>
    {
        { "list", list },
        { "text", allText }
    };
}
|
|
|
|
/// <summary>
/// Collects the non-blank paragraphs of the active document whose 1-based
/// paragraph index lies between <paramref name="start"/> and
/// <paramref name="end"/> (both inclusive, clamped to the document).
/// Paragraphs that contain a table, an endnote, a footnote or list numbering
/// are left out.
/// </summary>
/// <param name="start">First paragraph index; values below 1 are treated as 1.</param>
/// <param name="end">Last paragraph index; values above the paragraph count are clamped.</param>
/// <returns>The collected paragraphs; empty when <paramref name="start"/> is past the end.</returns>
public static List<DocumentText> GetTextListByParagraphRange(int start, int end)
{
    var result = new List<DocumentText>();
    var activeDoc = Globals.ThisAddIn.Application.ActiveDocument;
    Paragraphs allParagraphs = activeDoc.Paragraphs;
    int count = allParagraphs.Count;

    // Nothing to collect when the requested range starts after the last paragraph.
    if (start > count)
    {
        return result;
    }

    int first = Math.Max(start, 1);
    int last = Math.Min(end, count);

    int index = first;
    while (index <= last)
    {
        Paragraph current = allParagraphs[index];
        Range span = current.Range;

        // Plain body text only: no table, endnote, footnote or list numbering.
        bool isPlain = span.Tables.Count <= 0
            && span.Endnotes.Count <= 0
            && span.Footnotes.Count <= 0
            && span.ListFormat.ListType == WdListType.wdListNoNumbering;

        if (isPlain)
        {
            string content = current.Range.Text;
            if (content.Trim().Length > 0)
            {
                result.Add(new DocumentText(content, index));
            }
        }

        index++;
    }

    return result;
}
|
|
|
|
/// <summary>
/// Serializes <paramref name="data"/> to an indented JSON string with Json.NET.
/// </summary>
/// <param name="data">The object to serialize.</param>
/// <returns>The indented JSON text.</returns>
public static string GetJSONString(object data)
{
    string json = JsonConvert.SerializeObject(data, Formatting.Indented);
    return json;
}
|
|
|
|
/// <summary>
/// Returns the device identifier stored in <c>deviceId.txt</c> under
/// <c>Config.APP_DATA_PATH</c>, creating and storing a new GUID when the file
/// is missing or contains no id.
/// </summary>
/// <returns>The stored or newly generated device identifier.</returns>
public static string GetDeviceId()
{
    string devicePath = Path.Combine(Config.APP_DATA_PATH, "deviceId.txt");

    // Reuse the stored id when there is one. A blank file (for example left by
    // an interrupted write) is treated as missing rather than returned as "".
    if (File.Exists(devicePath))
    {
        string stored = File.ReadAllText(devicePath).Trim();
        if (stored.Length > 0)
        {
            return stored;
        }
    }

    string deviceId = Guid.NewGuid().ToString().ToLowerInvariant();

    // WriteAllText does not create missing directories, so make sure the
    // target directory exists before writing.
    string directory = Path.GetDirectoryName(devicePath);
    if (!string.IsNullOrEmpty(directory))
    {
        Directory.CreateDirectory(directory);
    }

    // Persist the id as plain text so later calls return the same value.
    File.WriteAllText(devicePath, deviceId);
    return deviceId;
}
|
|
}
|
|
}
|