虚位以待(AD)
虚位以待(AD)
首页 > 软件编程 > C#编程 > 使用NOPI读取Word、Excel文档内容

使用NPOI读取Word、Excel文档内容
类别:C#编程   作者:码皇   来源:互联网   点击:

这篇文章主要为大家详细介绍了使用NPOI读取Word、Excel文档内容的方法,具有一定的参考价值,感兴趣的小伙伴们可以参考一下

使用NPOI读取Excel的例子很多,读取Word的例子不多。

Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)

也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

    using NPOI.POIFS.FileSystem;
    using NPOI.SS.UserModel;
    using NPOI.XSSF.UserModel;
    using NPOI.XWPF.UserModel;
    using System;
    using System.Collections.Generic;
    using System.Configuration;
    using System.IO;
    using System.Text;
    namespace eyuan{
    /// <summary>
    /// Static helpers that extract text from Excel (.xlsx) workbooks and
    /// Word (.docx) documents through the NPOI XSSF / XWPF object models.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of a workbook into nested lists
        /// (workbook -> sheet -> row -> cell text).
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// One entry per sheet, each holding one entry per defined row, each holding
        /// the text of the cells defined in that row. An empty list is returned when
        /// the file cannot be opened (the failure is written to the log).
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            // Open the workbook; the stream is only needed while the workbook is constructed.
            XSSFWorkbook workbook = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    workbook = new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                // Nothing can be read without a workbook; return the empty result
                // instead of dereferencing null below.
                return workBookContent;
            }

            // Sheet indexes start at 0.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // GetRow takes a logical (0-based) row index and yields null for rows that
                // were never defined, so walk up to LastRowNum and skip the gaps rather than
                // counting to PhysicalNumberOfRows.
                int lastRowIndex = sheet.LastRowNum;
                for (int j = 0; j <= lastRowIndex; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        continue;
                    }

                    // Rows may hold different numbers of cells; only defined cells are listed.
                    List<string> rowContent = new List<string>();
                    foreach (ICell cell in row.Cells)
                    {
                        rowContent.Add(cell == null ? "NIL" : cell.ToString());
                    }

                    sheetContent.Add(rowContent);
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Flattens a workbook into a single string, joining cells, rows and sheets with
        /// the separators configured in the application settings.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// The workbook text. Cells are joined with the "ExcelCellSeparator" setting, every
        /// row is followed by "ExcelRowSeparator" and every sheet by "ExcelSheetSeparator".
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            List<List<List<string>>> excelContent = ReadExcel(fileName);

            StringBuilder sbFileText = new StringBuilder();
            foreach (List<List<string>> sheetContent in excelContent)
            {
                foreach (List<string> rowContent in sheetContent)
                {
                    sbFileText.Append(string.Join(cellSeparator, rowContent.ToArray()));
                    sbFileText.Append(rowSeparator);
                }

                sbFileText.Append(sheetSeparator);
            }

            return sbFileText.ToString();
        }

        /// <summary>
        /// Reads the text content of a Word document.
        /// </summary>
        /// <remarks>
        /// Headers, footers, tables and embedded images are each included only when the
        /// matching application setting ("CaptureWordHeader", "CaptureWordFooter",
        /// "CaptureWordTable", "CaptureWordImage") equals the exact string "true".
        /// Body paragraphs are always included. Every section is wrapped in
        /// "Capture ... Begin" / "Capture ... End" marker lines.
        /// When image capture is on, each picture is written to a file whose path is built
        /// from the "CaptureWordImageFileName" setting (used as a format string with one
        /// placeholder), and that path is appended to the output.
        /// </remarks>
        /// <param name="fileName">Path of the document file to open.</param>
        /// <returns>
        /// The collected text, or an empty string when the file cannot be opened
        /// (the failure is written to the log).
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            string tableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string tableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            string captureHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
            string captureFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
            string captureTable = ConfigurationManager.AppSettings["CaptureWordTable"];
            string captureImage = ConfigurationManager.AppSettings["CaptureWordImage"];

            string imageFileNameFormat = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            StringBuilder sbFileText = new StringBuilder();

            // Open the document; the stream is only needed while the document is constructed.
            XWPFDocument document = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    document = new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                // Nothing can be read without a document; return empty text
                // instead of dereferencing null below.
                return string.Empty;
            }

            // Headers
            if (captureHeader == "true")
            {
                sbFileText.AppendLine("Capture Header Begin");
                foreach (XWPFHeader xwpfHeader in document.HeaderList)
                {
                    sbFileText.AppendLine(xwpfHeader.Text);
                }
                sbFileText.AppendLine("Capture Header End");
            }

            // Footers
            if (captureFooter == "true")
            {
                sbFileText.AppendLine("Capture Footer Begin");
                foreach (XWPFFooter xwpfFooter in document.FooterList)
                {
                    sbFileText.AppendLine(xwpfFooter.Text);
                }
                sbFileText.AppendLine("Capture Footer End");
            }

            // Tables
            if (captureTable == "true")
            {
                sbFileText.AppendLine("Capture Table Begin");
                foreach (XWPFTable table in document.Tables)
                {
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            sbFileText.Append(cell.GetText());
                            sbFileText.Append(tableCellSeparator);
                        }
                        sbFileText.Append(tableRowSeparator);
                    }
                    sbFileText.Append(tableSeparator);
                }
                sbFileText.AppendLine("Capture Table End");
            }

            // Images
            if (captureImage == "true")
            {
                sbFileText.AppendLine("Capture Image Begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.suggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // A fresh GUID prefix keeps the generated file names distinct.
                    string picTempName = string.Format(
                        imageFileNameFormat,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    // Creates the file, or overwrites an existing one.
                    File.WriteAllBytes(picTempName, picFileContent);

                    sbFileText.AppendLine(picTempName);
                }
                sbFileText.AppendLine("Capture Image End");
            }

            // Body paragraphs
            sbFileText.AppendLine("Capture Paragraph Begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                sbFileText.AppendLine(paragraph.ParagraphText);
            }
            sbFileText.AppendLine("Capture Paragraph End");

            return sbFileText.ToString();
        }
    }
    }

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持脚本之家。

您可能感兴趣的文章:

  • C#实现DataSet内数据转化为Excel和Word文件的通用类完整实例
  • C#实现将数据导出到word或者Excel中的方法
  • word ppt excel文档转换成pdf的C#实现代码
相关热词搜索: NOPI Word Excel