NanoXLSX.Reader 3.0.0-rc.2
Loading...
Searching...
No Matches
XlsxReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2025
4 * This library is licensed under the MIT License.
5 * You can find a copy of the license in the project folder or at: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.IO.Compression;
12using System.Linq;
13using System.Threading.Tasks;
14using NanoXLSX.Exceptions;
15using NanoXLSX.Interfaces.Plugin;
17using NanoXLSX.Registry;
18using NanoXLSX.Styles;
19using NanoXLSX.Utils;
20using IOException = NanoXLSX.Exceptions.IOException;
21
23{
27 public class XlsxReader : IDisposable
28 {
29 #region privateFields
30 private readonly string filePath;
31 private readonly Stream inputStream;
32 private readonly ReaderOptions readerOptions;
33 private MemoryStream memoryStream;
34 #endregion
35
36 #region properties
40 public Workbook Workbook { get; internal set; }
41 #endregion
42
43 #region constructors
/// <summary>
/// Constructor with file path as parameter. The file is not opened here; it is opened when one of the read methods is called.
/// </summary>
/// <param name="path">Path of the XLSX file to read.</param>
/// <param name="options">Optional reader options that are handed to the individual part readers (may be null).</param>
public XlsxReader(string path, ReaderOptions options = null)
{
    this.readerOptions = options;
    this.filePath = path;
}
54
/// <summary>
/// Constructor with stream as parameter. The stream is not consumed here; it is copied when one of the read methods is called.
/// </summary>
/// <param name="stream">Stream that contains the XLSX data.</param>
/// <param name="options">Optional reader options that are handed to the individual part readers (may be null).</param>
public XlsxReader(Stream stream, ReaderOptions options = null)
{
    this.readerOptions = options;
    this.inputStream = stream;
}
65 #endregion
66
67 #region methods
68
/// <summary>
/// Reads the XLSX file from a file path or a file stream (synchronous variant).
/// </summary>
/// <exception cref="NotSupportedContentException">Passed through unchanged if it occurs during reading.</exception>
/// <exception cref="IOException">Passed through unchanged if it occurs during reading; any other exception is wrapped in an IOException (see inner exception).</exception>
public void Read()
{
    try
    {
        using (memoryStream = new MemoryStream())
        {
            // The async pipeline is executed on the thread pool before blocking on it. Blocking directly on
            // ReadInternal() could deadlock when the caller has a single-threaded synchronization context
            // (e.g. a UI thread), because continuations inside ReadInternal would be posted to the blocked thread.
            // GetAwaiter().GetResult() rethrows the original exception instead of an AggregateException.
            Task.Run(() => ReadInternal()).GetAwaiter().GetResult();
        }
    }
    catch (NotSupportedContentException)
    {
        throw; // rethrow
    }
    catch (IOException)
    {
        throw; // rethrow
    }
    catch (Exception ex)
    {
        throw new IOException("There was an error while reading an XLSX file. Please see the inner exception:", ex);
    }
}
97
/// <summary>
/// Reads the XLSX file from a file path or a file stream asynchronously.
/// </summary>
/// <returns>Task that completes when the workbook was read.</returns>
/// <exception cref="NotSupportedContentException">Passed through unchanged if it occurs during reading (same behavior as the synchronous Read method).</exception>
/// <exception cref="IOException">Passed through unchanged if it occurs during reading; any other exception is wrapped in an IOException (see inner exception).</exception>
public async Task ReadAsync()
{
    try
    {
        using (memoryStream = new MemoryStream())
        {
            // Library code: the remaining work (disposing the buffer) does not need the caller's context
            await ReadInternal().ConfigureAwait(false);
        }
    }
    catch (NotSupportedContentException)
    {
        throw; // rethrow, consistent with Read()
    }
    catch (IOException)
    {
        throw; // rethrow
    }
    catch (Exception ex)
    {
        throw new IOException("There was an error while reading an XLSX file. Please see the inner exception:", ex);
    }
}
123
/// <summary>
/// Copies the XLSX data (from the file path or the input stream) into the internal memory stream,
/// opens it as ZIP archive and reads its content.
/// </summary>
/// <returns>Task that completes when the archive was processed.</returns>
/// <exception cref="IOException">Thrown if neither an input stream nor a non-empty file path is available.</exception>
private async Task ReadInternal()
{
    if (inputStream == null && !string.IsNullOrEmpty(filePath))
    {
        // Explicit read access: without it, FileMode.Open requests read/write access and fails on read-only files
        using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            await fs.CopyToAsync(memoryStream).ConfigureAwait(false);
        }
    }
    else if (inputStream != null)
    {
        // The passed stream is disposed after it was copied
        using (inputStream)
        {
            await inputStream.CopyToAsync(memoryStream).ConfigureAwait(false);
        }
    }
    else
    {
        throw new IOException("No valid stream or file path was provided to open");
    }

    memoryStream.Position = 0;
    // leaveOpen = true: the memory stream is owned (and disposed) by the calling read method
    using (ZipArchive zf = new ZipArchive(memoryStream, ZipArchiveMode.Read, true))
    {
        await Task.Run(() =>
        {
            ReadZip(zf);
        }).ConfigureAwait(false);
    }
}
158
/// <summary>
/// Reads the parts of the XLSX archive (shared strings, theme, styles, workbook, metadata, relationships and worksheets),
/// runs the prepending and appending plug-in queues and assigns the result to the Workbook property.
/// </summary>
/// <param name="zf">Opened ZIP archive of the XLSX file.</param>
/// <exception cref="IOException">Thrown if the relationship of a worksheet definition was not found, or if the workbook contains no worksheet after reading.</exception>
private void ReadZip(ZipArchive zf)
{
    MemoryStream ms;
    Workbook wb = new Workbook
    {
        importInProgress = true // Disables checks during load
    };
    HandleQueuePlugIns(PlugInUUID.ReaderPrependingQueue, zf, ref wb);

    ISharedStringReader sharedStringsReader = PlugInLoader.GetPlugIn<ISharedStringReader>(PlugInUUID.SharedStringsReader, new SharedStringsReader());
    ms = GetEntryStream("xl/sharedStrings.xml", zf);
    if (ms != null && ms.Length > 0) // If length == 0, no shared strings are defined (no text in file)
    {
        sharedStringsReader.Init(ms, wb, readerOptions);
        sharedStringsReader.Execute();
    }
    Dictionary<int, string> themeStreamNames = GetSequentialStreamNames("xl/theme/theme", zf);
    if (themeStreamNames.Count > 0)
    {
        // There is not really a definition whether multiple themes can be managed in one workbook.
        // the suffix number (e.g. theme1) indicates it. However, no examples were found and therefore
        // (currently) only the first occurring theme will be read
        string firstThemeName = themeStreamNames.Values.First();
        IPlugInReader themeReader = PlugInLoader.GetPlugIn<IPlugInReader>(PlugInUUID.ThemeReader, new ThemeReader());
        ms = GetEntryStream(firstThemeName, zf);
        themeReader.Init(ms, wb, readerOptions);
        themeReader.Execute();
    }
    StyleRepository.Instance.ImportInProgress = true; // TODO: To be checked
    try
    {
        IPlugInReader styleReader = PlugInLoader.GetPlugIn<IPlugInReader>(PlugInUUID.StyleReader, new StyleReader());
        ms = GetEntryStream("xl/styles.xml", zf);
        styleReader.Init(ms, wb, readerOptions);
        styleReader.Execute();
    }
    finally
    {
        // The repository is a shared instance: the flag must be reset even if reading the styles fails
        StyleRepository.Instance.ImportInProgress = false;
    }

    ms = GetEntryStream("xl/workbook.xml", zf);
    IPlugInReader workbookReader = PlugInLoader.GetPlugIn<IPlugInReader>(PlugInUUID.WorkbookReader, new WorkbookReader());
    workbookReader.Init(ms, wb, readerOptions);
    workbookReader.Execute();

    ms = GetEntryStream("docProps/app.xml", zf);
    if (ms != null && ms.Length > 0) // If null/length == 0, no docProps/app.xml seems to be defined
    {
        IPlugInReader metadataAppReader = PlugInLoader.GetPlugIn<IPlugInReader>(PlugInUUID.MetadataAppReader, new MetadataAppReader());
        metadataAppReader.Init(ms, wb, readerOptions);
        metadataAppReader.Execute();
    }
    ms = GetEntryStream("docProps/core.xml", zf);
    if (ms != null && ms.Length > 0) // If null/length == 0, no docProps/core.xml seems to be defined
    {
        IPlugInReader metadataCoreReader = PlugInLoader.GetPlugIn<IPlugInReader>(PlugInUUID.MetadataCoreReader, new MetadataCoreReader());
        metadataCoreReader.Init(ms, wb, readerOptions);
        metadataCoreReader.Execute();
    }

    IPlugInReader relationships = PlugInLoader.GetPlugIn<IPlugInReader>(PlugInUUID.RelationshipReader, new RelationshipReader());
    ms = GetEntryStream("xl/_rels/workbook.xml.rels", zf);
    relationships.Init(ms, wb, readerOptions);
    relationships.Execute();

    IWorksheetReader worksheetReader = PlugInLoader.GetPlugIn<IWorksheetReader>(PlugInUUID.WorksheetReader, new WorksheetReader());
    worksheetReader.SharedStrings = sharedStringsReader.SharedStrings;
    // Worksheet definitions and relationships were staged in the auxiliary data by the workbook and relationship readers
    List<WorksheetDefinition> worksheetDefinitions = wb.AuxiliaryData.GetDataList<WorksheetDefinition>(PlugInUUID.WorkbookReader, PlugInUUID.WorksheetDefinitionEntity);
    List<Relationship> relationshipDefinitions = wb.AuxiliaryData.GetDataList<Relationship>(PlugInUUID.RelationshipReader, PlugInUUID.RelationshipEntity);
    foreach (WorksheetDefinition definition in worksheetDefinitions)
    {
        Relationship relationship = relationshipDefinitions.SingleOrDefault(r => r.RID == definition.RelId);
        if (relationship == null)
        {
            throw new IOException("There was an error while reading an XLSX file. The relationship target of the worksheet with the RelID " + definition.RelId + " was not found");
        }
        ms = GetEntryStream(relationship.Target, zf);
        worksheetReader.Init(ms, wb, readerOptions);
        worksheetReader.CurrentWorksheetID = definition.SheetID;
        worksheetReader.Execute();
    }
    if (wb.Worksheets.Count == 0)
    {
        throw new IOException("No worksheet was found in the workbook");
    }
    HandleQueuePlugIns(PlugInUUID.ReaderAppendingQueue, zf, ref wb);
    wb.importInProgress = false; // Enables checks for runtime
    wb.AuxiliaryData.ClearTemporaryData(); // Remove temporary staging data
    this.Workbook = wb;
}
251
/// <summary>
/// Gets the content of the specified archive entry as an independent memory stream.
/// </summary>
/// <param name="name">Full name of the entry within the archive (exact, case-sensitive match).</param>
/// <param name="archive">Opened ZIP archive to search in.</param>
/// <returns>Memory stream with the entry content, positioned at 0, or null if no entry with that name exists.</returns>
private static MemoryStream GetEntryStream(string name, ZipArchive archive)
{
    if (name == null)
    {
        return null; // No entry can match a null name; GetEntry would throw instead
    }
    ZipArchiveEntry entry = archive.GetEntry(name);
    if (entry == null)
    {
        return null;
    }
    MemoryStream ms = new MemoryStream();
    // The entry stream is only needed for the copy and is released right after it
    using (Stream entryStream = entry.Open())
    {
        entryStream.CopyTo(ms);
    }
    ms.Position = 0;
    return ms;
}
274
/// <summary>
/// Collects the names of consecutively numbered archive entries (prefix + number + ".xml"), starting at number 1.
/// The search stops at the first number for which no entry exists.
/// </summary>
/// <param name="namePrefix">Entry name prefix, placed in front of the number.</param>
/// <param name="archive">Opened ZIP archive to search in.</param>
/// <returns>Dictionary with the number as key and the full entry name as value; empty if no entry was found.</returns>
private static Dictionary<int, string> GetSequentialStreamNames(string namePrefix, ZipArchive archive)
{
    Dictionary<int, string> files = new Dictionary<int, string>();
    // Assumption: There is no file that has the index 0 in its name
    for (int index = 1; ; index++)
    {
        string name = namePrefix + ParserUtils.ToString(index) + ".xml";
        // Only the existence is relevant here: the entry is looked up without extracting its content
        if (archive.GetEntry(name) == null)
        {
            break;
        }
        files.Add(index, name);
    }
    return files;
}
301
/// <summary>
/// Executes all reader plug-ins that are registered in the specified queue, one after another.
/// </summary>
/// <param name="queueUuid">UUID of the plug-in queue to process.</param>
/// <param name="zf">Opened ZIP archive, used to resolve the stream of package reader plug-ins.</param>
/// <param name="workbook">Workbook reference that is passed to each plug-in.</param>
private void HandleQueuePlugIns(string queueUuid, ZipArchive zf, ref Workbook workbook)
{
    string previousUuid = null;
    while (true)
    {
        string currentUuid;
        IPlugInReader plugIn = PlugInLoader.GetNextQueuePlugIn<IPlugInReader>(queueUuid, previousUuid, out currentUuid);
        if (plugIn == null)
        {
            break; // No further plug-in was returned for this queue
        }
        // The next lookup continues after the current plug-in, regardless of whether it is executed or skipped
        previousUuid = currentUuid;

        MemoryStream partStream = null;
        IPlugInPackageReader packageReader = plugIn as IPlugInPackageReader;
        if (packageReader != null)
        {
            string streamPartName = packageReader.StreamEntryName;
            if (!string.IsNullOrEmpty(streamPartName))
            {
                partStream = GetEntryStream(streamPartName, zf);
                if (partStream == null)
                {
                    continue; // Skip if the stream part name was defined but not found
                }
            }
        }
        plugIn.Init(partStream, workbook, this.readerOptions); // stream may be null
        plugIn.Execute();
    }
}
343
/// <summary>
/// Disposes the XlsxReader instance: releases the input stream (if one was passed) and suppresses finalization.
/// </summary>
public void Dispose()
{
    if (this.inputStream != null)
    {
        this.inputStream.Dispose();
    }
    GC.SuppressFinalize(this);
}
352
353
354 #endregion
355 }
356}
XlsxReader(string path, ReaderOptions options=null)
Constructor with file path as parameter.
Definition XlsxReader.cs:49
XlsxReader(Stream stream, ReaderOptions options=null)
Constructor with stream as parameter.
Definition XlsxReader.cs:60
async Task ReadAsync()
Reads the XLSX file from a file path or a file stream asynchronously.
void Dispose()
Disposes the XlsxReader instance.
void Read()
Reads the XLSX file from a file path or a file stream.
Definition XlsxReader.cs:75
Workbook Workbook
Gets the read workbook.
Definition XlsxReader.cs:40
The reader options define global rules, applied when loading a worksheet. The options are mainly to o...
Exceptions.IOException IOException