NanoXLSX.Reader 3.1.0
Loading...
Searching...
No Matches
XlsxReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.IO.Compression;
12using System.Linq;
13using System.Threading.Tasks;
14using NanoXLSX.Exceptions;
15using NanoXLSX.Interfaces.Reader;
16using NanoXLSX.Registry;
17using NanoXLSX.Styles;
18using NanoXLSX.Utils;
19using IOException = NanoXLSX.Exceptions.IOException;
20
22{
/// <summary>
/// Reader that loads an XLSX package from a file path or a stream and exposes the result as a <see cref="Workbook"/>
/// </summary>
public class XlsxReader : IDisposable
{
    #region privateFields
    // Path of the file to read; null when the reader was created from a stream
    private readonly string filePath;
    // Stream to read from; null when the reader was created from a file path
    private readonly Stream inputStream;
    // Options that are handed to every part reader; may be null
    private readonly ReaderOptions readerOptions;
    // In-memory copy of the whole package; only usable while Read / ReadAsync is running
    private MemoryStream memoryStream;
    #endregion

    #region properties
    /// <summary>
    /// Gets the read workbook. The property is assigned at the end of a successful read operation
    /// </summary>
    public Workbook Workbook { get; internal set; }
    #endregion

    #region constructors
    /// <summary>
    /// Constructor with file path as parameter
    /// </summary>
    /// <param name="path">File path of the XLSX file to load</param>
    /// <param name="options">Optional reader options; passed unchanged to the part readers</param>
    public XlsxReader(string path, ReaderOptions options = null)
    {
        filePath = path;
        readerOptions = options;
    }

    /// <summary>
    /// Constructor with stream as parameter
    /// </summary>
    /// <param name="stream">Stream of the XLSX file to load. The stream is disposed after it was read, and when the reader is disposed</param>
    /// <param name="options">Optional reader options; passed unchanged to the part readers</param>
    public XlsxReader(Stream stream, ReaderOptions options = null)
    {
        inputStream = stream;
        readerOptions = options;
    }
    #endregion

    #region methods

    /// <summary>
    /// Reads the XLSX file from a file path or a file stream
    /// </summary>
    /// <exception cref="NotSupportedContentException">Passed through unchanged if raised by one of the part readers</exception>
    /// <exception cref="IOException">Thrown on any other error; the original exception is attached as inner exception</exception>
    public void Read()
    {
        try
        {
            using (memoryStream = new MemoryStream())
            {
                // GetAwaiter().GetResult() rethrows the original exception instead of an AggregateException,
                // so the specific catch clauses below can still match
                Task.Run(() => ReadInternal()).GetAwaiter().GetResult();
            }
        }
        catch (NotSupportedContentException)
        {
            throw; // rethrow
        }
        catch (IOException)
        {
            throw; // rethrow
        }
        catch (Exception ex)
        {
            throw new IOException("There was an error while reading an XLSX file. Please see the inner exception:", ex);
        }
    }

    /// <summary>
    /// Reads the XLSX file from a file path or a file stream asynchronously
    /// </summary>
    /// <returns>Task that completes when the workbook was read</returns>
    /// <exception cref="IOException">Thrown on any error; the original exception is attached as inner exception</exception>
    public async Task ReadAsync()
    {
        try
        {
            using (memoryStream = new MemoryStream())
            {
                await ReadInternal().ConfigureAwait(false);
            }
        }
        // NOTE(review): unlike Read(), a NotSupportedContentException is not passed through here but wrapped
        // into an IOException by the generic handler below. Kept as is, since changing it would alter the
        // exception type callers observe - confirm whether the difference is intended
        catch (IOException)
        {
            throw; // rethrow
        }
        catch (Exception ex)
        {
            throw new IOException("There was an error while reading an XLSX file. Please see the inner exception:", ex);
        }
    }

    /// <summary>
    /// Copies the package from the file or the input stream into the memory stream and processes it as ZIP archive
    /// </summary>
    /// <returns>Task that completes when all parts were processed</returns>
    /// <exception cref="IOException">Thrown if neither a stream nor a non-empty file path was provided</exception>
    private async Task ReadInternal()
    {
        if (inputStream == null && !string.IsNullOrEmpty(filePath))
        {
            // Request read access explicitly: the (path, mode) constructor asks for read/write access,
            // which fails on read-only files although the reader never writes
            using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                await fs.CopyToAsync(memoryStream).ConfigureAwait(false);
            }
        }
        else if (inputStream != null)
        {
            using (inputStream)
            {
                await inputStream.CopyToAsync(memoryStream).ConfigureAwait(false);
            }
        }
        else
        {
            throw new IOException("No valid stream or file path was provided to open");
        }

        memoryStream.Position = 0;
        // leaveOpen: the memory stream is owned (and disposed) by Read / ReadAsync
        using (ZipArchive zf = new ZipArchive(memoryStream, ZipArchiveMode.Read, true))
        {
            await Task.Run(() =>
            {
                ReadZip(zf);
            }).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Processes all parts of the package and assigns the resulting workbook to <see cref="Workbook"/>
    /// </summary>
    /// <param name="zf">Opened ZIP archive of the XLSX package</param>
    /// <exception cref="IOException">Thrown if a worksheet relationship or worksheet entry is missing, or if the workbook contains no worksheet</exception>
    private void ReadZip(ZipArchive zf)
    {
        Workbook wb = new Workbook
        {
            importInProgress = true // Disables checks during load
        };
        Dictionary<string, ZipArchiveEntry> entryLookup = new Dictionary<string, ZipArchiveEntry>(zf.Entries.Count, StringComparer.Ordinal);
        foreach (ZipArchiveEntry entry in zf.Entries)
        {
            entryLookup[entry.FullName] = entry;
        }
        HandleQueuePlugIns(PlugInUUID.ReaderPrependingQueue, entryLookup, ref wb);

        ISharedStringReader sharedStringsReader = PlugInLoader.GetPlugIn<ISharedStringReader>(PlugInUUID.SharedStringsReader, new SharedStringsReader());
        if (entryLookup.TryGetValue("xl/sharedStrings.xml", out ZipArchiveEntry sharedStringsEntry) && sharedStringsEntry.Length > 0)
        {
            if (PlugInLoader.HasQueuePlugins(PlugInUUID.SharedStringsInlineReader))
            {
                // Inline plugins need a seekable stream; buffer so the handler can reset position
                using (MemoryStream ssMs = GetEntryStream("xl/sharedStrings.xml", entryLookup))
                {
                    sharedStringsReader.Init(ssMs, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                    sharedStringsReader.Execute();
                }
            }
            else
            {
                // Direct-stream from ZIP entry - no intermediate MemoryStream
                using (Stream sharedStringsStream = sharedStringsEntry.Open())
                {
                    sharedStringsReader.Init(sharedStringsStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                    sharedStringsReader.Execute();
                }
            }
        }

        Dictionary<int, string> themeStreamNames = GetSequentialStreamNames("xl/theme/theme", entryLookup);
        if (themeStreamNames.Count > 0)
        {
            // There is not really a definition whether multiple themes can be managed in one workbook.
            // The suffix number (e.g. theme1) indicates it. However, no examples were found and therefore
            // (currently) only the first occurring theme will be read
            IPluginBaseReader themeReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.ThemeReader, new ThemeReader());
            using (MemoryStream themeStream = GetEntryStream(themeStreamNames.First().Value, entryLookup))
            {
                themeReader.Init(themeStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                themeReader.Execute();
            }
        }

        StyleRepository.Instance.ImportInProgress = true; // TODO: To be checked
        try
        {
            IPluginBaseReader styleReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.StyleReader, new StyleReader());
            using (MemoryStream styleStream = GetEntryStream("xl/styles.xml", entryLookup))
            {
                styleReader.Init(styleStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                styleReader.Execute();
            }
        }
        finally
        {
            // The repository is a shared instance: reset the flag even if the style reader fails,
            // otherwise it would remain set after this read operation has ended
            StyleRepository.Instance.ImportInProgress = false;
        }

        using (MemoryStream workbookStream = GetEntryStream("xl/workbook.xml", entryLookup))
        {
            IPluginBaseReader workbookReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.WorkbookReader, new WorkbookReader());
            workbookReader.Init(workbookStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
            workbookReader.Execute();
        }

        using (MemoryStream appStream = GetEntryStream("docProps/app.xml", entryLookup))
        {
            if (appStream != null && appStream.Length > 0) // If null/length == 0, no docProps/app.xml seems to be defined
            {
                IPluginBaseReader metadataAppReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.MetadataAppReader, new MetadataAppReader());
                metadataAppReader.Init(appStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                metadataAppReader.Execute();
            }
        }
        using (MemoryStream coreStream = GetEntryStream("docProps/core.xml", entryLookup))
        {
            if (coreStream != null && coreStream.Length > 0) // If null/length == 0, no docProps/core.xml seems to be defined
            {
                IPluginBaseReader metadataCoreReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.MetadataCoreReader, new MetadataCoreReader());
                metadataCoreReader.Init(coreStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                metadataCoreReader.Execute();
            }
        }

        IPluginBaseReader relationships = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.RelationshipReader, new RelationshipReader());
        using (MemoryStream relationshipStream = GetEntryStream("xl/_rels/workbook.xml.rels", entryLookup))
        {
            relationships.Init(relationshipStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
            relationships.Execute();
        }

        IWorksheetReader worksheetReader = PlugInLoader.GetPlugIn<IWorksheetReader>(PlugInUUID.WorksheetReader, new WorksheetReader());
        worksheetReader.SharedStrings = sharedStringsReader.SharedStrings;
        List<Relationship> relationshipDefinitions = wb.AuxiliaryData.GetDataList<Relationship>(PlugInUUID.RelationshipReader, PlugInUUID.RelationshipEntity);
        int worksheetVisualIndex = 0;
        WorksheetDefinition definition;
        // The worksheet definitions are requested by ascending index until no further definition is returned
        while ((definition = wb.AuxiliaryData.GetData<WorksheetDefinition>(PlugInUUID.WorkbookReader, PlugInUUID.WorksheetDefinitionEntity, worksheetVisualIndex)) != null)
        {
            Relationship relationship = relationshipDefinitions.SingleOrDefault(r => r.RID == definition.RelId);
            if (relationship == null)
            {
                throw new IOException("There was an error while reading an XLSX file. The relationship target of the worksheet with the RelID " + definition.RelId + " was not found");
            }
            if (!entryLookup.TryGetValue(relationship.Target, out ZipArchiveEntry worksheetEntry))
            {
                throw new IOException("There was an error while reading an XLSX file. The worksheet entry '" + relationship.Target + "' was not found in the archive");
            }
            worksheetReader.CurrentWorksheetID = worksheetVisualIndex;
            if (PlugInLoader.HasQueuePlugins(PlugInUUID.WorksheetInlineReader))
            {
                // Inline plugins need a seekable stream; buffer so the handler can reset position
                using (MemoryStream wsMs = GetEntryStream(relationship.Target, entryLookup))
                {
                    worksheetReader.Init(wsMs, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                    worksheetReader.Execute();
                }
            }
            else
            {
                // Direct-stream from ZIP entry - largest single allocation on big files
                using (Stream worksheetStream = worksheetEntry.Open())
                {
                    worksheetReader.Init(worksheetStream, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
                    worksheetReader.Execute();
                }
            }
            worksheetVisualIndex++;
        }
        if (wb.Worksheets.Count == 0)
        {
            throw new IOException("No worksheet was found in the workbook");
        }
        HandleQueuePlugIns(PlugInUUID.ReaderAppendingQueue, entryLookup, ref wb);
        wb.importInProgress = false; // Enables checks for runtime
        wb.AuxiliaryData.ClearTemporaryData(); // Remove temporary staging data
        this.Workbook = wb;
    }

    /// <summary>
    /// Copies the content of a ZIP entry into a new memory stream, positioned at the start
    /// </summary>
    /// <param name="name">Full name of the entry within the archive</param>
    /// <param name="entryLookup">Lookup of all archive entries by their full name</param>
    /// <returns>Memory stream with the entry content, or null if no entry with the given name exists</returns>
    private static MemoryStream GetEntryStream(string name, Dictionary<string, ZipArchiveEntry> entryLookup)
    {
        if (!entryLookup.TryGetValue(name, out ZipArchiveEntry entry))
        {
            return null;
        }
        // Pre-size the buffer to the uncompressed length, capped to the maximum capacity of a MemoryStream argument
        int capacity = (int)Math.Min(entry.Length, int.MaxValue);
        MemoryStream ms = new MemoryStream(capacity);
        using (Stream src = entry.Open())
        {
            src.CopyTo(ms);
        }
        ms.Position = 0;
        return ms;
    }

    /// <summary>
    /// Collects the names of consecutively numbered entries (prefix + number + ".xml"), starting at number 1
    /// </summary>
    /// <param name="namePrefix">Entry name without the number and the file extension</param>
    /// <param name="entryLookup">Lookup of all archive entries by their full name</param>
    /// <returns>Dictionary with the number as key and the entry name as value; empty if number 1 does not exist</returns>
    private static Dictionary<int, string> GetSequentialStreamNames(string namePrefix, Dictionary<string, ZipArchiveEntry> entryLookup)
    {
        Dictionary<int, string> files = new Dictionary<int, string>();
        // Assumption: There is no file that has the index 0 in its name. The first gap ends the sequence
        for (int index = 1; ; index++)
        {
            string name = namePrefix + ParserUtils.ToString(index) + ".xml";
            if (!entryLookup.ContainsKey(name))
            {
                break;
            }
            files.Add(index, name);
        }
        return files;
    }

    /// <summary>
    /// Executes all queue plug-ins that are registered for the given queue, one after another
    /// </summary>
    /// <param name="queueUuid">UUID of the plug-in queue</param>
    /// <param name="entryLookup">Lookup of all archive entries by their full name</param>
    /// <param name="workbook">Workbook that is handed to each plug-in</param>
    private void HandleQueuePlugIns(string queueUuid, Dictionary<string, ZipArchiveEntry> entryLookup, ref Workbook workbook)
    {
        string lastUuid = null;
        while (true)
        {
            IPluginQueueReader queueReader = PlugInLoader.GetNextQueuePlugIn<IPluginQueueReader>(queueUuid, lastUuid, out string currentUuid);
            if (queueReader == null)
            {
                return; // No further plug-in in this queue
            }
            lastUuid = currentUuid;

            MemoryStream ms = null;
            if (queueReader is IPluginPackageReader packageReader)
            {
                string streamPartName = packageReader.StreamEntryName;
                if (!string.IsNullOrEmpty(streamPartName))
                {
                    ms = GetEntryStream(streamPartName, entryLookup);
                    if (ms == null)
                    {
                        continue; // Skip if the stream part name was defined but not found
                    }
                }
            }
            // The stream is intentionally not disposed here: whether a queue plug-in keeps using it after
            // Execute() is not visible from this class
            queueReader.Init(ms, workbook, this.readerOptions, null); // stream may be null, inlinePluginAction is not used here
            queueReader.Execute();
        }
    }

    /// <summary>
    /// Disposes the XlsxReader instance, including the input stream if the reader was created from a stream
    /// </summary>
    public void Dispose()
    {
        this.inputStream?.Dispose();
        GC.SuppressFinalize(this);
    }


    #endregion
}
391}
XlsxReader(string path, ReaderOptions options=null)
Constructor with file path as parameter.
Definition XlsxReader.cs:48
XlsxReader(Stream stream, ReaderOptions options=null)
Constructor with stream as parameter.
Definition XlsxReader.cs:59
async Task ReadAsync()
Reads the XLSX file from a file path or a file stream asynchronously.
void Dispose()
Disposes the XlsxReader instance.
void Read()
Reads the XLSX file from a file path or a file stream.
Definition XlsxReader.cs:74
Workbook Workbook
Gets the read workbook.
Definition XlsxReader.cs:39
Exceptions.IOException IOException