NanoXLSX.Reader 3.0.0-rc.5
Loading...
Searching...
No Matches
XlsxReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.IO.Compression;
12using System.Linq;
13using System.Threading.Tasks;
14using NanoXLSX.Exceptions;
15using NanoXLSX.Interfaces.Reader;
16using NanoXLSX.Registry;
17using NanoXLSX.Styles;
18using NanoXLSX.Utils;
19using IOException = NanoXLSX.Exceptions.IOException;
20
22{
26 public class XlsxReader : IDisposable
27 {
28 #region privateFields
29 private readonly string filePath;
30 private readonly Stream inputStream;
31 private readonly ReaderOptions readerOptions;
32 private MemoryStream memoryStream;
33 #endregion
34
35 #region properties
39 public Workbook Workbook { get; internal set; }
40 #endregion
41
42 #region constructors
/// <summary>
/// Constructor with file path as parameter
/// </summary>
/// <param name="path">Path of the XLSX file that is opened when one of the read methods is called</param>
/// <param name="options">Optional reader options, handed over to all reader components (may be null)</param>
public XlsxReader(string path, ReaderOptions options = null)
{
    // Nothing is opened here; the path is only stored until Read / ReadAsync is called
    this.readerOptions = options;
    this.filePath = path;
}
53
/// <summary>
/// Constructor with stream as parameter
/// </summary>
/// <param name="stream">Stream of the XLSX file. The stream is consumed when one of the read methods is called</param>
/// <param name="options">Optional reader options, handed over to all reader components (may be null)</param>
public XlsxReader(Stream stream, ReaderOptions options = null)
{
    // The stream is only stored here; it is copied and evaluated on Read / ReadAsync
    this.readerOptions = options;
    this.inputStream = stream;
}
64 #endregion
65
66 #region methods
67
/// <summary>
/// Reads the XLSX file from a file path or a file stream
/// </summary>
/// <exception cref="NotSupportedContentException">Passed through unchanged if raised during reading</exception>
/// <exception cref="IOException">Passed through unchanged if raised during reading. Any other exception
/// is wrapped into an IOException, carrying the original as inner exception</exception>
public void Read()
{
    try
    {
        // The buffer only lives for the duration of this call
        using (memoryStream = new MemoryStream())
        {
            Task readTask = ReadInternal();
            readTask.GetAwaiter().GetResult(); // Blocks until finished and surfaces the original exception (no AggregateException)
        }
    }
    catch (Exception ex) when (!(ex is NotSupportedContentException) && !(ex is IOException))
    {
        // Library exceptions are excluded by the filter and propagate as they are
        throw new IOException("There was an error while reading an XLSX file. Please see the inner exception:", ex);
    }
}
96
/// <summary>
/// Reads the XLSX file from a file path or a file stream asynchronously
/// </summary>
/// <returns>Task that completes when the workbook was read</returns>
/// <exception cref="NotSupportedContentException">Passed through unchanged if raised during reading (same contract as <see cref="Read"/>)</exception>
/// <exception cref="IOException">Passed through unchanged if raised during reading. Any other exception
/// is wrapped into an IOException, carrying the original as inner exception</exception>
public async Task ReadAsync()
{
    try
    {
        // The buffer only lives for the duration of this call
        using (memoryStream = new MemoryStream())
        {
            // Library code: there is no need to resume on the caller's synchronization context
            await ReadInternal().ConfigureAwait(false);
        }
    }
    catch (NotSupportedContentException)
    {
        throw; // rethrow unwrapped, consistent with the synchronous Read method
    }
    catch (IOException)
    {
        throw; // rethrow
    }
    catch (Exception ex)
    {
        throw new IOException("There was an error while reading an XLSX file. Please see the inner exception:", ex);
    }
}
122
/// <summary>
/// Copies the input (file or stream) into the internal memory stream and reads the XLSX package from it.
/// The field memoryStream must be initialized by the caller before this method is invoked
/// </summary>
/// <returns>Task that completes when the package was processed</returns>
/// <exception cref="IOException">Thrown if neither an input stream nor a file path was provided</exception>
private async Task ReadInternal()
{
    if (inputStream == null && !string.IsNullOrEmpty(filePath))
    {
        // Explicit read access: the two-argument FileStream constructor requests read/write access,
        // which fails on read-only files although the file is only read here
        using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            // ConfigureAwait(false): the synchronous Read method blocks on this task. Resuming on a
            // captured synchronization context (e.g. a UI thread) would deadlock
            await fs.CopyToAsync(memoryStream).ConfigureAwait(false);
        }
    }
    else if (inputStream != null)
    {
        // Note: the provided input stream is disposed after it was copied
        using (inputStream)
        {
            await inputStream.CopyToAsync(memoryStream).ConfigureAwait(false);
        }
    }
    else
    {
        throw new IOException("No valid stream or file path was provided to open");
    }

    memoryStream.Position = 0;
    // leaveOpen = true: the memory stream is owned (and disposed) by the calling read method
    using (ZipArchive zf = new ZipArchive(memoryStream, ZipArchiveMode.Read, true))
    {
        await Task.Run(() =>
        {
            ReadZip(zf);
        }).ConfigureAwait(false);
    }
}
157
/// <summary>
/// Reads the parts of the XLSX package (shared strings, theme, styles, workbook, metadata, relationships
/// and worksheets) and assigns the resulting workbook to the property <see cref="Workbook"/>
/// </summary>
/// <param name="zf">Opened ZIP archive of the XLSX file</param>
/// <exception cref="IOException">Thrown if the relationship of a worksheet definition cannot be resolved,
/// or if the workbook contains no worksheet after reading</exception>
private void ReadZip(ZipArchive zf)
{
    MemoryStream ms;
    Workbook wb = new Workbook
    {
        importInProgress = true // Disables checks during load
    };
    HandleQueuePlugIns(PlugInUUID.ReaderPrependingQueue, zf, ref wb);

    ISharedStringReader sharedStringsReader = PlugInLoader.GetPlugIn<ISharedStringReader>(PlugInUUID.SharedStringsReader, new SharedStringsReader());
    ms = GetEntryStream("xl/sharedStrings.xml", zf);
    if (ms != null && ms.Length > 0) // If length == 0, no shared strings are defined (no text in file)
    {
        sharedStringsReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
        sharedStringsReader.Execute();
    }

    Dictionary<int, string> themeStreamNames = GetSequentialStreamNames("xl/theme/theme", zf);
    if (themeStreamNames.Count > 0)
    {
        // There is not really a definition whether multiple themes can be managed in one workbook.
        // The suffix number (e.g. theme1) indicates it. However, no examples were found and therefore
        // (currently) only the first occurring theme will be read
        string firstThemeName = themeStreamNames.First().Value;
        IPluginBaseReader themeReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.ThemeReader, new ThemeReader());
        ms = GetEntryStream(firstThemeName, zf);
        themeReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
        themeReader.Execute();
    }

    StyleRepository.Instance.ImportInProgress = true; // TODO: To be checked
    try
    {
        IPluginBaseReader styleReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.StyleReader, new StyleReader());
        ms = GetEntryStream("xl/styles.xml", zf);
        styleReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
        styleReader.Execute();
    }
    finally
    {
        // The repository is a shared instance: the flag must be reset even if reading the styles fails,
        // otherwise it would remain in import mode after a failed read
        StyleRepository.Instance.ImportInProgress = false;
    }

    ms = GetEntryStream("xl/workbook.xml", zf);
    IPluginBaseReader workbookReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.WorkbookReader, new WorkbookReader());
    workbookReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
    workbookReader.Execute();

    ms = GetEntryStream("docProps/app.xml", zf);
    if (ms != null && ms.Length > 0) // If null/length == 0, no docProps/app.xml seems to be defined
    {
        IPluginBaseReader metadataAppReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.MetadataAppReader, new MetadataAppReader());
        metadataAppReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
        metadataAppReader.Execute();
    }
    ms = GetEntryStream("docProps/core.xml", zf);
    if (ms != null && ms.Length > 0) // If null/length == 0, no docProps/core.xml seems to be defined
    {
        IPluginBaseReader metadataCoreReader = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.MetadataCoreReader, new MetadataCoreReader());
        metadataCoreReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
        metadataCoreReader.Execute();
    }

    IPluginBaseReader relationships = PlugInLoader.GetPlugIn<IPluginBaseReader>(PlugInUUID.RelationshipReader, new RelationshipReader());
    ms = GetEntryStream("xl/_rels/workbook.xml.rels", zf);
    relationships.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
    relationships.Execute();

    // The worksheet reader is reused for all worksheets and resolves texts over the previously read shared strings
    IWorksheetReader worksheetReader = PlugInLoader.GetPlugIn<IWorksheetReader>(PlugInUUID.WorksheetReader, new WorksheetReader());
    worksheetReader.SharedStrings = sharedStringsReader.SharedStrings;
    List<WorksheetDefinition> worksheetDefinitions = wb.AuxiliaryData.GetDataList<WorksheetDefinition>(PlugInUUID.WorkbookReader, PlugInUUID.WorksheetDefinitionEntity);
    List<Relationship> relationshipDefinitions = wb.AuxiliaryData.GetDataList<Relationship>(PlugInUUID.RelationshipReader, PlugInUUID.RelationshipEntity);
    foreach (WorksheetDefinition definition in worksheetDefinitions)
    {
        // Each worksheet definition is mapped to its package part over the relationship ID
        Relationship relationship = relationshipDefinitions.SingleOrDefault(r => r.RID == definition.RelId);
        if (relationship == null)
        {
            throw new IOException("There was an error while reading an XLSX file. The relationship target of the worksheet with the RelID " + definition.RelId + " was not found");
        }
        ms = GetEntryStream(relationship.Target, zf);
        worksheetReader.Init(ms, wb, readerOptions, ReaderPlugInHandler.HandleInlineQueuePlugins);
        worksheetReader.CurrentWorksheetID = definition.SheetID;
        worksheetReader.Execute();
    }
    if (wb.Worksheets.Count == 0)
    {
        throw new IOException("No worksheet was found in the workbook");
    }
    HandleQueuePlugIns(PlugInUUID.ReaderAppendingQueue, zf, ref wb);
    wb.importInProgress = false; // Enables checks for runtime
    wb.AuxiliaryData.ClearTemporaryData(); // Remove temporary staging data
    this.Workbook = wb;
}
250
/// <summary>
/// Gets the content of the first archive entry with the specified full name as memory stream
/// </summary>
/// <param name="name">Full name of the entry within the archive (e.g. xl/workbook.xml)</param>
/// <param name="archive">ZIP archive to search</param>
/// <returns>Memory stream with the entry content, positioned at the start, or null if no entry with the name exists</returns>
private static MemoryStream GetEntryStream(string name, ZipArchive archive)
{
    foreach (ZipArchiveEntry entry in archive.Entries)
    {
        if (entry.FullName != name)
        {
            continue;
        }
        MemoryStream content = new MemoryStream();
        // The stream returned by Open() must be disposed after copying; it was left open previously
        using (Stream entryStream = entry.Open())
        {
            entryStream.CopyTo(content);
        }
        content.Position = 0; // Rewind, so that consumers can read from the beginning
        return content;
    }
    return null;
}
273
/// <summary>
/// Collects the names of consecutively numbered archive entries (prefix + number + ".xml"), starting at number 1.
/// The collection stops at the first number for which no entry exists
/// </summary>
/// <param name="namePrefix">Entry name without number and file extension (e.g. xl/theme/theme)</param>
/// <param name="archive">ZIP archive to search</param>
/// <returns>Dictionary with the number as key and the full entry name as value. Empty if no entry with the number 1 exists</returns>
private static Dictionary<int, string> GetSequentialStreamNames(string namePrefix, ZipArchive archive)
{
    Dictionary<int, string> files = new Dictionary<int, string>();
    // Assumption: There is no file that has the index 0 in its name
    for (int index = 1; ; index++)
    {
        string name = namePrefix + ParserUtils.ToString(index) + ".xml";
        // Only the existence matters here: look the entry up instead of decompressing its whole content
        if (archive.GetEntry(name) == null)
        {
            break;
        }
        files.Add(index, name);
    }
    return files;
}
300
/// <summary>
/// Executes all reader plug-ins that are registered for the specified queue, one after another
/// </summary>
/// <param name="queueUuid">UUID of the plug-in queue</param>
/// <param name="zf">Opened ZIP archive of the XLSX file, used to resolve the package part of package readers</param>
/// <param name="workbook">Workbook that is handed over to each plug-in (passed by reference, but not reassigned by this method)</param>
private void HandleQueuePlugIns(string queueUuid, ZipArchive zf, ref Workbook workbook)
{
    string previousUuid = null;
    while (true)
    {
        string currentUuid;
        IPluginQueueReader plugIn = PlugInLoader.GetNextQueuePlugIn<IPluginQueueReader>(queueUuid, previousUuid, out currentUuid);
        if (plugIn == null)
        {
            return; // No further plug-in in this queue
        }
        previousUuid = currentUuid; // The next lookup continues after the current plug-in, whether it is executed or skipped

        MemoryStream partStream = null;
        IPluginPackageReader packageReader = plugIn as IPluginPackageReader;
        if (packageReader != null)
        {
            string streamPartName = packageReader.StreamEntryName;
            if (!string.IsNullOrEmpty(streamPartName))
            {
                partStream = GetEntryStream(streamPartName, zf);
                if (partStream == null)
                {
                    continue; // Skip if the stream part name was defined but not found
                }
            }
        }
        plugIn.Init(partStream, workbook, this.readerOptions, null); // stream may be null, inlinePluginAction is not used here
        plugIn.Execute();
    }
}
342
/// <summary>
/// Disposes the XlsxReader instance. An input stream that was passed to the constructor is disposed as well
/// </summary>
public void Dispose()
{
    if (inputStream != null)
    {
        inputStream.Dispose();
    }
    GC.SuppressFinalize(this);
}
351
352
353 #endregion
354 }
355}
XlsxReader(string path, ReaderOptions options=null)
Constructor with file path as parameter.
Definition XlsxReader.cs:48
XlsxReader(Stream stream, ReaderOptions options=null)
Constructor with stream as parameter.
Definition XlsxReader.cs:59
async Task ReadAsync()
Reads the XLSX file from a file path or a file stream asynchronously.
void Dispose()
Disposes the XlsxReader instance.
void Read()
Reads the XLSX file from a file path or a file stream.
Definition XlsxReader.cs:74
Workbook Workbook
Gets the read workbook.
Definition XlsxReader.cs:39
Exceptions.IOException IOException