NanoXLSX.Reader 3.0.0-rc.2
Loading...
Searching...
No Matches
WorksheetReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2025
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.Globalization;
11using System.IO;
12using System.Linq;
13using System.Xml;
14using NanoXLSX.Exceptions;
15using NanoXLSX.Interfaces.Plugin;
17using NanoXLSX.Registry;
18using NanoXLSX.Styles;
19using NanoXLSX.Utils;
21using IOException = NanoXLSX.Exceptions.IOException;
22
24{
28 public class WorksheetReader : IWorksheetReader
29 {
30 #region privateFields
31 private MemoryStream stream;
32 private ReaderOptions readerOptions;
33 private List<string> dateStyles;
34 private List<string> timeStyles;
35 private Dictionary<string, Style> resolvedStyles;
36 private IPasswordReader passwordReader;
37 #endregion
38
39 #region properties
40
44 public Workbook Workbook { get; set; }
48 public int CurrentWorksheetID { get; set; }
49
53 public List<String> SharedStrings { get; set; }
54 #endregion
55
56 #region constructors
61 {
62 }
63 #endregion
64
65 #region functions
72 public void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions)
73 {
74 this.stream = stream;
75 this.Workbook = workbook;
76 this.readerOptions = readerOptions as ReaderOptions;
77 if (dateStyles == null || timeStyles == null || this.resolvedStyles == null)
78 {
79 StyleReaderContainer styleReaderContainer = workbook.AuxiliaryData.GetData<StyleReaderContainer>(PlugInUUID.StyleReader, PlugInUUID.StyleEntity);
80 ProcessStyles(styleReaderContainer);
81 }
82 if (this.passwordReader == null)
83 {
84 this.passwordReader = PlugInLoader.GetPlugIn<IPasswordReader>(PlugInUUID.PasswordReader, new LegacyPasswordReader());
85 this.passwordReader.Init(PasswordType.WorksheetProtection, this.readerOptions);
86 }
87 }
88
93 public void Execute()
94 {
95 try
96 {
97 WorksheetDefinition worksheetDefinition = Workbook.AuxiliaryData.GetData<WorksheetDefinition>(PlugInUUID.WorkbookReader, PlugInUUID.WorksheetDefinitionEntity, CurrentWorksheetID);
98 Worksheet worksheet = new Worksheet(worksheetDefinition.WorksheetName, CurrentWorksheetID, Workbook)
99 {
100 Hidden = worksheetDefinition.Hidden
101 };
102 using (stream) // Close after processing
103 {
104 XmlDocument document = new XmlDocument() { XmlResolver = null };
105 using (XmlReader reader = XmlReader.Create(stream, new XmlReaderSettings() { XmlResolver = null }))
106 {
107 document.Load(reader);
108 GetRows(document, worksheet);
109 GetSheetView(document, worksheet);
110 GetMergedCells(document, worksheet);
111 GetSheetFormats(document, worksheet);
112 GetAutoFilters(document, worksheet);
113 GetColumns(document, worksheet);
114 GetSheetProtection(document, worksheet);
115 SetWorkbookRelation(worksheet);
116 RederPlugInHandler.HandleInlineQueuePlugins(ref stream, Workbook, PlugInUUID.WorksheetInlineReader, CurrentWorksheetID);
117 }
118 }
119
120
121 }
122 catch (NotSupportedContentException)
123 {
124 throw; // rethrow
125 }
126 catch (Exception ex)
127 {
128 throw new IOException("The XML entry could not be read from the input stream. Please see the inner exception:", ex);
129 }
130 }
131
136 private void SetWorkbookRelation(Worksheet worksheet)
137 {
138 Workbook.AddWorksheet(worksheet);
139 int selectedWorksheetId = Workbook.AuxiliaryData.GetData<int>(PlugInUUID.WorkbookReader, PlugInUUID.SelectedWorksheetEntity);
140 if (selectedWorksheetId + 1 == CurrentWorksheetID) // selectedWorksheetId is 0-based
141 {
142 Workbook.SetSelectedWorksheet(worksheet);
143 }
144 }
145
150 private void ProcessStyles(StyleReaderContainer styleReaderContainer)
151 {
152 this.dateStyles = new List<string>();
153 this.timeStyles = new List<string>();
154 this.resolvedStyles = new Dictionary<string, Style>();
155 for (int i = 0; i < styleReaderContainer.StyleCount; i++)
156 {
157 bool isDate;
158 bool isTime;
159 string index = ParserUtils.ToString(i);
160 Style style = styleReaderContainer.GetStyle(i, out isDate, out isTime);
161 if (isDate)
162 {
163 this.dateStyles.Add(index);
164 }
165 if (isTime)
166 {
167 this.timeStyles.Add(index);
168 }
169 this.resolvedStyles.Add(index, style);
170 }
171 }
172
178 private void GetRows(XmlDocument document, Worksheet worksheet)
179 {
180 XmlNodeList rows = document.GetElementsByTagName("row");
181 foreach (XmlNode row in rows)
182 {
183 string rowAttribute = ReaderUtils.GetAttribute(row, "r");
184 if (rowAttribute != null)
185 {
186 int rowNumber = ParserUtils.ParseInt(rowAttribute) - 1; // Transform to zero-based
187 string hiddenAttribute = ReaderUtils.GetAttribute(row, "hidden");
188 if (hiddenAttribute != null)
189 {
190 int value = ParserUtils.ParseBinaryBool(hiddenAttribute);
191 if (value == 1)
192 {
193 worksheet.AddHiddenRow(rowNumber);
194 }
195 }
196 string heightAttribute = ReaderUtils.GetAttribute(row, "ht");
197 if (heightAttribute != null)
198 {
199 worksheet.RowHeights.Add(rowNumber, GetValidatedHeight(ParserUtils.ParseFloat(heightAttribute)));
200 }
201 }
202 if (row.HasChildNodes)
203 {
204 foreach (XmlNode rowChild in row.ChildNodes)
205 {
206 ReadCell(rowChild, worksheet);
207 }
208 }
209 }
210 }
211
217 private static void GetSheetView(XmlDocument xmlDocument, Worksheet worksheet)
218 {
219 XmlNodeList sheetViewsNodes = xmlDocument.GetElementsByTagName("sheetViews");
220 if (sheetViewsNodes != null && sheetViewsNodes.Count > 0)
221 {
222 XmlNodeList sheetViewNodes = sheetViewsNodes[0].ChildNodes;
223 string attribute;
224 // Go through all possible views
225 foreach (XmlNode sheetView in sheetViewNodes)
226 {
227 attribute = ReaderUtils.GetAttribute(sheetView, "view", string.Empty);
228 worksheet.ViewType = Worksheet.GetSheetViewTypeEnum(attribute);
229 attribute = ReaderUtils.GetAttribute(sheetView, "zoomScale");
230 if (attribute != null)
231 {
232 worksheet.ZoomFactor = ParserUtils.ParseInt(attribute);
233 }
234 attribute = ReaderUtils.GetAttribute(sheetView, "zoomScaleNormal");
235 if (attribute != null)
236 {
237 int scale = ParserUtils.ParseInt(attribute);
238 worksheet.ZoomFactors[Worksheet.SheetViewType.Normal] = scale;
239 }
240 attribute = ReaderUtils.GetAttribute(sheetView, "zoomScalePageLayoutView");
241 if (attribute != null)
242 {
243 int scale = ParserUtils.ParseInt(attribute);
244 worksheet.ZoomFactors[Worksheet.SheetViewType.PageLayout] = scale;
245 }
246 attribute = ReaderUtils.GetAttribute(sheetView, "zoomScaleSheetLayoutView");
247 if (attribute != null)
248 {
249 int scale = ParserUtils.ParseInt(attribute);
250 worksheet.ZoomFactors[Worksheet.SheetViewType.PageBreakPreview] = scale;
251 }
252 attribute = ReaderUtils.GetAttribute(sheetView, "showGridLines");
253 if (attribute != null)
254 {
255 worksheet.ShowGridLines = ParserUtils.ParseBinaryBool(attribute) == 1;
256 }
257 attribute = ReaderUtils.GetAttribute(sheetView, "showRowColHeaders");
258 if (attribute != null)
259 {
260 worksheet.ShowRowColumnHeaders = ParserUtils.ParseBinaryBool(attribute) == 1;
261 }
262 attribute = ReaderUtils.GetAttribute(sheetView, "showRuler");
263 if (attribute != null)
264 {
265 worksheet.ShowRuler = ParserUtils.ParseBinaryBool(attribute) == 1;
266 }
267 if (sheetView.LocalName.Equals("sheetView", StringComparison.OrdinalIgnoreCase))
268 {
269 XmlNodeList selectionNodes = sheetView.ChildNodes;
270 if (selectionNodes != null && selectionNodes.Count > 0)
271 {
272 foreach (XmlNode selectionNode in selectionNodes)
273 {
274 attribute = ReaderUtils.GetAttribute(selectionNode, "sqref");
275 if (attribute != null)
276 {
277 if (attribute.Contains(" "))
278 {
279 // Multiple ranges
280 string[] ranges = attribute.Split(' ');
281 foreach (string range in ranges)
282 {
283 CollectSelectedCells(range, worksheet);
284 }
285 }
286 else
287 {
288 CollectSelectedCells(attribute, worksheet);
289 }
290
291 }
292 }
293 }
294 XmlNode paneNode = ReaderUtils.GetChildNode(sheetView, "pane");
295 if (paneNode != null)
296 {
297 SetPaneSplit(paneNode, worksheet);
298 }
299 }
300 }
301 }
302 }
303
309 private static void CollectSelectedCells(string attribute, Worksheet worksheet)
310 {
311 if (attribute.Contains(":"))
312 {
313 // One range
314 worksheet.AddSelectedCells(new Range(attribute));
315 }
316 else
317 {
318 // One cell
319 worksheet.AddSelectedCells(new Range(attribute + ":" + attribute));
320 }
321 }
322
328 private static void SetPaneSplit(XmlNode paneNode, Worksheet worksheet)
329 {
330 string attribute = ReaderUtils.GetAttribute(paneNode, "state");
331 bool useNumbers = false;
332 bool frozenState = false;
333 bool ySplitDefined = false;
334 bool xSplitDefined = false;
335 int? paneSplitRowIndex = null;
336 int? paneSplitColumnIndex = null;
337 float? paneSplitHeight = null;
338 float? paneSplitWidth = null;
339 Address topLeftCell = new Address(0, 0); // default value
340 Worksheet.WorksheetPane? activePane = null;
341 if (attribute != null)
342 {
343 if (ParserUtils.ToLower(attribute) == "frozen" || ParserUtils.ToLower(attribute) == "frozensplit")
344 {
345 frozenState = true;
346 }
347 useNumbers = frozenState;
348 }
349 attribute = ReaderUtils.GetAttribute(paneNode, "ySplit");
350 if (attribute != null)
351 {
352 ySplitDefined = true;
353 if (useNumbers)
354 {
355 paneSplitRowIndex = ParserUtils.ParseInt(attribute);
356 }
357 else
358 {
359 paneSplitHeight = DataUtils.GetPaneSplitHeight(ParserUtils.ParseFloat(attribute));
360 }
361 }
362 attribute = ReaderUtils.GetAttribute(paneNode, "xSplit");
363 if (attribute != null)
364 {
365 xSplitDefined = true;
366 if (useNumbers)
367 {
368 paneSplitColumnIndex = ParserUtils.ParseInt(attribute);
369 }
370 else
371 {
372 paneSplitWidth = DataUtils.GetPaneSplitWidth(ParserUtils.ParseFloat(attribute));
373 }
374 }
375 attribute = ReaderUtils.GetAttribute(paneNode, "topLeftCell");
376 if (attribute != null)
377 {
378 topLeftCell = new Address(attribute);
379 }
380 attribute = ReaderUtils.GetAttribute(paneNode, "activePane", string.Empty);
381 activePane = Worksheet.GetWorksheetPaneEnum(attribute);
382 if (frozenState)
383 {
384 if (ySplitDefined && !xSplitDefined)
385 {
386 worksheet.SetHorizontalSplit(paneSplitRowIndex.Value, frozenState, topLeftCell, activePane);
387 }
388 if (!ySplitDefined && xSplitDefined)
389 {
390 worksheet.SetVerticalSplit(paneSplitColumnIndex.Value, frozenState, topLeftCell, activePane);
391 }
392 else if (ySplitDefined && xSplitDefined)
393 {
394 worksheet.SetSplit(paneSplitColumnIndex.Value, paneSplitRowIndex.Value, frozenState, topLeftCell, activePane);
395 }
396 }
397 else
398 {
399 if (ySplitDefined && !xSplitDefined)
400 {
401 worksheet.SetHorizontalSplit(paneSplitHeight.Value, topLeftCell, activePane);
402 }
403 if (!ySplitDefined && xSplitDefined)
404 {
405 worksheet.SetVerticalSplit(paneSplitWidth.Value, topLeftCell, activePane);
406 }
407 else if (ySplitDefined && xSplitDefined)
408 {
409 worksheet.SetSplit(paneSplitWidth, paneSplitHeight, topLeftCell, activePane);
410 }
411 }
412 }
413
419 private void GetSheetProtection(XmlDocument xmlDocument, Worksheet worksheet)
420 {
421 XmlNodeList sheetProtectionNodes = xmlDocument.GetElementsByTagName("sheetProtection");
422 if (sheetProtectionNodes != null && sheetProtectionNodes.Count > 0)
423 {
424 int hasProtection = 0;
425 XmlNode sheetProtectionNode = sheetProtectionNodes[0];
426 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.AutoFilter, worksheet);
427 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.DeleteColumns, worksheet);
428 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.DeleteRows, worksheet);
429 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.FormatCells, worksheet);
430 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.FormatColumns, worksheet);
431 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.FormatRows, worksheet);
432 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.InsertColumns, worksheet);
433 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.InsertHyperlinks, worksheet);
434 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.InsertRows, worksheet);
435 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.Objects, worksheet);
436 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.PivotTables, worksheet);
437 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.Scenarios, worksheet);
438 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.SelectLockedCells, worksheet);
439 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.SelectUnlockedCells, worksheet);
440 hasProtection += ManageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.Sort, worksheet);
441 if (hasProtection > 0)
442 {
443 worksheet.UseSheetProtection = true;
444 }
445 this.passwordReader.ReadXmlAttributes(sheetProtectionNode);
446 if (this.passwordReader.PasswordIsSet())
447 {
448 if (this.passwordReader is LegacyPasswordReader && (this.passwordReader as LegacyPasswordReader).ContemporaryAlgorithmDetected && (readerOptions == null || !readerOptions.IgnoreNotSupportedPasswordAlgorithms))
449 {
450 throw new NotSupportedContentException("A not supported, contemporary password algorithm for the worksheet protection was detected. Check possible packages to add support to NanoXLSX, or ignore this error by a reader option");
451 }
452 worksheet.SheetProtectionPassword.CopyFrom(this.passwordReader);
453 }
454 }
455 }
456
463 private static int ManageSheetProtection(XmlNode node, Worksheet.SheetProtectionValue sheetProtectionValue, Worksheet worksheet)
464 {
465 int hasProtection = 0;
466 string attributeName = Worksheet.GetSheetProtectionName(sheetProtectionValue);
467 string attribute = ReaderUtils.GetAttribute(node, attributeName);
468 if (attribute != null)
469 {
470 hasProtection = 1;
471 worksheet.SheetProtectionValues.Add(sheetProtectionValue);
472 }
473 return hasProtection;
474 }
475
481 private static void GetMergedCells(XmlDocument xmlDocument, Worksheet worksheet)
482 {
483 XmlNodeList mergedCellsNodes = xmlDocument.GetElementsByTagName("mergeCells");
484 if (mergedCellsNodes != null && mergedCellsNodes.Count > 0)
485 {
486 XmlNodeList mergedCellNodes = mergedCellsNodes[0].ChildNodes;
487 if (mergedCellNodes != null && mergedCellNodes.Count > 0)
488 {
489 foreach (XmlNode mergedCells in mergedCellNodes)
490 {
491 string attribute = ReaderUtils.GetAttribute(mergedCells, "ref");
492 if (attribute != null)
493 {
494 worksheet.MergeCells(new Range(attribute));
495 }
496 }
497 }
498 }
499 }
500
506 private static void GetSheetFormats(XmlDocument xmlDocument, Worksheet worksheet)
507 {
508 XmlNodeList formatNodes = xmlDocument.GetElementsByTagName("sheetFormatPr");
509 if (formatNodes != null && formatNodes.Count > 0)
510 {
511 string attribute = ReaderUtils.GetAttribute(formatNodes[0], "defaultColWidth");
512 if (attribute != null)
513 {
514 worksheet.DefaultColumnWidth = ParserUtils.ParseFloat(attribute);
515 }
516 attribute = ReaderUtils.GetAttribute(formatNodes[0], "defaultRowHeight");
517 if (attribute != null)
518 {
519 worksheet.DefaultRowHeight = ParserUtils.ParseFloat(attribute);
520 }
521 }
522 }
523
529 private static void GetAutoFilters(XmlDocument xmlDocument, Worksheet worksheet)
530 {
531 XmlNodeList autoFilterNodes = xmlDocument.GetElementsByTagName("autoFilter");
532 if (autoFilterNodes != null && autoFilterNodes.Count > 0)
533 {
534 string autoFilterRef = ReaderUtils.GetAttribute(autoFilterNodes[0], "ref");
535 if (autoFilterRef != null)
536 {
537 Range range = new Range(autoFilterRef);
538 worksheet.SetAutoFilter(range.StartAddress.Column, range.EndAddress.Column);
539 }
540 }
541 }
542
548 private void GetColumns(XmlDocument xmlDocument, Worksheet worksheet)
549 {
550 XmlNodeList columnNodes = xmlDocument.GetElementsByTagName("col");
551 foreach (XmlNode columnNode in columnNodes)
552 {
553 int? min = null;
554 int? max = null;
555 List<int> indices = new List<int>();
556 string attribute = ReaderUtils.GetAttribute(columnNode, "min");
557 if (attribute != null)
558 {
559 min = ParserUtils.ParseInt(attribute);
560 max = min;
561 indices.Add(min.Value);
562 }
563 attribute = ReaderUtils.GetAttribute(columnNode, "max");
564 if (attribute != null)
565 {
566 max = ParserUtils.ParseInt(attribute);
567 }
568 if (min != null && max.Value != min.Value)
569 {
570 for (int i = min.Value; i <= max.Value; i++)
571 {
572 indices.Add(i);
573 }
574 }
575 attribute = ReaderUtils.GetAttribute(columnNode, "width");
576 float width = Worksheet.DefaultWorksheetColumnWidth;
577 if (attribute != null)
578 {
579 width = ParserUtils.ParseFloat(attribute);
580 }
581 attribute = ReaderUtils.GetAttribute(columnNode, "hidden");
582 bool hidden = false;
583 if (attribute != null)
584 {
585 int value = ParserUtils.ParseBinaryBool(attribute);
586 if (value == 1)
587 {
588 hidden = true;
589 }
590 }
591 attribute = ReaderUtils.GetAttribute(columnNode, "style");
592 Style defaultStyle = null;
593 if (attribute != null && resolvedStyles.TryGetValue(attribute, out var attributeValue))
594 {
595 defaultStyle = attributeValue;
596 }
597 foreach (int index in indices)
598 {
599 string columnAddress = Cell.ResolveColumnAddress(index - 1); // Transform to zero-based
600 if (defaultStyle != null)
601 {
602 worksheet.SetColumnDefaultStyle(columnAddress, defaultStyle);
603 }
604
605 if (width != Worksheet.DefaultWorksheetColumnWidth)
606 {
607 worksheet.SetColumnWidth(columnAddress, GetValidatedWidth(width));
608 }
609 if (hidden)
610 {
611 worksheet.AddHiddenColumn(columnAddress);
612 }
613 }
614 }
615 }
616
622 private void ReadCell(XmlNode rowChild, Worksheet worksheet)
623 {
624 string type = "s";
625 string styleNumber = "";
626 string address = "A1";
627 string value = "";
628 if (rowChild.LocalName.Equals("c", StringComparison.OrdinalIgnoreCase))
629 {
630 address = ReaderUtils.GetAttribute(rowChild, "r"); // Mandatory
631 type = ReaderUtils.GetAttribute(rowChild, "t"); // can be null if not existing
632 styleNumber = ReaderUtils.GetAttribute(rowChild, "s"); // can be null
633 if (rowChild.HasChildNodes)
634 {
635 foreach (XmlNode valueNode in rowChild.ChildNodes)
636 {
637 if (valueNode.LocalName.Equals("v", StringComparison.OrdinalIgnoreCase))
638 {
639 value = valueNode.InnerText;
640 }
641 if (valueNode.LocalName.Equals("f", StringComparison.OrdinalIgnoreCase))
642 {
643 value = valueNode.InnerText;
644 }
645 if (valueNode.LocalName.Equals("is", StringComparison.OrdinalIgnoreCase))
646 {
647 value = valueNode.InnerText;
648 }
649 }
650 }
651 }
652 string key = ParserUtils.ToUpper(address);
653 Cell cell = ResolveCellData(value, type, styleNumber, address);
654 worksheet.AddCell(cell, address);
655 if (styleNumber != null)
656 {
657 Style style = null;
658 this.resolvedStyles.TryGetValue(styleNumber, out style);
659 if (style != null)
660 {
661 worksheet.Cells[address].SetStyle(style);
662 }
663 }
664 }
665
674 private Cell ResolveCellData(string raw, string type, string styleNumber, string address)
675 {
676 Cell.CellType importedType = Cell.CellType.Default;
677 object rawValue;
678 if (type == "b")
679 {
680 rawValue = TryParseBool(raw);
681 if (rawValue != null)
682 {
683 importedType = Cell.CellType.Bool;
684 }
685 else
686 {
687 rawValue = GetNumericValue(raw);
688 if (rawValue != null)
689 {
690 importedType = Cell.CellType.Number;
691 }
692 }
693 }
694 else if (type == "s")
695 {
696 importedType = Cell.CellType.String;
697 rawValue = ResolveSharedString(raw);
698 }
699 else if (type == "str")
700 {
701 importedType = Cell.CellType.Formula;
702 rawValue = raw;
703 }
704 else if (type == "inlineStr")
705 {
706 importedType = Cell.CellType.String;
707 rawValue = raw;
708 }
709 else if (dateStyles.Contains(styleNumber) && (type == null || type == "" || type == "n"))
710 {
711 rawValue = GetDateTimeValue(raw, Cell.CellType.Date, out importedType);
712 }
713 else if (timeStyles.Contains(styleNumber) && (type == null || type == "" || type == "n"))
714 {
715 rawValue = GetDateTimeValue(raw, Cell.CellType.Time, out importedType);
716 }
717 else
718 {
719 importedType = Cell.CellType.Number;
720 rawValue = GetNumericValue(raw);
721 }
722 if (rawValue == null && raw == "")
723 {
724 importedType = Cell.CellType.Empty;
725 rawValue = null;
726 }
727 else if (rawValue == null && raw.Length > 0)
728 {
729 importedType = Cell.CellType.String;
730 rawValue = raw;
731 }
732 Address cellAddress = new Address(address);
733 if (readerOptions != null)
734 {
735 if (readerOptions.EnforcedColumnTypes.Count > 0)
736 {
737 rawValue = GetEnforcedColumnValue(rawValue, importedType, cellAddress);
738 }
739 rawValue = GetGloballyEnforcedValue(rawValue, cellAddress);
740 rawValue = GetGloballyEnforcedFlagValues(rawValue, cellAddress);
741 importedType = ResolveType(rawValue, importedType);
742 if (importedType == Cell.CellType.Date && rawValue is DateTime && (DateTime)rawValue < DataUtils.FirstAllowedExcelDate)
743 {
744 // Fix conversion from time to date, where time has no days
745 rawValue = ((DateTime)rawValue).AddDays(1);
746 }
747 }
748 return CreateCell(rawValue, importedType, cellAddress, styleNumber);
749 }
750
757 private static Cell.CellType ResolveType(object value, Cell.CellType defaultType)
758 {
759 if (defaultType == Cell.CellType.Formula)
760 {
761 return defaultType;
762 }
763 if (value == null)
764 {
765 return Cell.CellType.Empty;
766 }
767 switch (value)
768 {
769 case uint _:
770 case long _:
771 case ulong _:
772 case short _:
773 case ushort _:
774 case float _:
775 case double _:
776 case byte _:
777 case sbyte _:
778 case int _:
779 return Cell.CellType.Number;
780 case DateTime _:
781 return Cell.CellType.Date;
782 case TimeSpan _:
783 return Cell.CellType.Time;
784 case bool _:
785 return Cell.CellType.Bool;
786 default:
787 return Cell.CellType.String;
788 }
789 }
790
797 private object GetGloballyEnforcedFlagValues(object data, Address address)
798 {
799 if (address.Row < readerOptions.EnforcingStartRowNumber)
800 {
801 return data;
802 }
803 if (readerOptions.EnforceDateTimesAsNumbers)
804 {
805 if (data is DateTime)
806 {
807 data = DataUtils.GetOADateTime((DateTime)data, true);
808 }
809 else if (data is TimeSpan)
810 {
811 data = DataUtils.GetOATime((TimeSpan)data);
812 }
813 }
814 if (readerOptions.EnforceEmptyValuesAsString && data == null)
815 {
816 return "";
817 }
818 return data;
819 }
820
827 private object GetGloballyEnforcedValue(object data, Address address)
828 {
829 if (address.Row < readerOptions.EnforcingStartRowNumber)
830 {
831 return data;
832 }
833 if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.AllNumbersToDouble)
834 {
835 object tempDouble = ConvertToDouble(data);
836 if (tempDouble != null)
837 {
838 return tempDouble;
839 }
840 }
841 else if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.AllNumbersToDecimal)
842 {
843 object tempDecimal = ConvertToDecimal(data);
844 if (tempDecimal != null)
845 {
846 return tempDecimal;
847 }
848 }
849 else if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.AllNumbersToInt)
850 {
851 object tempInt = ConvertToInt(data);
852 if (tempInt != null)
853 {
854 return tempInt;
855 }
856 }
857 else if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.EverythingToString)
858 {
859 return ConvertToString(data);
860 }
861 return data;
862 }
863
871 private object GetEnforcedColumnValue(object data, Cell.CellType importedTyp, Address address)
872 {
873 if (address.Row < readerOptions.EnforcingStartRowNumber)
874 {
875 return data;
876 }
877 if (!readerOptions.EnforcedColumnTypes.TryGetValue(address.Column, out var columnType))
878 {
879 return data;
880 }
881 if (importedTyp == Cell.CellType.Formula)
882 {
883 return data;
884 }
885 switch (columnType)
886 {
887 case ReaderOptions.ColumnType.Numeric:
888 return GetNumericValue(data, importedTyp);
889 case ReaderOptions.ColumnType.Decimal:
890 return ConvertToDecimal(data);
891 case ReaderOptions.ColumnType.Double:
892 return ConvertToDouble(data);
893 case ReaderOptions.ColumnType.Date:
894 return ConvertToDate(data);
895 case ReaderOptions.ColumnType.Time:
896 return ConvertToTime(data);
897 case ReaderOptions.ColumnType.Bool:
898 return ConvertToBool(data);
899 default:
900 return ConvertToString(data);
901 }
902 }
903
909 private object ConvertToBool(object data)
910 {
911 switch (data)
912 {
913 case bool _:
914 return data;
915 case uint _:
916 case long _:
917 case ulong _:
918 case short _:
919 case ushort _:
920 case float _:
921 case byte _:
922 case sbyte _:
923 case int _:
924 object tempObject = ConvertToDouble(data);
925 if (tempObject is double)
926 {
927 double tempDouble = (double)tempObject;
928 if (double.Equals(tempDouble, 0d))
929 {
930 return false;
931 }
932 else if (double.Equals(tempDouble, 1d))
933 {
934 return true;
935 }
936 }
937 break;
938 case string _:
939
940 string tempString = (string)data;
941 bool? tempBool = TryParseBool(tempString);
942 if (tempBool != null)
943 {
944 return tempBool.Value;
945 }
946 break;
947 }
948 return data;
949 }
950
956 private static bool? TryParseBool(string raw)
957 {
958 if (raw == "0")
959 {
960 return false;
961 }
962 else if (raw == "1")
963 {
964 return true;
965 }
966 else
967 {
968 bool value;
969 if (bool.TryParse(raw, out value))
970 {
971 return value;
972 }
973 else
974 {
975 return null;
976 }
977 }
978 }
979
985 private object ConvertToDouble(object data)
986 {
987 object value = ConvertToDecimal(data);
988 if (value is decimal)
989 {
990 return Decimal.ToDouble((decimal)value);
991 }
992 else if (value is float)
993 {
994 return Convert.ToDouble((float)value);
995 }
996 return value;
997 }
998
1004 private object ConvertToDecimal(object data)
1005 {
1006 IConvertible converter;
1007 switch (data)
1008 {
1009 case double _:
1010 return data;
1011 case uint _:
1012 case long _:
1013 case ulong _:
1014 case short _:
1015 case ushort _:
1016 case float _:
1017 case byte _:
1018 case sbyte _:
1019 case int _:
1020 converter = data as IConvertible;
1021 double tempDouble = converter.ToDouble(DataUtils.InvariantCulture);
1022 if (tempDouble > (double)decimal.MaxValue || tempDouble < (double)decimal.MinValue)
1023 {
1024 return data;
1025 }
1026 else
1027 {
1028 return converter.ToDecimal(DataUtils.InvariantCulture);
1029 }
1030 case bool _:
1031 if ((bool)data)
1032 {
1033 return decimal.One;
1034 }
1035 else
1036 {
1037 return decimal.Zero;
1038 }
1039 case DateTime _:
1040 return new decimal(DataUtils.GetOADateTime((DateTime)data));
1041 case TimeSpan _:
1042 return new decimal(DataUtils.GetOATime((TimeSpan)data));
1043 case string _:
1044 decimal dValue;
1045 string tempString = (string)data;
1046 if (ParserUtils.TryParseDecimal(tempString, out dValue))
1047 {
1048 return dValue;
1049 }
1050 DateTime? tempDate = TryParseDate(tempString);
1051 if (tempDate != null)
1052 {
1053 return new decimal(DataUtils.GetOADateTime(tempDate.Value));
1054 }
1055 TimeSpan? tempTime = TryParseTime(tempString);
1056 if (tempTime != null)
1057 {
1058 return new decimal(DataUtils.GetOATime(tempTime.Value));
1059 }
1060 break;
1061 }
1062 return data;
1063 }
1064
1070 private static object ConvertToInt(object data)
1071 {
1072 double tempDouble;
1073 switch (data)
1074 {
1075 case uint _:
1076 case long _:
1077 case ulong _:
1078 break;
1079 case DateTime _:
1080 tempDouble = DataUtils.GetOADateTime((DateTime)data, true);
1081 return ConvertDoubleToInt(tempDouble);
1082 case TimeSpan _:
1083 tempDouble = DataUtils.GetOATime((TimeSpan)data);
1084 return ConvertDoubleToInt(tempDouble);
1085 case float _:
1086 case double _:
1087 int? tempInt = TryConvertDoubleToInt(data);
1088 if (tempInt != null)
1089 {
1090 return tempInt;
1091 }
1092 break;
1093 case bool _:
1094 return (bool)data ? 1 : 0;
1095 case string _:
1096 int tempInt2;
1097 if (ParserUtils.TryParseInt((string)data, out tempInt2))
1098 {
1099 return tempInt2;
1100 }
1101 break;
1102 }
1103 return null;
1104 }
1105
1111 private object ConvertToDate(object data)
1112 {
1113 switch (data)
1114 {
1115 case DateTime _:
1116 return data;
1117 case TimeSpan _:
1118 DateTime root = DataUtils.FirstAllowedExcelDate;
1119 TimeSpan time = (TimeSpan)data;
1120 root = root.AddDays(-1); // Fix offset of 1
1121 root = root.AddHours(time.Hours);
1122 root = root.AddMinutes(time.Minutes);
1123 root = root.AddSeconds(time.Seconds);
1124 return root;
1125 case double _:
1126 case uint _:
1127 case long _:
1128 case ulong _:
1129 case short _:
1130 case ushort _:
1131 case float _:
1132 case byte _:
1133 case sbyte _:
1134 case int _:
1135 return ConvertDateFromDouble(data);
1136 case string _:
1137 DateTime? date2 = TryParseDate((string)data);
1138 if (date2 != null)
1139 {
1140 return date2.Value;
1141 }
1142 return ConvertDateFromDouble(data);
1143 }
1144 return data;
1145 }
1146
1152 private DateTime? TryParseDate(string raw)
1153 {
1154 DateTime dateTime;
1155 bool isDateTime;
1156 if (readerOptions == null || string.IsNullOrEmpty(readerOptions.DateTimeFormat) || readerOptions.TemporalCultureInfo == null)
1157 {
1158 isDateTime = DateTime.TryParse(raw, ReaderOptions.DefaultCultureInfo, DateTimeStyles.None, out dateTime);
1159 }
1160 else
1161 {
1162 isDateTime = DateTime.TryParseExact(raw, readerOptions.DateTimeFormat, readerOptions.TemporalCultureInfo, DateTimeStyles.None, out dateTime);
1163 }
1164 if (isDateTime && dateTime >= DataUtils.FirstAllowedExcelDate && dateTime <= DataUtils.LastAllowedExcelDate)
1165 {
1166 return dateTime;
1167 }
1168 return null;
1169 }
1170
1176 private object ConvertToTime(object data)
1177 {
1178 switch (data)
1179 {
1180 case DateTime _:
1181 return ConvertTimeFromDouble(data);
1182 case TimeSpan _:
1183 return data;
1184 case double _:
1185 case uint _:
1186 case long _:
1187 case ulong _:
1188 case short _:
1189 case ushort _:
1190 case float _:
1191 case byte _:
1192 case sbyte _:
1193 case int _:
1194 return ConvertTimeFromDouble(data);
1195 case string _:
1196 TimeSpan? time = TryParseTime((string)data);
1197 if (time != null)
1198 {
1199 return time;
1200 }
1201 return ConvertTimeFromDouble(data);
1202 }
1203 return data;
1204 }
1205
1211 private TimeSpan? TryParseTime(string raw)
1212 {
1213 TimeSpan timeSpan;
1214 bool isTimeSpan;
1215 if (readerOptions == null || string.IsNullOrEmpty(readerOptions.TimeSpanFormat) || readerOptions.TemporalCultureInfo == null)
1216 {
1217 isTimeSpan = TimeSpan.TryParse(raw, ReaderOptions.DefaultCultureInfo, out timeSpan);
1218 }
1219 else
1220 {
1221 isTimeSpan = TimeSpan.TryParseExact(raw, readerOptions.TimeSpanFormat, readerOptions.TemporalCultureInfo, out timeSpan);
1222 }
1223 if (isTimeSpan && timeSpan.Days >= 0 && timeSpan.Days < DataUtils.MaxOADateValue)
1224 {
1225 return timeSpan;
1226 }
1227 return null;
1228 }
1229
1238 private static object GetDateTimeValue(string raw, Cell.CellType valueType, out Cell.CellType resolvedType)
1239 {
1240 double dValue;
1241 if (!ParserUtils.TryParseDouble(raw, out dValue))
1242 {
1243 resolvedType = Cell.CellType.String;
1244 return raw;
1245 }
1246 if ((valueType == Cell.CellType.Date && (dValue < DataUtils.MinOADateValue || dValue > DataUtils.MaxOADateValue)) || (valueType == Cell.CellType.Time && (dValue < 0.0 || dValue > DataUtils.MaxOADateValue)))
1247 {
1248 // fallback to number (cannot be anything else)
1249 resolvedType = Cell.CellType.Number;
1250 return GetNumericValue(raw);
1251 }
1252 DateTime tempDate = DataUtils.GetDateFromOA(dValue);
1253 if (dValue < 1.0)
1254 {
1255 tempDate = tempDate.AddDays(1); // Modify wrong 1st date when < 1
1256 }
1257 if (valueType == Cell.CellType.Date)
1258 {
1259 resolvedType = Cell.CellType.Date;
1260 return tempDate;
1261 }
1262 else
1263 {
1264 resolvedType = Cell.CellType.Time;
1265 return new TimeSpan((int)dValue, tempDate.Hour, tempDate.Minute, tempDate.Second);
1266 }
1267 }
1268
1274 private object ConvertDateFromDouble(object data)
1275 {
1276 object oaDate = ConvertToDouble(data);
1277 if (oaDate is double && (double)oaDate < DataUtils.MaxOADateValue)
1278 {
1279 DateTime date = DataUtils.GetDateFromOA((double)oaDate);
1280 if (date >= DataUtils.FirstAllowedExcelDate && date <= DataUtils.LastAllowedExcelDate)
1281 {
1282 return date;
1283 }
1284 }
1285 return data;
1286 }
1287
1293 private object ConvertTimeFromDouble(object data)
1294 {
1295 object oaDate = ConvertToDouble(data);
1296 if (oaDate is double)
1297 {
1298 double d = (double)oaDate;
1299 if (d >= DataUtils.MinOADateValue && d <= DataUtils.MaxOADateValue)
1300 {
1301 DateTime date = DataUtils.GetDateFromOA(d);
1302 return new TimeSpan((int)d, date.Hour, date.Minute, date.Second);
1303 }
1304 }
1305 return data;
1306 }
1307
1313 private static int? TryConvertDoubleToInt(object data)
1314 {
1315 IConvertible converter = data as IConvertible;
1316 double dValue = converter.ToDouble(ReaderOptions.DefaultCultureInfo);
1317 if (dValue > int.MinValue && dValue < int.MaxValue)
1318 {
1319 return converter.ToInt32(ReaderOptions.DefaultCultureInfo);
1320 }
1321 return null;
1322 }
1323
1329 private static int ConvertDoubleToInt(object data)
1330 {
1331 IConvertible converter = data as IConvertible;
1332 return converter.ToInt32(ReaderOptions.DefaultCultureInfo);
1333 }
1334
1340 private string ConvertToString(object data)
1341 {
1342 switch (data)
1343 {
1344 case int _:
1345 return ((int)data).ToString(ReaderOptions.DefaultCultureInfo);
1346 case uint _:
1347 return ((uint)data).ToString(ReaderOptions.DefaultCultureInfo);
1348 case long _:
1349 return ((long)data).ToString(ReaderOptions.DefaultCultureInfo);
1350 case ulong _:
1351 return ((ulong)data).ToString(ReaderOptions.DefaultCultureInfo);
1352 case float _:
1353 return ((float)data).ToString(ReaderOptions.DefaultCultureInfo);
1354 case double _:
1355 return ((double)data).ToString(ReaderOptions.DefaultCultureInfo);
1356 case bool _:
1357 return ((bool)data).ToString(ReaderOptions.DefaultCultureInfo);
1358 case DateTime _:
1359 return ((DateTime)data).ToString(readerOptions.DateTimeFormat, ParserUtils.InvariantCulture);
1360 case TimeSpan _:
1361 return ((TimeSpan)data).ToString(readerOptions.TimeSpanFormat, ParserUtils.InvariantCulture);
1362 default:
1363 if (data == null)
1364 {
1365 return null;
1366 }
1367 return data.ToString();
1368 }
1369 }
1370
1377 private object GetNumericValue(object raw, Cell.CellType importedType)
1378 {
1379 if (raw == null)
1380 {
1381 return null;
1382 }
1383 object tempObject;
1384 switch (importedType)
1385 {
1386 case Cell.CellType.String:
1387 string tempString = raw.ToString();
1388 tempObject = GetNumericValue(tempString);
1389 if (tempObject != null)
1390 {
1391 return tempObject;
1392 }
1393 DateTime? tempDate = TryParseDate(tempString);
1394 if (tempDate != null)
1395 {
1396 return DataUtils.GetOADateTime(tempDate.Value);
1397 }
1398 TimeSpan? tempTime = TryParseTime(tempString);
1399 if (tempTime != null)
1400 {
1401 return DataUtils.GetOATime(tempTime.Value);
1402 }
1403 tempObject = ConvertToBool(raw);
1404 if (tempObject is bool)
1405 {
1406 return (bool)tempObject ? 1 : 0;
1407 }
1408 break;
1409 case Cell.CellType.Number:
1410 return raw;
1411 case Cell.CellType.Date:
1412 return DataUtils.GetOADateTime((DateTime)raw);
1413 case Cell.CellType.Time:
1414 return DataUtils.GetOATime((TimeSpan)raw);
1415 case Cell.CellType.Bool:
1416 if ((bool)raw)
1417 {
1418 return 1;
1419 }
1420 return 0;
1421 }
1422 return raw;
1423 }
1424
1425
1431 private static object GetNumericValue(string raw)
1432 {
1433 bool hasDecimalPoint = raw.Contains(".");
1434
1435 // Only try integer parsing if there's no decimal point
1436 if (!hasDecimalPoint)
1437 {
1438 // integer section (unchanged)
1439 uint uiValue;
1440 int iValue;
1441 bool canBeUint = ParserUtils.TryParseUint(raw, out uiValue);
1442 bool canBeInt = ParserUtils.TryParseInt(raw, out iValue);
1443 if (canBeUint && !canBeInt)
1444 {
1445 return uiValue;
1446 }
1447 else if (canBeInt)
1448 {
1449 return iValue;
1450 }
1451 ulong ulValue;
1452 long lValue;
1453 bool canBeUlong = ParserUtils.TryParseUlong(raw, out ulValue);
1454 bool canBeLong = ParserUtils.TryParseLong(raw, out lValue);
1455 if (canBeUlong && !canBeLong)
1456 {
1457 return ulValue;
1458 }
1459 else if (canBeLong)
1460 {
1461 return lValue;
1462 }
1463 }
1464
1465 decimal dcValue;
1466 double dValue;
1467 float fValue;
1468
1469 // Decimal/float section
1470 if (ParserUtils.TryParseDecimal(raw, out dcValue))
1471 {
1472 // Check if the value can be accurately represented as float
1473 float testFloat = decimal.ToSingle(dcValue);
1474 decimal backToDecimal = (decimal)testFloat;
1475
1476 // If converting to float and back preserves the value, use float
1477 if (dcValue == backToDecimal)
1478 {
1479 return testFloat;
1480 }
1481 else
1482 {
1483 // Otherwise use double for better precision
1484 return decimal.ToDouble(dcValue);
1485 }
1486 }
1487 // High range float section
1488 else if (ParserUtils.TryParseFloat(raw, out fValue) && fValue >= float.MinValue && fValue <= float.MaxValue && !float.IsInfinity(fValue))
1489 {
1490 return fValue;
1491 }
1492 if (ParserUtils.TryParseDouble(raw, out dValue))
1493 {
1494 return dValue;
1495 }
1496 return null;
1497 }
1498
1505 private float GetValidatedWidth(float rawValue)
1506 {
1507 if (rawValue < Worksheet.MinColumnWidth)
1508 {
1509 if (readerOptions.EnforceStrictValidation)
1510 {
1511 throw new WorksheetException($"The worksheet contains an invalid column width (too small: {rawValue}) value. This error is ignored when disabling the reader option 'EnforceStrictValidation'");
1512 }
1513 else
1514 {
1515 return Worksheet.MinColumnWidth;
1516 }
1517 }
1518 else if (rawValue > Worksheet.MaxColumnWidth)
1519 {
1520 if (readerOptions.EnforceStrictValidation)
1521 {
1522 throw new WorksheetException($"The worksheet contains an invalid column width (too large: {rawValue}) value. This error is ignored when disabling the reader option 'EnforceStrictValidation'");
1523 }
1524 else
1525 {
1526 return Worksheet.MaxColumnWidth;
1527 }
1528 }
1529 else
1530 {
1531 return rawValue;
1532 }
1533 }
1534
1541 private float GetValidatedHeight(float rawValue)
1542 {
1543 if (rawValue < Worksheet.MinRowHeight)
1544 {
1545 if (readerOptions.EnforceStrictValidation)
1546 {
1547 throw new WorksheetException($"The worksheet contains an invalid row height (too small: {rawValue}) value. Consider using the ImportOption 'EnforceValidRowDimensions' to ignore this error.");
1548 }
1549 else
1550 {
1551 return Worksheet.MinRowHeight;
1552 }
1553 }
1554 else if (rawValue > Worksheet.MaxRowHeight)
1555 {
1556 if (readerOptions.EnforceStrictValidation)
1557 {
1558 throw new WorksheetException($"The worksheet contains an invalid row height (too large: {rawValue}) value. Consider using the ImportOption 'EnforceValidRowDimensions' to ignore this error.");
1559 }
1560 else
1561 {
1562 return Worksheet.MaxRowHeight;
1563 }
1564 }
1565 else
1566 {
1567 return rawValue;
1568 }
1569 }
1570
1576 private string ResolveSharedString(string raw)
1577 {
1578 int stringId;
1579 if (ParserUtils.TryParseInt(raw, out stringId))
1580 {
1581 string resolvedString = SharedStrings.ElementAtOrDefault(stringId);
1582 if (resolvedString == null)
1583 {
1584 return raw;
1585 }
1586 else
1587 {
1588 return resolvedString;
1589 }
1590 }
1591 return raw;
1592 }
1593
1602 private Cell CreateCell(object value, Cell.CellType type, Address address, string styleNumber = null)
1603 {
1604 Cell cell = new Cell(value, type, address);
1605 if (styleNumber != null && resolvedStyles.TryGetValue(styleNumber, out var styleValue))
1606 {
1607 cell.SetStyle(styleValue);
1608 }
1609 return cell;
1610 }
1611 #endregion
1612 }
1613}
Static class that contains enums for password handling.
Class representing a reader for legacy passwords.
void Execute()
Method to execute the main logic of the plug-in (interface implementation).
List< String > SharedStrings
Gets or Sets the list of the shared strings. The index of the list corresponds to the index,...
int CurrentWorksheetID
Gets or sets the (r)ID of the current worksheet.
WorksheetReader()
Default constructor - Must be defined for instantiation of the plug-ins.
Workbook Workbook
Workbook reference where read data is stored (should not be null).
void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions)
Initialization method (interface implementation).
Class representing a collection of pre-processed styles and their components. This class is internall...
The reader options define global rules, applied when loading a worksheet. The options are mainly to o...
Interface, used by password readers.
void Init(PasswordType type, ReaderOptions readerOptions)
Method to initialize the password reader.
Exceptions.IOException IOException