NanoXLSX.Reader 3.1.0
Loading...
Searching...
No Matches
WorksheetReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.Globalization;
11using System.IO;
12using System.Linq;
13using System.Text;
14using System.Xml;
15using NanoXLSX.Exceptions;
16using NanoXLSX.Interfaces;
17using NanoXLSX.Interfaces.Reader;
18using NanoXLSX.Registry;
19using NanoXLSX.Styles;
20using NanoXLSX.Utils;
21using NanoXLSX.Utils.Xml;
22using static NanoXLSX.Enums.Password;
23using IOException = NanoXLSX.Exceptions.IOException;
24
26{
30 public class WorksheetReader : IWorksheetReader
31 {
32 #region privateFields
33 private Stream stream;
34 private HashSet<string> dateStyles;
35 private HashSet<string> timeStyles;
36 private Dictionary<string, Style> resolvedStyles;
37 private IPasswordReader passwordReader;
38 private ReaderOptions readerOptions;
39 #endregion
40
41 #region properties
42
46 public Workbook Workbook { get; set; }
50 public IOptions Options { get; set; }
54 public Action<Stream, Workbook, string, IOptions, int?> InlinePluginHandler { get; set; }
58 public int CurrentWorksheetID { get; set; }
59
63 public List<String> SharedStrings { get; set; }
64 #endregion
65
66 #region constructors
71 {
72 }
73 #endregion
74
75 #region functions
83 public void Init(Stream stream, Workbook workbook, IOptions readerOptions, Action<Stream, Workbook, string, IOptions, int?> inlinePluginHandler)
84 {
85 this.stream = stream;
86 this.Workbook = workbook;
87 this.Options = readerOptions;
88 this.readerOptions = readerOptions as ReaderOptions;
89 this.InlinePluginHandler = inlinePluginHandler;
90 if (dateStyles == null || timeStyles == null || this.resolvedStyles == null)
91 {
92 StyleReaderContainer styleReaderContainer = workbook.AuxiliaryData.GetData<StyleReaderContainer>(PlugInUUID.StyleReader, PlugInUUID.StyleEntity);
93 ProcessStyles(styleReaderContainer);
94 }
95 if (this.passwordReader == null)
96 {
97 this.passwordReader = PlugInLoader.GetPlugIn<IPasswordReader>(PlugInUUID.PasswordReader, new LegacyPasswordReader());
98 this.passwordReader.Init(PasswordType.WorksheetProtection, (ReaderOptions)readerOptions);
99 }
100 }
101
106 public void Execute()
107 {
108 try
109 {
110 WorksheetDefinition worksheetDefinition = Workbook.AuxiliaryData.GetData<WorksheetDefinition>(PlugInUUID.WorkbookReader, PlugInUUID.WorksheetDefinitionEntity, CurrentWorksheetID);
111 Worksheet worksheet = new Worksheet(worksheetDefinition.WorksheetName, worksheetDefinition.SheetID, Workbook)
112 {
113 Hidden = worksheetDefinition.Hidden
114 };
115 using (stream) // Close after processing
116 {
117 StringBuilder sb = new StringBuilder();
118 using (XmlReader reader = XmlReader.Create(stream, XmlStreamUtils.CreateSettings()))
119 {
120 while (reader.Read())
121 {
122 if (reader.NodeType != XmlNodeType.Element)
123 {
124 continue;
125 }
126 switch (reader.LocalName.ToLowerInvariant())
127 {
128 case "sheetviews":
129 GetSheetView(reader, worksheet);
130 break;
131 case "sheetformatpr":
132 GetSheetFormats(reader, worksheet);
133 break;
134 case "cols":
135 GetColumns(reader, worksheet, readerOptions);
136 break;
137 case "sheetdata":
138 GetRows(reader, worksheet, readerOptions, sb);
139 break;
140 case "sheetprotection":
141 GetSheetProtection(reader, worksheet);
142 break;
143 case "mergecells":
144 GetMergedCells(reader, worksheet);
145 break;
146 case "autofilter":
147 GetAutoFilters(reader, worksheet);
148 break;
149 }
150 }
151 SetWorkbookRelation(worksheet);
152 InlinePluginHandler?.Invoke(stream, Workbook, PlugInUUID.WorksheetInlineReader, Options, CurrentWorksheetID);
153 }
154 }
155 }
156 catch (NotSupportedContentException)
157 {
158 throw; // rethrow
159 }
160 catch (Exception ex)
161 {
162 throw new IOException("The XML entry could not be read from the input stream. Please see the inner exception:", ex);
163 }
164 }
165
170 private void SetWorkbookRelation(Worksheet worksheet)
171 {
172 Workbook.AddWorksheet(worksheet);
173 int selectedWorksheetId = Workbook.AuxiliaryData.GetData<int>(PlugInUUID.WorkbookReader, PlugInUUID.SelectedWorksheetEntity);
174 if (selectedWorksheetId == CurrentWorksheetID)
175 {
176 Workbook.SetSelectedWorksheet(worksheet);
177 }
178 }
179
184 private void ProcessStyles(StyleReaderContainer styleReaderContainer)
185 {
186 this.dateStyles = new HashSet<string>();
187 this.timeStyles = new HashSet<string>();
188 this.resolvedStyles = new Dictionary<string, Style>();
189 for (int i = 0; i < styleReaderContainer.StyleCount; i++)
190 {
191 bool isDate;
192 bool isTime;
193 string index = ParserUtils.ToString(i);
194 Style style = styleReaderContainer.GetStyle(i, out isDate, out isTime);
195 if (isDate)
196 {
197 this.dateStyles.Add(index);
198 }
199 if (isTime)
200 {
201 this.timeStyles.Add(index);
202 }
203 this.resolvedStyles.Add(index, style);
204 }
205 }
206
214 private void GetRows(XmlReader reader, Worksheet worksheet, ReaderOptions readerOptions, StringBuilder sb)
215 {
216 using (XmlReader sheetDataReader = reader.ReadSubtree())
217 {
218 sheetDataReader.Read(); // consume the sheetData open tag
219 while (sheetDataReader.Read())
220 {
221 if (!XmlStreamUtils.IsElement(sheetDataReader, "row"))
222 {
223 continue;
224 }
225 string rowAttribute = sheetDataReader.GetAttribute("r");
226 if (rowAttribute != null)
227 {
228 int rowNumber = ParserUtils.ParseInt(rowAttribute) - 1; // Transform to zero-based
229 string hiddenAttribute = sheetDataReader.GetAttribute("hidden");
230 if (hiddenAttribute != null && ParserUtils.ParseBinaryBool(hiddenAttribute) == 1)
231 {
232 worksheet.AddHiddenRow(rowNumber);
233 }
234 string heightAttribute = sheetDataReader.GetAttribute("ht");
235 if (heightAttribute != null)
236 {
237 worksheet.RowHeights.Add(rowNumber, GetValidatedHeight(ParserUtils.ParseFloat(heightAttribute), readerOptions));
238 }
239 }
240 if (!sheetDataReader.IsEmptyElement)
241 {
242 using (XmlReader rowReader = sheetDataReader.ReadSubtree())
243 {
244 rowReader.Read(); // consume the row open tag
245 while (rowReader.Read())
246 {
247 if (!XmlStreamUtils.IsElement(rowReader, "c"))
248 {
249 continue;
250 }
251 ReadCell(rowReader, worksheet, sb);
252 }
253 }
254 }
255 }
256 }
257 }
258
264 private static void GetSheetView(XmlReader reader, Worksheet worksheet)
265 {
266 using (XmlReader subtree = reader.ReadSubtree())
267 {
268 subtree.Read(); // consume sheetViews
269 while (subtree.Read())
270 {
271 if (!XmlStreamUtils.IsElement(subtree, "sheetView"))
272 {
273 continue;
274 }
275 string attribute = subtree.GetAttribute("view") ?? string.Empty;
276 worksheet.ViewType = Worksheet.GetSheetViewTypeEnum(attribute);
277 attribute = subtree.GetAttribute("zoomScale");
278 if (attribute != null)
279 {
280 worksheet.ZoomFactor = ParserUtils.ParseInt(attribute);
281 }
282 attribute = subtree.GetAttribute("zoomScaleNormal");
283 if (attribute != null)
284 {
285 worksheet.ZoomFactors[Worksheet.SheetViewType.Normal] = ParserUtils.ParseInt(attribute);
286 }
287 attribute = subtree.GetAttribute("zoomScalePageLayoutView");
288 if (attribute != null)
289 {
290 worksheet.ZoomFactors[Worksheet.SheetViewType.PageLayout] = ParserUtils.ParseInt(attribute);
291 }
292 attribute = subtree.GetAttribute("zoomScaleSheetLayoutView");
293 if (attribute != null)
294 {
295 worksheet.ZoomFactors[Worksheet.SheetViewType.PageBreakPreview] = ParserUtils.ParseInt(attribute);
296 }
297 attribute = subtree.GetAttribute("showGridLines");
298 if (attribute != null)
299 {
300 worksheet.ShowGridLines = ParserUtils.ParseBinaryBool(attribute) == 1;
301 }
302 attribute = subtree.GetAttribute("showRowColHeaders");
303 if (attribute != null)
304 {
305 worksheet.ShowRowColumnHeaders = ParserUtils.ParseBinaryBool(attribute) == 1;
306 }
307 attribute = subtree.GetAttribute("showRuler");
308 if (attribute != null)
309 {
310 worksheet.ShowRuler = ParserUtils.ParseBinaryBool(attribute) == 1;
311 }
312 using (XmlReader sheetViewReader = subtree.ReadSubtree())
313 {
314 sheetViewReader.Read(); // consume sheetView
315 while (sheetViewReader.Read())
316 {
317 if (sheetViewReader.NodeType != XmlNodeType.Element)
318 {
319 continue;
320 }
321 if (XmlStreamUtils.IsElement(sheetViewReader, "selection"))
322 {
323 attribute = sheetViewReader.GetAttribute("sqref");
324 if (attribute != null)
325 {
326 if (attribute.Contains(" "))
327 {
328 string[] ranges = attribute.Split(' ');
329 foreach (string range in ranges)
330 {
331 CollectSelectedCells(range, worksheet);
332 }
333 }
334 else
335 {
336 CollectSelectedCells(attribute, worksheet);
337 }
338 }
339 }
340 else if (XmlStreamUtils.IsElement(sheetViewReader, "pane"))
341 {
342 SetPaneSplit(sheetViewReader, worksheet);
343 }
344 }
345 }
346 }
347 }
348 }
349
355 private static void CollectSelectedCells(string attribute, Worksheet worksheet)
356 {
357 if (attribute.Contains(":"))
358 {
359 // One range
360 worksheet.AddSelectedCells(new Range(attribute));
361 }
362 else
363 {
364 // One cell
365 worksheet.AddSelectedCells(new Range(attribute + ":" + attribute));
366 }
367 }
368
374 private static void SetPaneSplit(XmlReader reader, Worksheet worksheet)
375 {
376 string attribute = reader.GetAttribute("state");
377 bool useNumbers = false;
378 bool frozenState = false;
379 bool ySplitDefined = false;
380 bool xSplitDefined = false;
381 int? paneSplitRowIndex = null;
382 int? paneSplitColumnIndex = null;
383 float? paneSplitHeight = null;
384 float? paneSplitWidth = null;
385 Address topLeftCell = new Address(0, 0); // default value
386 Worksheet.WorksheetPane? activePane = null;
387 if (attribute != null)
388 {
389 if (ParserUtils.ToLower(attribute) == "frozen" || ParserUtils.ToLower(attribute) == "frozensplit")
390 {
391 frozenState = true;
392 }
393 useNumbers = frozenState;
394 }
395 attribute = reader.GetAttribute("ySplit");
396 if (attribute != null)
397 {
398 ySplitDefined = true;
399 if (useNumbers)
400 {
401 paneSplitRowIndex = ParserUtils.ParseInt(attribute);
402 }
403 else
404 {
405 paneSplitHeight = DataUtils.GetPaneSplitHeight(ParserUtils.ParseFloat(attribute));
406 }
407 }
408 attribute = reader.GetAttribute("xSplit");
409 if (attribute != null)
410 {
411 xSplitDefined = true;
412 if (useNumbers)
413 {
414 paneSplitColumnIndex = ParserUtils.ParseInt(attribute);
415 }
416 else
417 {
418 paneSplitWidth = DataUtils.GetPaneSplitWidth(ParserUtils.ParseFloat(attribute));
419 }
420 }
421 attribute = reader.GetAttribute("topLeftCell");
422 if (attribute != null)
423 {
424 topLeftCell = new Address(attribute);
425 }
426 attribute = reader.GetAttribute("activePane") ?? string.Empty;
427 activePane = Worksheet.GetWorksheetPaneEnum(attribute);
428 if (frozenState)
429 {
430 if (ySplitDefined && !xSplitDefined)
431 {
432 worksheet.SetHorizontalSplit(paneSplitRowIndex.Value, frozenState, topLeftCell, activePane);
433 }
434 if (!ySplitDefined && xSplitDefined)
435 {
436 worksheet.SetVerticalSplit(paneSplitColumnIndex.Value, frozenState, topLeftCell, activePane);
437 }
438 else if (ySplitDefined && xSplitDefined)
439 {
440 worksheet.SetSplit(paneSplitColumnIndex.Value, paneSplitRowIndex.Value, frozenState, topLeftCell, activePane);
441 }
442 }
443 else
444 {
445 if (ySplitDefined && !xSplitDefined)
446 {
447 worksheet.SetHorizontalSplit(paneSplitHeight.Value, topLeftCell, activePane);
448 }
449 if (!ySplitDefined && xSplitDefined)
450 {
451 worksheet.SetVerticalSplit(paneSplitWidth.Value, topLeftCell, activePane);
452 }
453 else if (ySplitDefined && xSplitDefined)
454 {
455 worksheet.SetSplit(paneSplitWidth, paneSplitHeight, topLeftCell, activePane);
456 }
457 }
458 }
459
468 private void GetSheetProtection(XmlReader reader, Worksheet worksheet)
469 {
470 int hasProtection = 0;
471 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.AutoFilter, worksheet);
472 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.DeleteColumns, worksheet);
473 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.DeleteRows, worksheet);
474 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.FormatCells, worksheet);
475 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.FormatColumns, worksheet);
476 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.FormatRows, worksheet);
477 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.InsertColumns, worksheet);
478 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.InsertHyperlinks, worksheet);
479 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.InsertRows, worksheet);
480 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.Objects, worksheet);
481 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.PivotTables, worksheet);
482 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.Scenarios, worksheet);
483 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.SelectLockedCells, worksheet);
484 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.SelectUnlockedCells, worksheet);
485 hasProtection += ReadSheetProtectionAttribute(reader, Worksheet.SheetProtectionValue.Sort, worksheet);
486 if (hasProtection > 0)
487 {
488 worksheet.UseSheetProtection = true;
489 }
490 string outerXml;
491 using (XmlReader subtree = reader.ReadSubtree())
492 {
493 subtree.MoveToContent();
494 outerXml = subtree.ReadOuterXml();
495 }
496 XmlDocument miniDoc = new XmlDocument { XmlResolver = null };
497 miniDoc.LoadXml(outerXml);
498 this.passwordReader.ReadXmlAttributes(miniDoc.DocumentElement);
499 if (this.passwordReader.PasswordIsSet())
500 {
501 if (this.passwordReader is LegacyPasswordReader && (this.passwordReader as LegacyPasswordReader).ContemporaryAlgorithmDetected && (readerOptions == null || !readerOptions.IgnoreNotSupportedPasswordAlgorithms))
502 {
503 throw new NotSupportedContentException("A not supported, contemporary password algorithm for the worksheet protection was detected. Check possible packages to add support to NanoXLSX, or ignore this error by a reader option");
504 }
505 worksheet.SheetProtectionPassword.CopyFrom(this.passwordReader);
506 }
507 }
508
516 private static int ReadSheetProtectionAttribute(XmlReader reader, Worksheet.SheetProtectionValue sheetProtectionValue, Worksheet worksheet)
517 {
518 string attrName = Worksheet.GetSheetProtectionName(sheetProtectionValue);
519 if (reader.GetAttribute(attrName) != null)
520 {
521 worksheet.SheetProtectionValues.Add(sheetProtectionValue);
522 return 1;
523 }
524 return 0;
525 }
526
532 private static void GetMergedCells(XmlReader reader, Worksheet worksheet)
533 {
534 using (XmlReader subtree = reader.ReadSubtree())
535 {
536 subtree.Read(); // consume the mergeCells open tag
537 while (subtree.Read())
538 {
539 if (!XmlStreamUtils.IsElement(subtree, "mergeCell"))
540 {
541 continue;
542 }
543 string attribute = subtree.GetAttribute("ref");
544 if (attribute != null)
545 {
546 worksheet.MergeCells(new Range(attribute));
547 }
548 }
549 }
550 }
551
557 private static void GetSheetFormats(XmlReader reader, Worksheet worksheet)
558 {
559 string attribute = reader.GetAttribute("defaultColWidth");
560 if (attribute != null)
561 {
562 worksheet.DefaultColumnWidth = ParserUtils.ParseFloat(attribute);
563 }
564 attribute = reader.GetAttribute("defaultRowHeight");
565 if (attribute != null)
566 {
567 worksheet.DefaultRowHeight = ParserUtils.ParseFloat(attribute);
568 }
569 }
570
576 private static void GetAutoFilters(XmlReader reader, Worksheet worksheet)
577 {
578 string autoFilterRef = reader.GetAttribute("ref");
579 if (autoFilterRef != null)
580 {
581 Range range = new Range(autoFilterRef);
582 worksheet.SetAutoFilter(range.StartAddress.Column, range.EndAddress.Column);
583 }
584 }
585
592 private void GetColumns(XmlReader reader, Worksheet worksheet, ReaderOptions readerOptions)
593 {
594 using (XmlReader subtree = reader.ReadSubtree())
595 {
596 subtree.Read(); // consume the cols open tag
597 while (subtree.Read())
598 {
599 if (!XmlStreamUtils.IsElement(subtree, "col"))
600 {
601 continue;
602 }
603 int? min = null;
604 int? max = null;
605 List<int> indices = new List<int>();
606 string attribute = subtree.GetAttribute("min");
607 if (attribute != null)
608 {
609 min = ParserUtils.ParseInt(attribute);
610 max = min;
611 indices.Add(min.Value);
612 }
613 attribute = subtree.GetAttribute("max");
614 if (attribute != null)
615 {
616 max = ParserUtils.ParseInt(attribute);
617 }
618 if (min != null && max.Value != min.Value)
619 {
620 for (int i = min.Value; i <= max.Value; i++)
621 {
622 indices.Add(i);
623 }
624 }
625 attribute = subtree.GetAttribute("width");
626 float width = Worksheet.DefaultWorksheetColumnWidth;
627 if (attribute != null)
628 {
629 width = ParserUtils.ParseFloat(attribute);
630 }
631 attribute = subtree.GetAttribute("hidden");
632 bool hidden = false;
633 if (attribute != null && ParserUtils.ParseBinaryBool(attribute) == 1)
634 {
635 hidden = true;
636 }
637 attribute = subtree.GetAttribute("style");
638 Style defaultStyle = null;
639 if (attribute != null && resolvedStyles.TryGetValue(attribute, out var attributeValue))
640 {
641 defaultStyle = attributeValue;
642 }
643 foreach (int index in indices)
644 {
645 string columnAddress = Cell.ResolveColumnAddress(index - 1); // Transform to zero-based
646 if (defaultStyle != null)
647 {
648 worksheet.SetColumnDefaultStyle(columnAddress, defaultStyle);
649 }
650 if (width != Worksheet.DefaultWorksheetColumnWidth)
651 {
652 worksheet.SetColumnWidth(columnAddress, GetValidatedWidth(width, readerOptions));
653 }
654 if (hidden)
655 {
656 worksheet.AddHiddenColumn(columnAddress);
657 }
658 }
659 }
660 }
661 }
662
669 private void ReadCell(XmlReader rowReader, Worksheet worksheet, StringBuilder sb)
670 {
671 string address = rowReader.GetAttribute("r"); // Mandatory
672 string type = rowReader.GetAttribute("t"); // can be null
673 string styleNumber = rowReader.GetAttribute("s"); // can be null
674 string value = "";
675 if (!rowReader.IsEmptyElement)
676 {
677 using (XmlReader cellReader = rowReader.ReadSubtree())
678 {
679 cellReader.Read(); // consume <c>
680 while (cellReader.Read())
681 {
682 if (cellReader.NodeType != XmlNodeType.Element)
683 {
684 continue;
685 }
686 if (cellReader.LocalName.Equals("v", StringComparison.OrdinalIgnoreCase) ||
687 cellReader.LocalName.Equals("f", StringComparison.OrdinalIgnoreCase))
688 {
689 value = cellReader.ReadElementContentAsString();
690 }
691 else if (cellReader.LocalName.Equals("is", StringComparison.OrdinalIgnoreCase))
692 {
693 sb.Clear();
694 using (XmlReader isReader = cellReader.ReadSubtree())
695 {
696 isReader.Read(); // consume <is>
697 while (isReader.Read())
698 {
699 if (isReader.NodeType == XmlNodeType.Element &&
700 isReader.LocalName.Equals("t", StringComparison.OrdinalIgnoreCase))
701 {
702 sb.Append(isReader.ReadElementContentAsString());
703 }
704 }
705 }
706 value = sb.ToString();
707 }
708 }
709 }
710 }
711 Cell cell = ResolveCellData(value, type, styleNumber, address);
712 worksheet.AddCell(cell, address);
713 }
714
723 private Cell ResolveCellData(string raw, string type, string styleNumber, string address)
724 {
725 Cell.CellType importedType = Cell.CellType.Default;
726 object rawValue;
727 if (type == "b")
728 {
729 rawValue = TryParseBool(raw);
730 if (rawValue != null)
731 {
732 importedType = Cell.CellType.Bool;
733 }
734 else
735 {
736 rawValue = GetNumericValue(raw);
737 if (rawValue != null)
738 {
739 importedType = Cell.CellType.Number;
740 }
741 }
742 }
743 else if (type == "s")
744 {
745 importedType = Cell.CellType.String;
746 rawValue = ResolveSharedString(raw);
747 }
748 else if (type == "str")
749 {
750 importedType = Cell.CellType.Formula;
751 rawValue = raw;
752 }
753 else if (type == "inlineStr")
754 {
755 importedType = Cell.CellType.String;
756 rawValue = raw;
757 }
758 else if (dateStyles.Contains(styleNumber) && (type == null || type == "" || type == "n"))
759 {
760 rawValue = GetDateTimeValue(raw, Cell.CellType.Date, out importedType);
761 }
762 else if (timeStyles.Contains(styleNumber) && (type == null || type == "" || type == "n"))
763 {
764 rawValue = GetDateTimeValue(raw, Cell.CellType.Time, out importedType);
765 }
766 else
767 {
768 importedType = Cell.CellType.Number;
769 rawValue = GetNumericValue(raw);
770 }
771 if (rawValue == null && raw == "")
772 {
773 importedType = Cell.CellType.Empty;
774 rawValue = null;
775 }
776 else if (rawValue == null && raw.Length > 0)
777 {
778 importedType = Cell.CellType.String;
779 rawValue = raw;
780 }
781 Address cellAddress = new Address(address);
782 if (readerOptions != null)
783 {
784 if (readerOptions.EnforcedColumnTypes.Count > 0)
785 {
786 rawValue = GetEnforcedColumnValue(rawValue, importedType, cellAddress);
787 }
788 rawValue = GetGloballyEnforcedValue(rawValue, cellAddress);
789 rawValue = GetGloballyEnforcedFlagValues(rawValue, cellAddress);
790 importedType = ResolveType(rawValue, importedType);
791 if (importedType == Cell.CellType.Date && rawValue is DateTime && (DateTime)rawValue < DataUtils.FirstAllowedExcelDate)
792 {
793 // Fix conversion from time to date, where time has no days
794 rawValue = ((DateTime)rawValue).AddDays(1);
795 }
796 }
797 return CreateCell(rawValue, importedType, cellAddress, styleNumber);
798 }
799
806 private static Cell.CellType ResolveType(object value, Cell.CellType defaultType)
807 {
808 if (defaultType == Cell.CellType.Formula)
809 {
810 return defaultType;
811 }
812 if (value == null)
813 {
814 return Cell.CellType.Empty;
815 }
816 switch (value)
817 {
818 case uint _:
819 case long _:
820 case ulong _:
821 case short _:
822 case ushort _:
823 case float _:
824 case double _:
825 case byte _:
826 case sbyte _:
827 case int _:
828 return Cell.CellType.Number;
829 case DateTime _:
830 return Cell.CellType.Date;
831 case TimeSpan _:
832 return Cell.CellType.Time;
833 case bool _:
834 return Cell.CellType.Bool;
835 default:
836 return Cell.CellType.String;
837 }
838 }
839
846 private object GetGloballyEnforcedFlagValues(object data, Address address)
847 {
848 if (address.Row < readerOptions.EnforcingStartRowNumber)
849 {
850 return data;
851 }
852 if (readerOptions.EnforceDateTimesAsNumbers)
853 {
854 if (data is DateTime)
855 {
856 data = DataUtils.GetOADateTime((DateTime)data, true);
857 }
858 else if (data is TimeSpan)
859 {
860 data = DataUtils.GetOATime((TimeSpan)data);
861 }
862 }
863 if (readerOptions.EnforceEmptyValuesAsString && data == null)
864 {
865 return "";
866 }
867 return data;
868 }
869
876 private object GetGloballyEnforcedValue(object data, Address address)
877 {
878 if (address.Row < readerOptions.EnforcingStartRowNumber)
879 {
880 return data;
881 }
882 if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.AllNumbersToDouble)
883 {
884 object tempDouble = ConvertToDouble(data, readerOptions);
885 if (tempDouble != null)
886 {
887 return tempDouble;
888 }
889 }
890 else if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.AllNumbersToDecimal)
891 {
892 object tempDecimal = ConvertToDecimal(data, readerOptions);
893 if (tempDecimal != null)
894 {
895 return tempDecimal;
896 }
897 }
898 else if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.AllNumbersToInt)
899 {
900 object tempInt = ConvertToInt(data);
901 if (tempInt != null)
902 {
903 return tempInt;
904 }
905 }
906 else if (readerOptions.GlobalEnforcingType == ReaderOptions.GlobalType.EverythingToString)
907 {
908 return ConvertToString(data, readerOptions);
909 }
910 return data;
911 }
912
920 private object GetEnforcedColumnValue(object data, Cell.CellType importedTyp, Address address)
921 {
922 if (address.Row < readerOptions.EnforcingStartRowNumber)
923 {
924 return data;
925 }
926 if (!readerOptions.EnforcedColumnTypes.TryGetValue(address.Column, out var columnType))
927 {
928 return data;
929 }
930 if (importedTyp == Cell.CellType.Formula)
931 {
932 return data;
933 }
934 switch (columnType)
935 {
936 case ReaderOptions.ColumnType.Numeric:
937 return GetNumericValue(data, importedTyp, readerOptions);
938 case ReaderOptions.ColumnType.Decimal:
939 return ConvertToDecimal(data, readerOptions);
940 case ReaderOptions.ColumnType.Double:
941 return ConvertToDouble(data, readerOptions);
942 case ReaderOptions.ColumnType.Date:
943 return ConvertToDate(data, readerOptions);
944 case ReaderOptions.ColumnType.Time:
945 return ConvertToTime(data, readerOptions);
946 case ReaderOptions.ColumnType.Bool:
947 return ConvertToBool(data, readerOptions);
948 default:
949 return ConvertToString(data, readerOptions);
950 }
951 }
952
959 private object ConvertToBool(object data, ReaderOptions readerOptions)
960 {
961 switch (data)
962 {
963 case bool _:
964 return data;
965 case uint _:
966 case long _:
967 case ulong _:
968 case short _:
969 case ushort _:
970 case float _:
971 case byte _:
972 case sbyte _:
973 case int _:
974 object tempObject = ConvertToDouble(data, readerOptions);
975 if (tempObject is double)
976 {
977 double tempDouble = (double)tempObject;
978 if (double.Equals(tempDouble, 0d))
979 {
980 return false;
981 }
982 else if (double.Equals(tempDouble, 1d))
983 {
984 return true;
985 }
986 }
987 break;
988 case string _:
989
990 string tempString = (string)data;
991 bool? tempBool = TryParseBool(tempString);
992 if (tempBool != null)
993 {
994 return tempBool.Value;
995 }
996 break;
997 }
998 return data;
999 }
1000
1006 private static bool? TryParseBool(string raw)
1007 {
1008 if (raw == "0")
1009 {
1010 return false;
1011 }
1012 else if (raw == "1")
1013 {
1014 return true;
1015 }
1016 else
1017 {
1018 bool value;
1019 if (bool.TryParse(raw, out value))
1020 {
1021 return value;
1022 }
1023 else
1024 {
1025 return null;
1026 }
1027 }
1028 }
1029
1036 private object ConvertToDouble(object data, ReaderOptions readerOptions)
1037 {
1038 object value = ConvertToDecimal(data, readerOptions);
1039 if (value is decimal)
1040 {
1041 return Decimal.ToDouble((decimal)value);
1042 }
1043 else if (value is float)
1044 {
1045 return Convert.ToDouble((float)value);
1046 }
1047 return value;
1048 }
1049
1056 private object ConvertToDecimal(object data, ReaderOptions readerOptions)
1057 {
1058 IConvertible converter;
1059 switch (data)
1060 {
1061 case double _:
1062 return data;
1063 case uint _:
1064 case long _:
1065 case ulong _:
1066 case short _:
1067 case ushort _:
1068 case float _:
1069 case byte _:
1070 case sbyte _:
1071 case int _:
1072 converter = data as IConvertible;
1073 double tempDouble = converter.ToDouble(DataUtils.InvariantCulture);
1074 if (tempDouble > (double)decimal.MaxValue || tempDouble < (double)decimal.MinValue)
1075 {
1076 return data;
1077 }
1078 else
1079 {
1080 return converter.ToDecimal(DataUtils.InvariantCulture);
1081 }
1082 case bool _:
1083 if ((bool)data)
1084 {
1085 return decimal.One;
1086 }
1087 else
1088 {
1089 return decimal.Zero;
1090 }
1091 case DateTime _:
1092 return new decimal(DataUtils.GetOADateTime((DateTime)data));
1093 case TimeSpan _:
1094 return new decimal(DataUtils.GetOATime((TimeSpan)data));
1095 case string _:
1096 decimal dValue;
1097 string tempString = (string)data;
1098 if (ParserUtils.TryParseDecimal(tempString, out dValue))
1099 {
1100 return dValue;
1101 }
1102 DateTime? tempDate = TryParseDate(tempString, readerOptions);
1103 if (tempDate != null)
1104 {
1105 return new decimal(DataUtils.GetOADateTime(tempDate.Value));
1106 }
1107 TimeSpan? tempTime = TryParseTime(tempString, readerOptions);
1108 if (tempTime != null)
1109 {
1110 return new decimal(DataUtils.GetOATime(tempTime.Value));
1111 }
1112 break;
1113 }
1114 return data;
1115 }
1116
1122 private static object ConvertToInt(object data)
1123 {
1124 double tempDouble;
1125 switch (data)
1126 {
1127 case uint _:
1128 case long _:
1129 case ulong _:
1130 break;
1131 case DateTime _:
1132 tempDouble = DataUtils.GetOADateTime((DateTime)data, true);
1133 return ConvertDoubleToInt(tempDouble);
1134 case TimeSpan _:
1135 tempDouble = DataUtils.GetOATime((TimeSpan)data);
1136 return ConvertDoubleToInt(tempDouble);
1137 case float _:
1138 case double _:
1139 int? tempInt = TryConvertDoubleToInt(data);
1140 if (tempInt != null)
1141 {
1142 return tempInt;
1143 }
1144 break;
1145 case bool _:
1146 return (bool)data ? 1 : 0;
1147 case string _:
1148 int tempInt2;
1149 if (ParserUtils.TryParseInt((string)data, out tempInt2))
1150 {
1151 return tempInt2;
1152 }
1153 break;
1154 }
1155 return null;
1156 }
1157
1164 private object ConvertToDate(object data, ReaderOptions readerOptions)
1165 {
1166 switch (data)
1167 {
1168 case DateTime _:
1169 return data;
1170 case TimeSpan _:
1171 DateTime root = DataUtils.FirstAllowedExcelDate;
1172 TimeSpan time = (TimeSpan)data;
1173 root = root.AddDays(-1); // Fix offset of 1
1174 root = root.AddHours(time.Hours);
1175 root = root.AddMinutes(time.Minutes);
1176 root = root.AddSeconds(time.Seconds);
1177 return root;
1178 case double _:
1179 case uint _:
1180 case long _:
1181 case ulong _:
1182 case short _:
1183 case ushort _:
1184 case float _:
1185 case byte _:
1186 case sbyte _:
1187 case int _:
1188 return ConvertDateFromDouble(data, readerOptions);
1189 case string _:
1190 DateTime? date2 = TryParseDate((string)data, readerOptions);
1191 if (date2 != null)
1192 {
1193 return date2.Value;
1194 }
1195 return ConvertDateFromDouble(data, readerOptions);
1196 }
1197 return data;
1198 }
1199
1206 private DateTime? TryParseDate(string raw, ReaderOptions readerOptions)
1207 {
1208 DateTime dateTime;
1209 bool isDateTime;
1210 if (readerOptions == null || string.IsNullOrEmpty(readerOptions.DateTimeFormat) || readerOptions.TemporalCultureInfo == null)
1211 {
1212 isDateTime = DateTime.TryParse(raw, ReaderOptions.DefaultCultureInfo, DateTimeStyles.None, out dateTime);
1213 }
1214 else
1215 {
1216 isDateTime = DateTime.TryParseExact(raw, readerOptions.DateTimeFormat, readerOptions.TemporalCultureInfo, DateTimeStyles.None, out dateTime);
1217 }
1218 if (isDateTime && dateTime >= DataUtils.FirstAllowedExcelDate && dateTime <= DataUtils.LastAllowedExcelDate)
1219 {
1220 return dateTime;
1221 }
1222 return null;
1223 }
1224
1231 private object ConvertToTime(object data, ReaderOptions readerOptions)
1232 {
1233 switch (data)
1234 {
1235 case DateTime _:
1236 return ConvertTimeFromDouble(data, readerOptions);
1237 case TimeSpan _:
1238 return data;
1239 case double _:
1240 case uint _:
1241 case long _:
1242 case ulong _:
1243 case short _:
1244 case ushort _:
1245 case float _:
1246 case byte _:
1247 case sbyte _:
1248 case int _:
1249 return ConvertTimeFromDouble(data, readerOptions);
1250 case string _:
1251 TimeSpan? time = TryParseTime((string)data, readerOptions);
1252 if (time != null)
1253 {
1254 return time;
1255 }
1256 return ConvertTimeFromDouble(data, readerOptions);
1257 }
1258 return data;
1259 }
1260
1267 private TimeSpan? TryParseTime(string raw, ReaderOptions readerOptions)
1268 {
1269 TimeSpan timeSpan;
1270 bool isTimeSpan;
1271 if (readerOptions == null || string.IsNullOrEmpty(readerOptions.TimeSpanFormat) || readerOptions.TemporalCultureInfo == null)
1272 {
1273 isTimeSpan = TimeSpan.TryParse(raw, ReaderOptions.DefaultCultureInfo, out timeSpan);
1274 }
1275 else
1276 {
1277 isTimeSpan = TimeSpan.TryParseExact(raw, readerOptions.TimeSpanFormat, readerOptions.TemporalCultureInfo, out timeSpan);
1278 }
1279 if (isTimeSpan && timeSpan.Days >= 0 && timeSpan.Days < DataUtils.MaxOADateValue)
1280 {
1281 return timeSpan;
1282 }
1283 return null;
1284 }
1285
1294 private static object GetDateTimeValue(string raw, Cell.CellType valueType, out Cell.CellType resolvedType)
1295 {
1296 double dValue;
1297 if (!ParserUtils.TryParseDouble(raw, out dValue))
1298 {
1299 resolvedType = Cell.CellType.String;
1300 return raw;
1301 }
1302 if ((valueType == Cell.CellType.Date && (dValue < DataUtils.MinOADateValue || dValue > DataUtils.MaxOADateValue)) || (valueType == Cell.CellType.Time && (dValue < 0.0 || dValue > DataUtils.MaxOADateValue)))
1303 {
1304 // fallback to number (cannot be anything else)
1305 resolvedType = Cell.CellType.Number;
1306 return GetNumericValue(raw);
1307 }
1308 DateTime tempDate = DataUtils.GetDateFromOA(dValue);
1309 if (dValue < 1.0)
1310 {
1311 tempDate = tempDate.AddDays(1); // Modify wrong 1st date when < 1
1312 }
1313 if (valueType == Cell.CellType.Date)
1314 {
1315 resolvedType = Cell.CellType.Date;
1316 return tempDate;
1317 }
1318 else
1319 {
1320 resolvedType = Cell.CellType.Time;
1321 return new TimeSpan((int)dValue, tempDate.Hour, tempDate.Minute, tempDate.Second);
1322 }
1323 }
1324
1331 private object ConvertDateFromDouble(object data, ReaderOptions readerOptions)
1332 {
1333 object oaDate = ConvertToDouble(data, readerOptions);
1334 if (oaDate is double && (double)oaDate < DataUtils.MaxOADateValue)
1335 {
1336 DateTime date = DataUtils.GetDateFromOA((double)oaDate);
1337 if (date >= DataUtils.FirstAllowedExcelDate && date <= DataUtils.LastAllowedExcelDate)
1338 {
1339 return date;
1340 }
1341 }
1342 return data;
1343 }
1344
1351 private object ConvertTimeFromDouble(object data, ReaderOptions readerOptions)
1352 {
1353 object oaDate = ConvertToDouble(data, readerOptions);
1354 if (oaDate is double)
1355 {
1356 double d = (double)oaDate;
1357 if (d >= DataUtils.MinOADateValue && d <= DataUtils.MaxOADateValue)
1358 {
1359 DateTime date = DataUtils.GetDateFromOA(d);
1360 return new TimeSpan((int)d, date.Hour, date.Minute, date.Second);
1361 }
1362 }
1363 return data;
1364 }
1365
1371 private static int? TryConvertDoubleToInt(object data)
1372 {
1373 IConvertible converter = data as IConvertible;
1374 double dValue = converter.ToDouble(ReaderOptions.DefaultCultureInfo);
1375 if (dValue > int.MinValue && dValue < int.MaxValue)
1376 {
1377 return converter.ToInt32(ReaderOptions.DefaultCultureInfo);
1378 }
1379 return null;
1380 }
1381
1387 private static int ConvertDoubleToInt(object data)
1388 {
1389 IConvertible converter = data as IConvertible;
1390 return converter.ToInt32(ReaderOptions.DefaultCultureInfo);
1391 }
1392
1399 private string ConvertToString(object data, ReaderOptions readerOptions)
1400 {
1401 switch (data)
1402 {
1403 case int _:
1404 return ((int)data).ToString(ReaderOptions.DefaultCultureInfo);
1405 case uint _:
1406 return ((uint)data).ToString(ReaderOptions.DefaultCultureInfo);
1407 case long _:
1408 return ((long)data).ToString(ReaderOptions.DefaultCultureInfo);
1409 case ulong _:
1410 return ((ulong)data).ToString(ReaderOptions.DefaultCultureInfo);
1411 case float _:
1412 return ((float)data).ToString(ReaderOptions.DefaultCultureInfo);
1413 case double _:
1414 return ((double)data).ToString(ReaderOptions.DefaultCultureInfo);
1415 case bool _:
1416 return ((bool)data).ToString(ReaderOptions.DefaultCultureInfo);
1417 case DateTime _:
1418 return ((DateTime)data).ToString(readerOptions.DateTimeFormat, ParserUtils.InvariantCulture);
1419 case TimeSpan _:
1420 return ((TimeSpan)data).ToString(readerOptions.TimeSpanFormat, ParserUtils.InvariantCulture);
1421 default:
1422 if (data == null)
1423 {
1424 return null;
1425 }
1426 return data.ToString();
1427 }
1428 }
1429
1437 private object GetNumericValue(object raw, Cell.CellType importedType, ReaderOptions readerOptions)
1438 {
1439 if (raw == null)
1440 {
1441 return null;
1442 }
1443 object tempObject;
1444 switch (importedType)
1445 {
1446 case Cell.CellType.String:
1447 string tempString = raw.ToString();
1448 tempObject = GetNumericValue(tempString);
1449 if (tempObject != null)
1450 {
1451 return tempObject;
1452 }
1453 DateTime? tempDate = TryParseDate(tempString, readerOptions);
1454 if (tempDate != null)
1455 {
1456 return DataUtils.GetOADateTime(tempDate.Value);
1457 }
1458 TimeSpan? tempTime = TryParseTime(tempString, readerOptions);
1459 if (tempTime != null)
1460 {
1461 return DataUtils.GetOATime(tempTime.Value);
1462 }
1463 tempObject = ConvertToBool(raw, readerOptions);
1464 if (tempObject is bool)
1465 {
1466 return (bool)tempObject ? 1 : 0;
1467 }
1468 break;
1469 case Cell.CellType.Number:
1470 return raw;
1471 case Cell.CellType.Date:
1472 return DataUtils.GetOADateTime((DateTime)raw);
1473 case Cell.CellType.Time:
1474 return DataUtils.GetOATime((TimeSpan)raw);
1475 case Cell.CellType.Bool:
1476 if ((bool)raw)
1477 {
1478 return 1;
1479 }
1480 return 0;
1481 }
1482 return raw;
1483 }
1484
1485
1491 private static object GetNumericValue(string raw)
1492 {
1493 bool hasDecimalPoint = raw.Contains(".");
1494
1495 // Only try integer parsing if there's no decimal point
1496 if (!hasDecimalPoint)
1497 {
1498 // integer section (unchanged)
1499 uint uiValue;
1500 int iValue;
1501 bool canBeUint = ParserUtils.TryParseUint(raw, out uiValue);
1502 bool canBeInt = ParserUtils.TryParseInt(raw, out iValue);
1503 if (canBeUint && !canBeInt)
1504 {
1505 return uiValue;
1506 }
1507 else if (canBeInt)
1508 {
1509 return iValue;
1510 }
1511 ulong ulValue;
1512 long lValue;
1513 bool canBeUlong = ParserUtils.TryParseUlong(raw, out ulValue);
1514 bool canBeLong = ParserUtils.TryParseLong(raw, out lValue);
1515 if (canBeUlong && !canBeLong)
1516 {
1517 return ulValue;
1518 }
1519 else if (canBeLong)
1520 {
1521 return lValue;
1522 }
1523 }
1524
1525 decimal dcValue;
1526 double dValue;
1527 float fValue;
1528
1529 // Decimal/float section
1530 if (ParserUtils.TryParseDecimal(raw, out dcValue))
1531 {
1532 // Check if the value can be accurately represented as float
1533 float testFloat = decimal.ToSingle(dcValue);
1534 decimal backToDecimal = (decimal)testFloat;
1535
1536 // If converting to float and back preserves the value, use float
1537 if (dcValue == backToDecimal)
1538 {
1539 return testFloat;
1540 }
1541 else
1542 {
1543 // Otherwise use double for better precision
1544 return decimal.ToDouble(dcValue);
1545 }
1546 }
1547 // High range float section
1548 else if (ParserUtils.TryParseFloat(raw, out fValue) && fValue >= float.MinValue && fValue <= float.MaxValue && !float.IsInfinity(fValue))
1549 {
1550 return fValue;
1551 }
1552 if (ParserUtils.TryParseDouble(raw, out dValue))
1553 {
1554 return dValue;
1555 }
1556 return null;
1557 }
1558
1566 private float GetValidatedWidth(float rawValue, ReaderOptions readerOptions)
1567 {
1568 if (rawValue < Worksheet.MinColumnWidth)
1569 {
1570 if (readerOptions.EnforceStrictValidation)
1571 {
1572 throw new WorksheetException($"The worksheet contains an invalid column width (too small: {rawValue}) value. This error is ignored when disabling the reader option 'EnforceStrictValidation'");
1573 }
1574 else
1575 {
1576 return Worksheet.MinColumnWidth;
1577 }
1578 }
1579 else if (rawValue > Worksheet.MaxColumnWidth)
1580 {
1581 if (readerOptions.EnforceStrictValidation)
1582 {
1583 throw new WorksheetException($"The worksheet contains an invalid column width (too large: {rawValue}) value. This error is ignored when disabling the reader option 'EnforceStrictValidation'");
1584 }
1585 else
1586 {
1587 return Worksheet.MaxColumnWidth;
1588 }
1589 }
1590 else
1591 {
1592 return rawValue;
1593 }
1594 }
1595
1603 private float GetValidatedHeight(float rawValue, ReaderOptions readerOptions)
1604 {
1605 if (rawValue < Worksheet.MinRowHeight)
1606 {
1607 if (readerOptions.EnforceStrictValidation)
1608 {
1609 throw new WorksheetException($"The worksheet contains an invalid row height (too small: {rawValue}) value. Consider using the ImportOption 'EnforceValidRowDimensions' to ignore this error.");
1610 }
1611 else
1612 {
1613 return Worksheet.MinRowHeight;
1614 }
1615 }
1616 else if (rawValue > Worksheet.MaxRowHeight)
1617 {
1618 if (readerOptions.EnforceStrictValidation)
1619 {
1620 throw new WorksheetException($"The worksheet contains an invalid row height (too large: {rawValue}) value. Consider using the ImportOption 'EnforceValidRowDimensions' to ignore this error.");
1621 }
1622 else
1623 {
1624 return Worksheet.MaxRowHeight;
1625 }
1626 }
1627 else
1628 {
1629 return rawValue;
1630 }
1631 }
1632
1638 private string ResolveSharedString(string raw)
1639 {
1640 int stringId;
1641 if (ParserUtils.TryParseInt(raw, out stringId))
1642 {
1643 string resolvedString = SharedStrings.ElementAtOrDefault(stringId);
1644 if (resolvedString == null)
1645 {
1646 return raw;
1647 }
1648 else
1649 {
1650 return resolvedString;
1651 }
1652 }
1653 return raw;
1654 }
1655
1664 private Cell CreateCell(object value, Cell.CellType type, Address address, string styleNumber = null)
1665 {
1666 Cell cell = new Cell(value, type, address);
1667 if (styleNumber != null && resolvedStyles.TryGetValue(styleNumber, out var styleValue))
1668 {
1669 cell.SetStyle(styleValue);
1670 }
1671 return cell;
1672 }
1673 #endregion
1674 }
1675}
Class representing a reader for legacy passwords.
void Execute()
Method to execute the main logic of the plug-in (interface implementation).
List< String > SharedStrings
Gets or Sets the list of the shared strings. The index of the list corresponds to the index,...
int CurrentWorksheetID
Gets or sets the (r)ID of the current worksheet.
void Init(Stream stream, Workbook workbook, IOptions readerOptions, Action< Stream, Workbook, string, IOptions, int?> inlinePluginHandler)
Initialization method (interface implementation).
Action< Stream, Workbook, string, IOptions, int?> InlinePluginHandler
Reference to the ReaderPlugInHandler, to be used for post operations in the Execute method.
WorksheetReader()
Default constructor - Must be defined for instantiation of the plug-ins.
Workbook Workbook
Workbook reference where read data is stored (should not be null).
Class representing a collection of pre-processed styles and their components. This class is internall...
Exceptions.IOException IOException