NanoXLSX.Reader 3.0.0-rc.5
Loading...
Searching...
No Matches
SharedStringsReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You can find a copy of the license in the project folder or at: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.Text;
12using System.Xml;
13using NanoXLSX.Interfaces;
14using NanoXLSX.Interfaces.Reader;
15using NanoXLSX.Registry;
16using NanoXLSX.Registry.Attributes;
17using NanoXLSX.Utils;
18using IOException = NanoXLSX.Exceptions.IOException;
19
21{
25 [NanoXlsxPlugIn(PlugInUUID = PlugInUUID.SharedStringsReader)]
26 public class SharedStringsReader : ISharedStringReader
27 {
28
29 #region privateFields
30 private bool capturePhoneticCharacters;
31 private readonly List<PhoneticInfo> phoneticsInfo;
32 private MemoryStream stream;
33 #endregion
34
35 #region properties
36
43 public List<string> SharedStrings { get; private set; }
44
48 public Workbook Workbook { get ; set; }
52 public IOptions Options { get; set; }
56 public Action<MemoryStream, Workbook, string, IOptions, int?> InlinePluginHandler { get; set; }
57 #endregion
58
59 #region constructors
64 {
65 phoneticsInfo = new List<PhoneticInfo>();
66 SharedStrings = new List<string>();
67 }
68 #endregion
69
70 #region methods
/// <summary>
/// Initialization method (interface implementation). Stores the passed stream, workbook, options and
/// inline plug-in handler on this reader instance.
/// </summary>
/// <param name="stream">Stream that is later parsed by <see cref="Execute"/>.</param>
/// <param name="workbook">Workbook reference, exposed through the Workbook property.</param>
/// <param name="readerOptions">Reader options, exposed through the Options property.</param>
/// <param name="inlinePluginHandler">Handler that is invoked by <see cref="Execute"/> after the shared strings were collected.</param>
public void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions, Action<MemoryStream, Workbook, string, IOptions, int?> inlinePluginHandler)
{
    // The phonetic switch is only taken over when the options are of the concrete ReaderOptions type;
    // for any other IOptions implementation the current flag value is left untouched.
    if (readerOptions is ReaderOptions concreteOptions)
    {
        capturePhoneticCharacters = concreteOptions.EnforcePhoneticCharacterImport;
    }

    InlinePluginHandler = inlinePluginHandler;
    Options = readerOptions;
    Workbook = workbook;
    this.stream = stream;
}
89
/// <summary>
/// Method to execute the main logic of the plug-in (interface implementation).
/// Loads the XML document from the stream passed to Init, adds the text of every "si" child of the
/// document element to SharedStrings and finally invokes the inline plug-in handler (if one is set).
/// </summary>
/// <exception cref="IOException">Thrown if any exception occurs while reading or processing the stream.
/// The original exception is attached as inner exception.</exception>
public void Execute()
{
    try
    {
        using (stream) // The stream is disposed when this block is left
        {
            XmlDocument document = new XmlDocument();
            document.XmlResolver = null; // No external resources are resolved
            XmlReaderSettings readerSettings = new XmlReaderSettings() { XmlResolver = null };
            using (XmlReader xmlReader = XmlReader.Create(stream, readerSettings))
            {
                document.Load(xmlReader);
                StringBuilder buffer = new StringBuilder();
                foreach (XmlNode entry in document.DocumentElement.ChildNodes)
                {
                    if (!entry.LocalName.Equals("si", StringComparison.OrdinalIgnoreCase))
                    {
                        continue; // Only string items are relevant
                    }

                    // The builder is reused for every string item
                    buffer.Clear();
                    GetTextToken(entry, ref buffer);
                    string sharedString = capturePhoneticCharacters
                        ? ProcessPhoneticCharacters(buffer)
                        : buffer.ToString();
                    SharedStrings.Add(sharedString);
                }
                // Post operation: still executed while the stream is not disposed
                InlinePluginHandler?.Invoke(stream, Workbook, PlugInUUID.SharedStringsInlineReader, Options, null);
            }
        }
    }
    catch (Exception ex)
    {
        throw new IOException("The XML entry could not be read from the " + nameof(stream) + ". Please see the inner exception:", ex);
    }
}
133
/// <summary>
/// Recursively walks the passed node and appends the inner text of every "t" node to the string builder.
/// "rPh" (phonetic run) nodes are never appended to the text. If phonetic capturing is enabled, their
/// inner text and their "sb" / "eb" attribute values are stored as phonetic information instead.
/// </summary>
/// <param name="node">Node to process (its child nodes are processed recursively).</param>
/// <param name="sb">String builder that receives the collected text. The ref modifier is kept for
/// compatibility with the existing call sites.</param>
/// <exception cref="InvalidDataException">Thrown if a phonetic run has to be captured but does not
/// define both the "sb" and the "eb" attribute.</exception>
private void GetTextToken(XmlNode node, ref StringBuilder sb)
{
    if (node.LocalName.Equals("rPh", StringComparison.OrdinalIgnoreCase))
    {
        if (capturePhoneticCharacters && !string.IsNullOrEmpty(node.InnerText))
        {
            // GetNamedItem returns null for an absent attribute. Fail with a descriptive exception
            // instead of running into a NullReferenceException on the InnerText access.
            XmlNode startAttribute = node.Attributes?.GetNamedItem("sb");
            XmlNode endAttribute = node.Attributes?.GetNamedItem("eb");
            if (startAttribute == null || endAttribute == null)
            {
                throw new InvalidDataException("The phonetic run (rPh) of a shared string does not define the mandatory attributes 'sb' and 'eb'");
            }
            phoneticsInfo.Add(new PhoneticInfo(node.InnerText, startAttribute.InnerText, endAttribute.InnerText));
        }
        // Phonetic runs (including all of their children) are not part of the regular text
        return;
    }

    if (node.LocalName.Equals("t", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrEmpty(node.InnerText))
    {
        sb.Append(node.InnerText);
    }
    if (node.HasChildNodes)
    {
        foreach (XmlNode childNode in node.ChildNodes)
        {
            GetTextToken(childNode, ref sb);
        }
    }
}
164
/// <summary>
/// Merges the collected phonetic information into the text of the passed string builder.
/// Each phonetic value is inserted in parentheses directly after the text position
/// StartIndex + Length of its entry. The collected phonetic information is cleared afterwards.
/// </summary>
/// <param name="sb">String builder that holds the plain text of the current shared string.</param>
/// <returns>The unchanged text if no phonetic information was collected, otherwise the text with
/// the inserted phonetic values.</returns>
private string ProcessPhoneticCharacters(StringBuilder sb)
{
    string plainText = sb.ToString();
    if (phoneticsInfo.Count == 0)
    {
        return plainText;
    }

    StringBuilder merged = new StringBuilder();
    int cursor = 0; // Position in plainText up to which the text was already copied
    for (int i = 0; i < phoneticsInfo.Count; i++)
    {
        PhoneticInfo run = phoneticsInfo[i];
        int runEnd = run.StartIndex + run.Length;
        // Copy the text up to the end of the run, then add the phonetic value in parentheses
        merged.Append(plainText.Substring(cursor, runEnd - cursor));
        merged.Append('(');
        merged.Append(run.Value);
        merged.Append(')');
        cursor = runEnd;
    }
    // Remaining text after the last phonetic run
    merged.Append(plainText.Substring(cursor));

    phoneticsInfo.Clear();
    return merged.ToString();
}
190
191
192 #endregion
193
194 #region sub-classes
/// <summary>
/// Class to hold the value and the text range of one phonetic run.
/// </summary>
private sealed class PhoneticInfo
{
    /// <summary>
    /// Phonetic value as passed to the constructor.
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Parsed start value of the run.
    /// </summary>
    public int StartIndex { get; }

    /// <summary>
    /// Difference between the parsed end value and the parsed start value.
    /// </summary>
    public int Length { get; }

    /// <summary>
    /// Constructor with all parameters.
    /// </summary>
    /// <param name="value">Phonetic value.</param>
    /// <param name="start">Start value as string (parsed by ParserUtils.ParseInt).</param>
    /// <param name="end">End value as string (parsed by ParserUtils.ParseInt).</param>
    public PhoneticInfo(string value, string start, string end)
    {
        int parsedStart = ParserUtils.ParseInt(start);
        int parsedEnd = ParserUtils.ParseInt(end);

        Value = value;
        StartIndex = parsedStart;
        Length = parsedEnd - parsedStart;
    }
}
228 #endregion
229 }
230}
void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions, Action< MemoryStream, Workbook, string, IOptions, int?> inlinePluginHandler)
Initialization method (interface implementation).
Workbook Workbook
Workbook reference where read data is stored (should not be null).
List< string > SharedStrings
List of shared string entries.
Action< MemoryStream, Workbook, string, IOptions, int?> InlinePluginHandler
Reference to the ReaderPlugInHandler, to be used for post operations in the Execute method.
SharedStringsReader()
Default constructor - Must be defined for instantiation of the plug-ins.
void Execute()
Method to execute the main logic of the plug-in (interface implementation).
Exceptions.IOException IOException