NanoXLSX.Reader 3.1.0
Loading...
Searching...
No Matches
SharedStringsReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.Text;
12using System.Xml;
13using NanoXLSX.Interfaces;
14using NanoXLSX.Interfaces.Reader;
15using NanoXLSX.Registry;
16using NanoXLSX.Registry.Attributes;
17using NanoXLSX.Utils;
18using NanoXLSX.Utils.Xml;
19using IOException = NanoXLSX.Exceptions.IOException;
20
22{
26 [NanoXlsxPlugIn(PlugInUUID = PlugInUUID.SharedStringsReader)]
27 public class SharedStringsReader : ISharedStringReader
28 {
29
30 #region privateFields
// Flag set in Init from ReaderOptions.EnforcePhoneticCharacterImport. When true, rPh elements are
// collected while reading and their text is inserted (in parentheses) into the shared string.
31 private bool capturePhoneticCharacters;
// Phonetic runs collected for the <si> entry currently being read; filled by ReadPhoneticElement
// and cleared again by ProcessPhoneticCharacters once the entry has been assembled.
32 private readonly List<PhoneticInfo> phoneticsInfo;
// Input stream assigned in Init; Execute reads it and disposes it afterwards.
33 private Stream stream;
34 #endregion
35
36 #region properties
37
// List of shared string entries. Created empty in the constructor and filled by Execute,
// one entry per <si> element in document order.
44 public List<string> SharedStrings { get; private set; }
45
// Workbook reference where read data is stored (should not be null). Assigned in Init.
49 public Workbook Workbook { get; set; }
// Reader options as passed to Init; forwarded unchanged to the inline plug-in handler in Execute.
53 public IOptions Options { get; set; }
// Reference to the ReaderPlugInHandler, to be used for post operations in the Execute method.
// Execute invokes it null-conditionally, so it may be left unset.
57 public Action<Stream, Workbook, string, IOptions, int?> InlinePluginHandler { get; set; }
58 #endregion
59
60 #region constructors
// Default constructor - must be defined for instantiation of the plug-ins.
// NOTE(review): the constructor signature line is not present in this listing; only the body is visible.
65 {
// Start with empty collections so that Execute can append without null checks.
66 phoneticsInfo = new List<PhoneticInfo>();
67 SharedStrings = new List<string>();
68 }
69 #endregion
70
71 #region methods
/// <summary>
/// Initialization method (interface implementation). Stores the passed references and derives the
/// phonetic capture flag from the reader options.
/// </summary>
/// <param name="stream">Stream of the XML entry to read; it is consumed and disposed by <see cref="Execute"/>.</param>
/// <param name="workbook">Workbook reference, exposed through <see cref="Workbook"/>.</param>
/// <param name="readerOptions">Reader options, exposed through <see cref="Options"/>. Phonetic characters are only
/// captured if this is a <c>ReaderOptions</c> instance with <c>EnforcePhoneticCharacterImport</c> set.</param>
/// <param name="inlinePluginHandler">Handler invoked at the end of <see cref="Execute"/>; may be null.</param>
public void Init(Stream stream, Workbook workbook, IOptions readerOptions, Action<Stream, Workbook, string, IOptions, int?> inlinePluginHandler)
{
    this.stream = stream;
    this.Workbook = workbook;
    this.Options = readerOptions;
    this.InlinePluginHandler = inlinePluginHandler;
    // Always re-derive the flag: if Init is called again with options that are null or not a ReaderOptions
    // instance, a value left over from a previous initialization must not stay active.
    this.capturePhoneticCharacters = readerOptions is ReaderOptions options && options.EnforcePhoneticCharacterImport;
}
90
/// <summary>
/// Method to execute the main logic of the plug-in (interface implementation). Reads every <c>si</c> element
/// of the stream, adds the resolved text to <see cref="SharedStrings"/> and finally invokes the inline plug-in
/// handler, if one was assigned. The stream is disposed when the method leaves.
/// </summary>
/// <exception cref="IOException">Thrown if any exception occurs while reading; the cause is passed as inner exception.</exception>
public void Execute()
{
    try
    {
        using (stream) // Disposes the stream once processing has finished
        {
            StringBuilder entryText = new StringBuilder();
            using (XmlReader xmlReader = XmlReader.Create(stream, XmlStreamUtils.CreateSettings()))
            {
                while (xmlReader.Read())
                {
                    if (XmlStreamUtils.IsElement(xmlReader, "si"))
                    {
                        // The builder is reused for every entry
                        entryText.Clear();
                        ReadSiElement(xmlReader, entryText);
                        string entry = capturePhoneticCharacters
                            ? ProcessPhoneticCharacters(entryText)
                            : entryText.ToString();
                        SharedStrings.Add(entry);
                    }
                }
                // Post operation: runs after all entries were read, while reader and stream are still open
                InlinePluginHandler?.Invoke(stream, Workbook, PlugInUUID.SharedStringsInlineReader, Options, null);
            }
        }
    }
    catch (Exception ex)
    {
        throw new IOException("The XML entry could not be read from the " + nameof(stream) + ". Please see the inner exception:", ex);
    }
}
130
/// <summary>
/// Reads a single <c>si</c> element and appends the content of its <c>t</c> elements to the passed builder.
/// <c>rPh</c> elements are either collected as phonetic information (if capturing is enabled) or skipped
/// as a whole, so that their nested <c>t</c> elements never end up in the regular text.
/// </summary>
/// <param name="reader">Reader that is positioned on the <c>si</c> start element.</param>
/// <param name="sb">Builder that receives the text of the entry.</param>
private void ReadSiElement(XmlReader reader, StringBuilder sb)
{
    using (XmlReader siReader = reader.ReadSubtree())
    {
        siReader.Read(); // Move onto the <si> start tag
        while (siReader.Read())
        {
            if (siReader.NodeType == XmlNodeType.Element)
            {
                string elementName = siReader.LocalName;
                if (string.Equals(elementName, "rPh", StringComparison.OrdinalIgnoreCase))
                {
                    if (!capturePhoneticCharacters)
                    {
                        // Opening and immediately disposing a subtree reader leaves siReader on </rPh>
                        using (siReader.ReadSubtree()) { }
                    }
                    else
                    {
                        ReadPhoneticElement(siReader);
                    }
                }
                else if (string.Equals(elementName, "t", StringComparison.OrdinalIgnoreCase))
                {
                    string content;
                    // A dedicated subtree keeps ReadElementContentAsString from advancing siReader past </t>
                    using (XmlReader textReader = siReader.ReadSubtree())
                    {
                        textReader.Read(); // Move onto the <t> start tag
                        content = textReader.ReadElementContentAsString();
                    }
                    if (!string.IsNullOrEmpty(content))
                    {
                        sb.Append(content);
                    }
                }
            }
        }
    }
}
173
/// <summary>
/// Reads a single <c>rPh</c> element: takes the <c>sb</c> and <c>eb</c> attributes and the content of the
/// nested <c>t</c> element and stores them as phonetic information, provided the text is not empty.
/// </summary>
/// <param name="reader">Reader that is positioned on the <c>rPh</c> start element.</param>
private void ReadPhoneticElement(XmlReader reader)
{
    // The attributes must be read before the subtree reader is opened on the element
    string startAttribute = reader.GetAttribute("sb");
    string endAttribute = reader.GetAttribute("eb");
    string phoneticText = null;
    using (XmlReader phoneticReader = reader.ReadSubtree())
    {
        phoneticReader.Read(); // Move onto the <rPh> start tag
        while (phoneticReader.Read())
        {
            if (phoneticReader.NodeType != XmlNodeType.Element)
            {
                continue;
            }
            if (!phoneticReader.LocalName.Equals("t", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }
            phoneticText = phoneticReader.ReadElementContentAsString();
        }
    }
    if (string.IsNullOrEmpty(phoneticText))
    {
        return;
    }
    phoneticsInfo.Add(new PhoneticInfo(phoneticText, startAttribute, endAttribute));
}
200
/// <summary>
/// Merges the collected phonetic information into the text of the current entry. The value of each
/// phonetic run is inserted in parentheses at the end position of the run (start index plus length).
/// The collected phonetic information is cleared afterwards.
/// </summary>
/// <param name="sb">Builder that holds the plain text of the entry.</param>
/// <returns>Text with inserted phonetic values, or the unchanged text if no phonetic information was collected.</returns>
private string ProcessPhoneticCharacters(StringBuilder sb)
{
    string baseText = sb.ToString();
    if (phoneticsInfo.Count == 0)
    {
        return baseText;
    }
    StringBuilder combined = new StringBuilder();
    int position = 0;
    foreach (PhoneticInfo run in phoneticsInfo)
    {
        int runEnd = run.StartIndex + run.Length;
        // Copy the base text up to the end of the run, then add the phonetic value in parentheses
        combined.Append(baseText.Substring(position, runEnd - position));
        combined.Append('(');
        combined.Append(run.Value);
        combined.Append(')');
        position = runEnd;
    }
    // Remaining base text after the last run
    combined.Append(baseText.Substring(position));

    phoneticsInfo.Clear();
    return combined.ToString();
}
226
227
228 #endregion
229
230 #region sub-classes
/// <summary>
/// Class to represent a phonetic run of a shared string entry.
/// </summary>
sealed class PhoneticInfo
{
    /// <summary>
    /// Text of the phonetic run.
    /// </summary>
    public string Value { get; private set; }

    /// <summary>
    /// Parsed start value of the run, used as index into the text of the entry.
    /// </summary>
    public int StartIndex { get; private set; }

    /// <summary>
    /// Length of the run, calculated as parsed end value minus <see cref="StartIndex"/>.
    /// </summary>
    public int Length { get; private set; }

    /// <summary>
    /// Constructor with parameters.
    /// </summary>
    /// <param name="value">Text of the phonetic run.</param>
    /// <param name="start">Start value as string; parsed by ParserUtils.ParseInt.</param>
    /// <param name="end">End value as string; parsed by ParserUtils.ParseInt.</param>
    public PhoneticInfo(string value, string start, string end)
    {
        int parsedStart = ParserUtils.ParseInt(start);
        int parsedEnd = ParserUtils.ParseInt(end);
        Value = value;
        StartIndex = parsedStart;
        Length = parsedEnd - parsedStart;
    }
}
264 #endregion
265 }
266}
Action< Stream, Workbook, string, IOptions, int?> InlinePluginHandler
Reference to the ReaderPlugInHandler, to be used for post operations in the Execute method.
Workbook Workbook
Workbook reference where read data is stored (should not be null).
List< string > SharedStrings
List of shared string entries.
void Init(Stream stream, Workbook workbook, IOptions readerOptions, Action< Stream, Workbook, string, IOptions, int?> inlinePluginHandler)
Initialization method (interface implementation).
SharedStringsReader()
Default constructor - Must be defined for instantiation of the plug-ins.
void Execute()
Method to execute the main logic of the plug-in (interface implementation).
Exceptions.IOException IOException