NanoXLSX.Reader 3.0.0-rc.2
Loading...
Searching...
No Matches
SharedStringsReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2025
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.Text;
12using System.Xml;
13using NanoXLSX.Interfaces.Plugin;
14using NanoXLSX.Registry;
15using NanoXLSX.Registry.Attributes;
16using NanoXLSX.Utils;
17using IOException = NanoXLSX.Exceptions.IOException;
18
20{
24 [NanoXlsxPlugIn(PlugInUUID = PlugInUUID.SharedStringsReader)]
25 public class SharedStringsReader : ISharedStringReader
26 {
27
28 #region privateFields
29 private bool capturePhoneticCharacters;
30 private readonly List<PhoneticInfo> phoneticsInfo;
31 private MemoryStream stream;
32 private Workbook workbook;
33 #endregion
34
35 #region properties
36
/// <summary>
/// List of shared string entries. The list is created by the constructor and filled by <see cref="Execute"/>.
/// </summary>
public List<string> SharedStrings { get; private set; }

/// <summary>
/// Workbook reference, backed by the private <c>workbook</c> field that is also assigned in <see cref="Init"/>.
/// </summary>
public Workbook Workbook
{
    get { return workbook; }
    set { workbook = value; }
}
49 #endregion
50
51 #region constructors
56 {
57 phoneticsInfo = new List<PhoneticInfo>();
58 SharedStrings = new List<string>();
59 }
60 #endregion
61
62 #region methods
/// <summary>
/// Initialization method (interface implementation). Stores the passed stream and workbook
/// and, if the passed options are <see cref="ReaderOptions"/>, takes over the flag that
/// controls whether phonetic characters are captured.
/// </summary>
/// <param name="stream">Stream that is processed by <see cref="Execute"/>.</param>
/// <param name="workbook">Workbook reference that is stored in this reader.</param>
/// <param name="readerOptions">Options instance. Instances that are not <see cref="ReaderOptions"/> are ignored.</param>
public void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions)
{
    this.stream = stream;
    this.workbook = workbook;

    // Any other kind of options object leaves the phonetic flag untouched
    if (!(readerOptions is ReaderOptions options))
    {
        return;
    }
    capturePhoneticCharacters = options.EnforcePhoneticCharacterImport;
}
78
/// <summary>
/// Method to execute the main logic of the plug-in (interface implementation).
/// Loads the stream as an XML document, collects the text of every <c>si</c> child of the
/// document element into <see cref="SharedStrings"/> and finally hands over to the inline
/// queue plug-ins registered for <c>PlugInUUID.SharedStringsInlineReader</c>.
/// </summary>
/// <exception cref="IOException">Thrown on any failure during processing. The original exception is passed as inner exception.</exception>
public void Execute()
{
    try
    {
        using (stream) // The stream is disposed when processing has finished
        {
            XmlDocument document = new XmlDocument();
            document.XmlResolver = null; // No resolving of external resources
            XmlReaderSettings settings = new XmlReaderSettings();
            settings.XmlResolver = null;
            using (XmlReader xmlReader = XmlReader.Create(stream, settings))
            {
                document.Load(xmlReader);
                StringBuilder buffer = new StringBuilder();
                foreach (XmlNode entry in document.DocumentElement.ChildNodes)
                {
                    if (!entry.LocalName.Equals("si", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }
                    // The same builder is reused for each shared string item
                    buffer.Clear();
                    GetTextToken(entry, ref buffer);
                    string value = capturePhoneticCharacters
                        ? ProcessPhoneticCharacters(buffer)
                        : buffer.ToString();
                    SharedStrings.Add(value);
                }
                RederPlugInHandler.HandleInlineQueuePlugins(ref stream, Workbook, PlugInUUID.SharedStringsInlineReader);
            }
        }
    }
    catch (Exception ex)
    {
        throw new IOException("The XML entry could not be read from the " + nameof(stream) + ". Please see the inner exception:", ex);
    }
}
122
/// <summary>
/// Recursively collects the text of all <c>t</c> nodes below (and including) the passed node.
/// Phonetic runs (<c>rPh</c>) are never appended to the builder and their children are not visited.
/// If capturing is enabled and the run has text, it is recorded in the phonetic info list
/// together with its <c>sb</c> and <c>eb</c> attribute values.
/// </summary>
/// <param name="node">Node to process.</param>
/// <param name="sb">String builder that receives the collected text.</param>
private void GetTextToken(XmlNode node, ref StringBuilder sb)
{
    string localName = node.LocalName;

    if (localName.Equals("rPh", StringComparison.OrdinalIgnoreCase))
    {
        if (!capturePhoneticCharacters)
        {
            return;
        }
        string phoneticText = node.InnerText;
        if (string.IsNullOrEmpty(phoneticText))
        {
            return;
        }
        XmlAttributeCollection attributes = node.Attributes;
        string start = attributes.GetNamedItem("sb").InnerText;
        string end = attributes.GetNamedItem("eb").InnerText;
        phoneticsInfo.Add(new PhoneticInfo(phoneticText, start, end));
        return;
    }

    if (localName.Equals("t", StringComparison.OrdinalIgnoreCase))
    {
        string content = node.InnerText;
        if (!string.IsNullOrEmpty(content))
        {
            sb.Append(content);
        }
    }

    if (!node.HasChildNodes)
    {
        return;
    }
    foreach (XmlNode child in node.ChildNodes)
    {
        GetTextToken(child, ref sb);
    }
}
153
/// <summary>
/// Builds the final string of a shared string entry by inserting each collected phonetic
/// value, enclosed in parentheses, directly after the text range it refers to.
/// The list of collected phonetic infos is cleared afterwards.
/// </summary>
/// <param name="sb">String builder holding the plain text of the entry.</param>
/// <returns>Text with inserted phonetic values, or the unchanged text if no phonetic info was collected.</returns>
private string ProcessPhoneticCharacters(StringBuilder sb)
{
    string plainText = sb.ToString();
    if (phoneticsInfo.Count == 0)
    {
        return plainText;
    }

    StringBuilder result = new StringBuilder();
    int cursor = 0;
    foreach (PhoneticInfo phonetic in phoneticsInfo)
    {
        // Copy everything up to the end of the annotated range, then add the annotation
        int rangeEnd = phonetic.StartIndex + phonetic.Length;
        string segment = plainText.Substring(cursor, rangeEnd - cursor);
        result.Append(segment);
        result.Append('(');
        result.Append(phonetic.Value);
        result.Append(')');
        cursor = rangeEnd;
    }
    // Remaining text after the last annotated range
    string tail = plainText.Substring(cursor);
    result.Append(tail);

    phoneticsInfo.Clear();
    return result.ToString();
}
179
180
181 #endregion
182
183 #region sub-classes
/// <summary>
/// Holds one phonetic run: its text and the text range it refers to.
/// </summary>
private sealed class PhoneticInfo
{
    /// <summary>
    /// Text of the phonetic run.
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Parsed start value of the range.
    /// </summary>
    public int StartIndex { get; }

    /// <summary>
    /// Length of the range (parsed end value minus <see cref="StartIndex"/>).
    /// </summary>
    public int Length { get; }

    /// <summary>
    /// Creates a phonetic info from the raw text and the raw start and end values.
    /// </summary>
    /// <param name="value">Text of the phonetic run.</param>
    /// <param name="start">Start value as string (parsed with <c>ParserUtils.ParseInt</c>).</param>
    /// <param name="end">End value as string (parsed with <c>ParserUtils.ParseInt</c>).</param>
    public PhoneticInfo(string value, string start, string end)
    {
        int startIndex = ParserUtils.ParseInt(start);
        int endIndex = ParserUtils.ParseInt(end);

        Value = value;
        StartIndex = startIndex;
        Length = endIndex - startIndex;
    }
}
217 #endregion
218 }
219}
void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions)
Initialization method (interface implementation).
Workbook Workbook
Workbook reference where read data is stored (should not be null).
List< string > SharedStrings
List of shared string entries.
SharedStringsReader()
Default constructor - Must be defined for instantiation of the plug-ins.
void Execute()
Method to execute the main logic of the plug-in (interface implementation).
The reader options define global rules, applied when loading a worksheet. The options are mainly to o...
Exceptions.IOException IOException