NanoXLSX.Formatting 3.1.0
Loading...
Searching...
No Matches
FormattedSharedStringsReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.Text;
12using System.Xml;
13using NanoXLSX.Colors;
15using NanoXLSX.Interfaces;
16using NanoXLSX.Interfaces.Reader;
17using NanoXLSX.Registry;
18using NanoXLSX.Registry.Attributes;
19using NanoXLSX.Styles;
20using NanoXLSX.Utils;
21using NanoXLSX.Utils.Xml;
22using IOException = NanoXLSX.Exceptions.IOException;
23
25{
29 [NanoXlsxPlugIn(PlugInUUID = PlugInUUID.SharedStringsReader, PlugInOrder = 1000)]
30 internal class FormattedSharedStringsReader : ISharedStringReader
31 {
/// <summary>
/// ID under which <see cref="FormattedTexts"/> is registered in the auxiliary data of the workbook by <see cref="Execute"/>
/// </summary>
internal static readonly int AUXILIARY_DATA_ID = 854563987;

#region privateFields
// Stream handed over in Init; read and disposed by Execute
private Stream stream;
// Backing field of the Workbook property
private Workbook workbook;
// Taken from ITextOptions.EnforcePhoneticCharacterImport in Init
private bool capturePhoneticCharacters;
// Phonetic runs of the shared string item that is currently processed (cleared per item)
private readonly List<PhoneticInfo> phoneticsInfo;
#endregion

#region properties

/// <summary>
/// Gets the resolved shared strings in reading order. Items with formatting are stored as a key
/// (reader UUID followed by the text) that also identifies the entry in <see cref="FormattedTexts"/>
/// </summary>
public List<string> SharedStrings { get; private set; }

/// <summary>
/// Gets the formatted texts, addressed by the keys that were added to <see cref="SharedStrings"/>
/// </summary>
public Dictionary<string, FormattedText> FormattedTexts { get; private set; }

/// <summary>
/// Gets or sets the workbook reference that receives the auxiliary data
/// </summary>
public Workbook Workbook
{
    get { return workbook; }
    set { workbook = value; }
}

/// <summary>
/// Gets or sets the options that are forwarded to the inline plug-in handler
/// </summary>
public IOptions Options { get; set; }

/// <summary>
/// Gets or sets the handler that is invoked by <see cref="Execute"/> after all shared string items were read
/// </summary>
public Action<Stream, Workbook, string, IOptions, int?> InlinePluginHandler { get; set; }
#endregion
72
73 #region constructors
/// <summary>
/// Default constructor. Creates the (initially empty) result collections and the phonetic run buffer
/// </summary>
public FormattedSharedStringsReader()
{
    SharedStrings = new List<string>();
    FormattedTexts = new Dictionary<string, FormattedText>();
    phoneticsInfo = new List<PhoneticInfo>();
}
83 #endregion
84
85 #region methods
/// <summary>
/// Initializes the reader with the stream to read, the target workbook and the inline plug-in handler
/// </summary>
/// <param name="stream">Stream of the shared strings XML entry</param>
/// <param name="workbook">Workbook reference</param>
/// <param name="readerOptions">Reader options. Only evaluated if they implement <see cref="ITextOptions"/></param>
/// <param name="inlinePluginHandler">Handler that is invoked at the end of <see cref="Execute"/></param>
public void Init(Stream stream, Workbook workbook, IOptions readerOptions, Action<Stream, Workbook, string, IOptions, int?> inlinePluginHandler)
{
    InlinePluginHandler = inlinePluginHandler;
    this.workbook = workbook;
    this.stream = stream;

    // The phonetic flag is only touched when the passed options are text options
    ITextOptions textOptions = readerOptions as ITextOptions;
    if (textOptions != null)
    {
        capturePhoneticCharacters = textOptions.EnforcePhoneticCharacterImport;
    }
}
103
/// <summary>
/// Reads all shared string items (&lt;si&gt; elements) from the stream that was passed to <see cref="Init"/>.
/// Plain items are added to <see cref="SharedStrings"/> as text. Items with formatting are added as a key
/// (reader UUID followed by the text) and registered under that key in <see cref="FormattedTexts"/>.
/// Afterwards, the inline plug-in handler is invoked and, if at least one formatted text was found,
/// <see cref="FormattedTexts"/> is stored in the auxiliary data of the workbook. The stream is disposed at the end
/// </summary>
/// <exception cref="IOException">Thrown (with the cause as inner exception) if anything fails during processing</exception>
public void Execute()
{
    try
    {
        using (stream) // Close after processing
        {
            bool hasFormattedText = false;
            StringBuilder sb = new StringBuilder();
            using (XmlReader reader = XmlReader.Create(stream, XmlStreamUtils.CreateSettings()))
            {
                while (reader.Read())
                {
                    if (!XmlStreamUtils.IsElement(reader, "si"))
                    {
                        continue;
                    }

                    // Both buffers are reused for every item
                    sb.Clear();
                    phoneticsInfo.Clear();

                    FormattedText formattedText;
                    using (XmlReader siReader = reader.ReadSubtree())
                    {
                        siReader.Read(); // consume the <si> open tag
                        formattedText = ProcessSharedStringItem(siReader, ref sb);
                    }

                    string textValue = capturePhoneticCharacters ? ProcessPhoneticCharacters(sb) : sb.ToString();

                    if (formattedText == null)
                    {
                        // Plain item without runs or phonetic data. This case must be handled before
                        // formattedText is accessed: with enabled phonetic import, it led to a
                        // NullReferenceException on every plain shared string
                        SharedStrings.Add(textValue);
                        continue;
                    }

                    if (capturePhoneticCharacters)
                    {
                        formattedText.OverridePlainText(textValue);
                    }
                    else if (string.IsNullOrEmpty(formattedText.PlainText) && sb.Length > 0)
                    {
                        formattedText.OverridePlainText(textValue); // Fallback to prevent data loss
                    }

                    string key = PlugInUUID.SharedStringsReader + textValue;
                    SharedStrings.Add(key);
                    FormattedTexts[key] = formattedText;
                    hasFormattedText = true;
                }
            }
            InlinePluginHandler?.Invoke(stream, Workbook, PlugInUUID.SharedStringsInlineReader, Options, null);
            if (hasFormattedText)
            {
                Workbook.AuxiliaryData.SetData(PlugInUUID.SharedStringsReader, AUXILIARY_DATA_ID, FormattedTexts);
            }
        }
    }
    catch (Exception ex)
    {
        throw new IOException("The XML entry could not be read from the " + nameof(stream) + ". Please see the inner exception:", ex);
    }
}
174
/// <summary>
/// Processes the child elements of one shared string item: text runs (r), phonetic runs (rPh),
/// phonetic properties (phoneticPr) and plain text (t)
/// </summary>
/// <param name="siReader">Sub-tree reader of the &lt;si&gt; element</param>
/// <param name="sb">Builder that receives the text of the item</param>
/// <returns>Formatted text if at least one r, rPh or phoneticPr element occurred, otherwise null</returns>
private FormattedText ProcessSharedStringItem(XmlReader siReader, ref StringBuilder sb)
{
    // The instance is created lazily by the first formatting-related element,
    // so a null reference at the end means that the item was plain text
    FormattedText result = null;
    bool richTextSeen = false;

    while (siReader.Read())
    {
        if (siReader.NodeType == XmlNodeType.Element)
        {
            if (XmlStreamUtils.IsElement(siReader, "r"))
            {
                richTextSeen = true;
                result = result ?? new FormattedText();
                using (XmlReader runReader = siReader.ReadSubtree())
                {
                    runReader.Read(); // consume the <r> open tag
                    ProcessTextRun(runReader, result, ref sb);
                }
            }
            else if (XmlStreamUtils.IsElement(siReader, "rPh"))
            {
                result = result ?? new FormattedText();
                using (XmlReader phoneticReader = siReader.ReadSubtree())
                {
                    phoneticReader.Read(); // consume the <rPh> open tag
                    ProcessPhoneticRun(phoneticReader, result);
                }
            }
            else if (XmlStreamUtils.IsElement(siReader, "phoneticPr"))
            {
                result = result ?? new FormattedText();
                ProcessPhoneticProperties(siReader, result);
                using (siReader.ReadSubtree())
                {
                    // Only opened and disposed again to consume the element
                }
            }
            else if (XmlStreamUtils.IsElement(siReader, "t") && !richTextSeen)
            {
                // Direct text of the item; ignored as soon as a text run was processed
                string plainText;
                using (XmlReader textReader = siReader.ReadSubtree())
                {
                    textReader.Read(); // position at <t>
                    plainText = textReader.ReadElementContentAsString();
                }
                sb.Append(plainText);
            }
        }
    }

    return result;
}
238
/// <summary>
/// Processes one text run (r element): reads the optional run properties (rPr) and the text (t),
/// appends the text to the passed builder and adds the run to the formatted text
/// </summary>
/// <param name="runReader">Sub-tree reader of the &lt;r&gt; element</param>
/// <param name="formattedText">Formatted text that receives the run</param>
/// <param name="sb">Builder that receives the text of the run</param>
private void ProcessTextRun(XmlReader runReader, FormattedText formattedText, ref StringBuilder sb)
{
    string runText = null;
    Font runFont = null;

    while (runReader.Read())
    {
        if (runReader.NodeType == XmlNodeType.Element)
        {
            if (XmlStreamUtils.IsElement(runReader, "rPr"))
            {
                using (XmlReader propertyReader = runReader.ReadSubtree())
                {
                    propertyReader.Read(); // consume the <rPr> open tag
                    runFont = ParseRunProperties(propertyReader);
                }
            }
            else if (XmlStreamUtils.IsElement(runReader, "t"))
            {
                using (XmlReader textReader = runReader.ReadSubtree())
                {
                    textReader.Read(); // position at <t>
                    runText = textReader.ReadElementContentAsString();
                }
                sb.Append(runText);
            }
        }
    }

    // Runs without text are not added
    if (string.IsNullOrEmpty(runText))
    {
        return;
    }
    formattedText.AddRun(runText, runFont);
}
279
/// <summary>
/// Parses the run properties (children of an rPr element) into a font definition
/// </summary>
/// <param name="rPrReader">Sub-tree reader of the &lt;rPr&gt; element</param>
/// <returns>Font with all recognized properties applied. Unknown elements are ignored</returns>
private Font ParseRunProperties(XmlReader rPrReader)
{
    Font font = new Font();

    while (rPrReader.Read())
    {
        if (rPrReader.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        string nodeName = rPrReader.LocalName;

        if (nodeName.Equals("rFont", StringComparison.OrdinalIgnoreCase))
        {
            font.Name = rPrReader.GetAttribute("val");
        }
        else if (nodeName.Equals("charset", StringComparison.OrdinalIgnoreCase))
        {
            string val = rPrReader.GetAttribute("val");
            if (!string.IsNullOrEmpty(val))
            {
                font.Charset = (Font.CharsetValue)ParserUtils.ParseInt(val);
            }
        }
        else if (nodeName.Equals("family", StringComparison.OrdinalIgnoreCase))
        {
            string val = rPrReader.GetAttribute("val");
            if (!string.IsNullOrEmpty(val))
            {
                font.Family = (Font.FontFamilyValue)ParserUtils.ParseInt(val);
            }
        }
        // The following toggle elements carry an optional boolean 'val' attribute.
        // The presence of the element alone does not mean 'true' (e.g. <b val="0"/>)
        else if (nodeName.Equals("b", StringComparison.OrdinalIgnoreCase))
        {
            font.Bold = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("i", StringComparison.OrdinalIgnoreCase))
        {
            font.Italic = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("strike", StringComparison.OrdinalIgnoreCase))
        {
            font.Strike = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("outline", StringComparison.OrdinalIgnoreCase))
        {
            font.Outline = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("shadow", StringComparison.OrdinalIgnoreCase))
        {
            font.Shadow = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("condense", StringComparison.OrdinalIgnoreCase))
        {
            font.Condense = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("extend", StringComparison.OrdinalIgnoreCase))
        {
            font.Extend = ParseBooleanProperty(rPrReader);
        }
        else if (nodeName.Equals("color", StringComparison.OrdinalIgnoreCase))
        {
            font.ColorValue = ParseColor(rPrReader);
        }
        else if (nodeName.Equals("sz", StringComparison.OrdinalIgnoreCase))
        {
            string val = rPrReader.GetAttribute("val");
            if (!string.IsNullOrEmpty(val))
            {
                font.Size = ParserUtils.ParseFloat(val);
            }
        }
        else if (nodeName.Equals("u", StringComparison.OrdinalIgnoreCase))
        {
            // An underline element without value is treated as single underline
            string val = rPrReader.GetAttribute("val");
            font.Underline = string.IsNullOrEmpty(val) ? Font.UnderlineValue.Single : ParseUnderlineValue(val);
        }
        else if (nodeName.Equals("vertAlign", StringComparison.OrdinalIgnoreCase))
        {
            string val = rPrReader.GetAttribute("val");
            if (!string.IsNullOrEmpty(val))
            {
                font.VerticalAlign = ParseVerticalAlignValue(val);
            }
        }
        else if (nodeName.Equals("scheme", StringComparison.OrdinalIgnoreCase))
        {
            string val = rPrReader.GetAttribute("val");
            if (!string.IsNullOrEmpty(val))
            {
                font.Scheme = ParseSchemeValue(val);
            }
        }
    }

    return font;
}

/// <summary>
/// Evaluates the optional 'val' attribute of a boolean run property (e.g. b, i or strike)
/// </summary>
/// <param name="reader">Reader, positioned on the property element</param>
/// <returns>False if the attribute is "0" or "false", otherwise true (also if the attribute is missing or empty)</returns>
private static bool ParseBooleanProperty(XmlReader reader)
{
    string val = reader.GetAttribute("val");
    if (string.IsNullOrEmpty(val))
    {
        return true;
    }
    val = val.Trim();
    return !(val == "0" || val.Equals("false", StringComparison.OrdinalIgnoreCase));
}
371
/// <summary>
/// Creates a color from the attributes of the element the reader is positioned on.
/// The first non-empty attribute in the order auto, indexed, rgb, theme, system defines the color.
/// A tint attribute is applied additionally if a color could be created
/// </summary>
/// <param name="reader">Reader, positioned on the color element</param>
/// <returns>Color, or null if none of the defining attributes is set</returns>
private Color ParseColor(XmlReader reader)
{
    string auto = reader.GetAttribute("auto");
    string indexed = reader.GetAttribute("indexed");
    string rgb = reader.GetAttribute("rgb");
    string theme = reader.GetAttribute("theme");
    string system = reader.GetAttribute("system");

    Color parsed;
    if (!string.IsNullOrEmpty(auto))
    {
        parsed = Color.CreateAuto();
    }
    else if (!string.IsNullOrEmpty(indexed))
    {
        parsed = Color.CreateIndexed(ParserUtils.ParseInt(indexed));
    }
    else if (!string.IsNullOrEmpty(rgb))
    {
        parsed = Color.CreateRgb(rgb);
    }
    else if (!string.IsNullOrEmpty(theme))
    {
        parsed = Color.CreateTheme(ParserUtils.ParseInt(theme));
    }
    else if (!string.IsNullOrEmpty(system))
    {
        parsed = Color.CreateSystem(SystemColor.MapStringToValue(system));
    }
    else
    {
        // No defining attribute: a tint alone does not create a color
        return null;
    }

    string tint = reader.GetAttribute("tint");
    if (parsed != null && !string.IsNullOrEmpty(tint))
    {
        parsed.Tint = ParserUtils.ParseFloat(tint);
    }
    return parsed;
}
404
/// <summary>
/// Processes one phonetic run (rPh element) with its base text range (attributes sb and eb) and its text (t).
/// The run is only added if text, sb and eb are all present and not empty
/// </summary>
/// <param name="rPhReader">Sub-tree reader, positioned on the &lt;rPh&gt; element</param>
/// <param name="formattedText">Formatted text that receives the phonetic run</param>
private void ProcessPhoneticRun(XmlReader rPhReader, FormattedText formattedText)
{
    // The attributes have to be read before the reader moves on to the child nodes
    string rangeStart = rPhReader.GetAttribute("sb");
    string rangeEnd = rPhReader.GetAttribute("eb");
    string phoneticText = null;

    while (rPhReader.Read())
    {
        if (rPhReader.NodeType == XmlNodeType.Element && XmlStreamUtils.IsElement(rPhReader, "t"))
        {
            using (XmlReader textReader = rPhReader.ReadSubtree())
            {
                textReader.Read(); // position at <t>
                phoneticText = textReader.ReadElementContentAsString();
            }
        }
    }

    bool incomplete = string.IsNullOrEmpty(phoneticText) || string.IsNullOrEmpty(rangeStart) || string.IsNullOrEmpty(rangeEnd);
    if (incomplete)
    {
        return;
    }

    uint startBase = (uint)ParserUtils.ParseInt(rangeStart);
    uint endBase = (uint)ParserUtils.ParseInt(rangeEnd);
    formattedText.AddPhoneticRun(phoneticText, startBase, endBase);

    if (capturePhoneticCharacters)
    {
        // Remembered for the later insertion into the plain text (see ProcessPhoneticCharacters)
        phoneticsInfo.Add(new PhoneticInfo(phoneticText, rangeStart, rangeEnd));
    }
}
443
/// <summary>
/// Reads the attributes 'type' and 'alignment' of a phonetic properties element (phoneticPr)
/// and applies them, together with a new default font, to the formatted text
/// </summary>
/// <param name="reader">Reader, positioned on the &lt;phoneticPr&gt; element</param>
/// <param name="formattedText">Formatted text that receives the phonetic properties</param>
private void ProcessPhoneticProperties(XmlReader reader, FormattedText formattedText)
{
    string typeValue = reader.GetAttribute("type");
    string alignmentValue = reader.GetAttribute("alignment");

    // Missing attributes fall back to the same defaults as FormattedText.SetPhoneticProperties
    PhoneticRun.PhoneticType type = PhoneticRun.PhoneticType.FullwidthKatakana;
    if (!string.IsNullOrEmpty(typeValue))
    {
        type = ParsePhoneticType(typeValue);
    }

    PhoneticRun.PhoneticAlignment alignment = PhoneticRun.PhoneticAlignment.Left;
    if (!string.IsNullOrEmpty(alignmentValue))
    {
        alignment = ParsePhoneticAlignment(alignmentValue);
    }

    formattedText.SetPhoneticProperties(new Font(), type, alignment);
}
464
/// <summary>
/// Inserts the collected phonetic texts, enclosed in parentheses, into the base text.
/// Each phonetic text is placed after the end position of the base text range it refers to
/// </summary>
/// <param name="sb">Builder that contains the base text of the shared string item</param>
/// <returns>Base text with inserted phonetic texts, or the unchanged base text if no phonetic run was collected</returns>
private string ProcessPhoneticCharacters(StringBuilder sb)
{
    string text = sb.ToString();
    StringBuilder result = new StringBuilder();
    int currentTextIndex = 0;
    foreach (PhoneticInfo info in phoneticsInfo)
    {
        // The positions originate from the file. They are limited to the not yet copied part of
        // the text, so that ranges beyond the text end or overlapping ranges cannot cause an exception
        int end = info.StartIndex + info.Length;
        end = Math.Min(Math.Max(end, currentTextIndex), text.Length);

        result.Append(text, currentTextIndex, end - currentTextIndex);
        result.Append('(').Append(info.Value).Append(')');
        currentTextIndex = end;
    }
    result.Append(text, currentTextIndex, text.Length - currentTextIndex);

    return result.ToString();
}
485
/// <summary>
/// Maps the value of an underline attribute (case-insensitive) to the underline enum
/// </summary>
/// <param name="value">Attribute value. Must not be null</param>
/// <returns>Matching underline value; Single for every unrecognized value</returns>
private Font.UnderlineValue ParseUnderlineValue(string value)
{
    // NOTE(review): "none" is not handled separately and therefore also resolves to Single - confirm whether this is intended
    string normalized = value.ToLowerInvariant();
    if (normalized == "double")
    {
        return Font.UnderlineValue.Double;
    }
    if (normalized == "singleaccounting")
    {
        return Font.UnderlineValue.SingleAccounting;
    }
    if (normalized == "doubleaccounting")
    {
        return Font.UnderlineValue.DoubleAccounting;
    }
    return Font.UnderlineValue.Single;
}
503
/// <summary>
/// Maps the value of a vertical alignment attribute (case-insensitive) to the vertical text alignment enum
/// </summary>
/// <param name="value">Attribute value. Must not be null</param>
/// <returns>Superscript or Subscript if recognized, otherwise Baseline</returns>
private Font.VerticalTextAlignValue ParseVerticalAlignValue(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "superscript")
    {
        return Font.VerticalTextAlignValue.Superscript;
    }
    return normalized == "subscript"
        ? Font.VerticalTextAlignValue.Subscript
        : Font.VerticalTextAlignValue.Baseline;
}
519
/// <summary>
/// Maps the value of a font scheme attribute (case-insensitive) to the scheme enum
/// </summary>
/// <param name="value">Attribute value. Must not be null</param>
/// <returns>Major or Minor if recognized, otherwise None</returns>
private Font.SchemeValue ParseSchemeValue(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "major")
    {
        return Font.SchemeValue.Major;
    }
    if (normalized == "minor")
    {
        return Font.SchemeValue.Minor;
    }
    return Font.SchemeValue.None;
}
535
/// <summary>
/// Maps the value of a phonetic type attribute (case-insensitive) to the phonetic type enum
/// </summary>
/// <param name="value">Attribute value. Must not be null</param>
/// <returns>Matching phonetic type; FullwidthKatakana for every unrecognized value</returns>
private PhoneticRun.PhoneticType ParsePhoneticType(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "halfwidthkatakana")
    {
        return PhoneticRun.PhoneticType.HalfwidthKatakana;
    }
    else if (normalized == "hiragana")
    {
        return PhoneticRun.PhoneticType.Hiragana;
    }
    else if (normalized == "noconversion")
    {
        return PhoneticRun.PhoneticType.NoConversion;
    }
    else
    {
        return PhoneticRun.PhoneticType.FullwidthKatakana;
    }
}
553
/// <summary>
/// Maps the value of a phonetic alignment attribute (case-insensitive) to the phonetic alignment enum
/// </summary>
/// <param name="value">Attribute value. Must not be null</param>
/// <returns>Matching alignment; Left for every unrecognized value</returns>
private PhoneticRun.PhoneticAlignment ParsePhoneticAlignment(string value)
{
    string normalized = value.ToLowerInvariant();

    PhoneticRun.PhoneticAlignment result = PhoneticRun.PhoneticAlignment.Left;
    if (normalized == "nocontrol")
    {
        result = PhoneticRun.PhoneticAlignment.NoControl;
    }
    else if (normalized == "center")
    {
        result = PhoneticRun.PhoneticAlignment.Center;
    }
    else if (normalized == "distributed")
    {
        result = PhoneticRun.PhoneticAlignment.Distributed;
    }
    return result;
}
571
572 #endregion
573
574 #region sub-classes
/// <summary>
/// Holds the text of one phonetic run together with the range of the base text it refers to
/// </summary>
sealed class PhoneticInfo
{
    /// <summary>
    /// Gets the phonetic text
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Gets the parsed start position of the base text range
    /// </summary>
    public int StartIndex { get; }

    /// <summary>
    /// Gets the length of the base text range (parsed end position minus start position)
    /// </summary>
    public int Length { get; }

    /// <summary>
    /// Constructor with parameters
    /// </summary>
    /// <param name="value">Phonetic text</param>
    /// <param name="start">Start position as string</param>
    /// <param name="end">End position as string</param>
    public PhoneticInfo(string value, string start, string end)
    {
        Value = value;
        int startPosition = ParserUtils.ParseInt(start);
        int endPosition = ParserUtils.ParseInt(end);
        StartIndex = startPosition;
        Length = endPosition - startPosition;
    }
}
608 #endregion
609 }
610}
Represents a phonetic run that provides pronunciation guidance for text.
PhoneticAlignment
Enumeration for phonetic text alignment.
PhoneticType
Enumeration for phonetic text types.
Represents a formatted text entry in Excel shared strings, supporting rich text with multiple runs an...
override string ToString()
Gets the string representation of the formatted text without formatting (plain text)....
FormattedText AddPhoneticRun(string text, uint startBase, uint endBase)
Adds a phonetic run for pronunciation guidance (Ruby text, like Furigana, Pinyin or Zhuyin).
string PlainText
Gets the plain text content by concatenating all runs.
FormattedText AddRun(string text, Font fontStyle=null)
Adds a text run with the specified style.
FormattedText SetPhoneticProperties(Font fontReference, PhoneticRun.PhoneticType type=PhoneticRun.PhoneticType.FullwidthKatakana, PhoneticRun.PhoneticAlignment alignment=PhoneticRun.PhoneticAlignment.Left)
Sets the phonetic properties for this formatted text, applied to the phonetic run (Ruby text).