NanoXLSX.Formatting 3.0.0
Loading...
Searching...
No Matches
FormattedSharedStringsReader.cs
1/*
2 * NanoXLSX is a small .NET library to generate and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
3 * Copyright Raphael Stoeckli © 2026
4 * This library is licensed under the MIT License.
5 * You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
6 */
7
8using System;
9using System.Collections.Generic;
10using System.IO;
11using System.Text;
12using System.Xml;
13using NanoXLSX.Colors;
15using NanoXLSX.Interfaces;
16using NanoXLSX.Interfaces.Reader;
17using NanoXLSX.Registry;
18using NanoXLSX.Registry.Attributes;
19using NanoXLSX.Styles;
20using NanoXLSX.Utils;
21using IOException = NanoXLSX.Exceptions.IOException;
22
24{
28 [NanoXlsxPlugIn(PlugInUUID = PlugInUUID.SharedStringsReader, PlugInOrder = 1000)]
29 internal class FormattedSharedStringsReader : ISharedStringReader
30 {
/// <summary>
/// Identifier under which the collected formatted texts are registered in the auxiliary data of the workbook
/// </summary>
internal static readonly int AUXILIARY_DATA_ID = 854563987;

#region privateFields
// Phonetic runs of the shared string item that is currently processed (cleared per item)
private readonly List<PhoneticInfo> phoneticsInfo;
// If true, phonetic characters are merged into the plain text of an entry
private bool capturePhoneticCharacters;
// Workbook that receives the auxiliary data
private Workbook workbook;
// Stream that contains the shared strings XML
private MemoryStream stream;
#endregion

#region properties

/// <summary>
/// Gets the shared strings in the order they were read. Entries with formatting are stored as key
/// (plug-in UUID of this reader, followed by the plain text) that also addresses <see cref="FormattedTexts"/>
/// </summary>
public List<string> SharedStrings { get; private set; }

/// <summary>
/// Gets the formatted texts, addressed by the same keys that are written to <see cref="SharedStrings"/>
/// </summary>
public Dictionary<string, FormattedText> FormattedTexts { get; private set; }

/// <summary>
/// Gets or sets the workbook reference
/// </summary>
public Workbook Workbook
{
    get { return workbook; }
    set { workbook = value; }
}

/// <summary>
/// Gets or sets the options that are forwarded to the inline plug-in handler
/// </summary>
public IOptions Options { get; set; }

/// <summary>
/// Gets or sets the handler that is invoked after all shared string items were processed
/// </summary>
public Action<MemoryStream, Workbook, string, IOptions, int?> InlinePluginHandler { get; set; }
#endregion
71
72 #region constructors
/// <summary>
/// Creates a reader instance with empty result collections
/// </summary>
public FormattedSharedStringsReader()
{
    this.SharedStrings = new List<string>();
    this.FormattedTexts = new Dictionary<string, FormattedText>();
    this.phoneticsInfo = new List<PhoneticInfo>();
}
82 #endregion
83
84 #region methods
/// <summary>
/// Prepares the reader for a subsequent call of <see cref="Execute"/>
/// </summary>
/// <param name="stream">Stream that contains the shared strings XML</param>
/// <param name="workbook">Workbook reference</param>
/// <param name="readerOptions">Reader options. The phonetic import flag is only taken over if the instance implements ITextOptions</param>
/// <param name="inlinePluginHandler">Handler that is invoked at the end of the processing</param>
public void Init(MemoryStream stream, Workbook workbook, IOptions readerOptions, Action<MemoryStream, Workbook, string, IOptions, int?> inlinePluginHandler)
{
    this.InlinePluginHandler = inlinePluginHandler;
    this.workbook = workbook;
    this.stream = stream;

    ITextOptions textOptions = readerOptions as ITextOptions;
    if (textOptions != null)
    {
        this.capturePhoneticCharacters = textOptions.EnforcePhoneticCharacterImport;
    }
}
102
/// <summary>
/// Reads all shared string items ('si' elements) of the XML stream that was passed to Init.
/// Plain entries are added to <see cref="SharedStrings"/> as text. Entries with runs or phonetic information
/// are added as key (plug-in UUID + text) and the corresponding object is stored in <see cref="FormattedTexts"/>.
/// If at least one formatted text was found, the dictionary is registered in the auxiliary data of the workbook.
/// The stream is disposed after processing
/// </summary>
/// <exception cref="IOException">Thrown if any exception occurred during reading. The cause is passed as inner exception</exception>
public void Execute()
{
    try
    {
        using (stream) // Close after processing
        {
            XmlDocument xr = new XmlDocument
            {
                XmlResolver = null
            };
            bool hasFormattedText = false;
            using (XmlReader reader = XmlReader.Create(stream, new XmlReaderSettings() { XmlResolver = null }))
            {
                xr.Load(reader);
                StringBuilder sb = new StringBuilder();
                foreach (XmlNode node in xr.DocumentElement.ChildNodes)
                {
                    if (!node.LocalName.Equals("si", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }
                    // Both buffers are reused for every item
                    sb.Clear();
                    phoneticsInfo.Clear();

                    // Null is returned for plain entries (no runs, no phonetic information)
                    FormattedText formattedText = ProcessSharedStringItem(node, ref sb);
                    string textValue;
                    if (capturePhoneticCharacters)
                    {
                        textValue = ProcessPhoneticCharacters(sb);
                        // Plain entries have no FormattedText object; only override when one exists
                        formattedText?.OverridePlainText(textValue);
                    }
                    else
                    {
                        textValue = sb.ToString();
                        if (formattedText != null && string.IsNullOrEmpty(formattedText.PlainText) && textValue.Length > 0)
                        {
                            formattedText.OverridePlainText(textValue); // Fallback to prevent data loss
                        }
                    }

                    if (formattedText != null)
                    {
                        // NOTE(review): two formatted entries with identical plain text share one key; the later one replaces the earlier one
                        string key = PlugInUUID.SharedStringsReader + textValue;
                        SharedStrings.Add(key);
                        FormattedTexts[key] = formattedText;
                        hasFormattedText = true;
                    }
                    else
                    {
                        SharedStrings.Add(textValue);
                    }
                }
                InlinePluginHandler?.Invoke(stream, Workbook, PlugInUUID.SharedStringsInlineReader, Options, null);
            }
            if (hasFormattedText)
            {
                Workbook.AuxiliaryData.SetData(PlugInUUID.SharedStringsReader, AUXILIARY_DATA_ID, FormattedTexts);
            }
        }
    }
    catch (Exception ex)
    {
        throw new IOException("The XML entry could not be read from the " + nameof(stream) + ". Please see the inner exception:", ex);
    }
}
171
/// <summary>
/// Processes one shared string item. The plain text of the item is appended to the passed string builder
/// </summary>
/// <param name="siNode">The 'si' node to process</param>
/// <param name="sb">String builder that receives the plain text</param>
/// <returns>Formatted text if the item contains text runs ('r'), phonetic runs ('rPh') or phonetic properties ('phoneticPr'), otherwise null</returns>
private FormattedText ProcessSharedStringItem(XmlNode siNode, ref StringBuilder sb)
{
    List<XmlNode> textRuns = new List<XmlNode>();
    List<XmlNode> phoneticRuns = new List<XmlNode>();
    XmlNode phoneticProperties = null;

    // Classify the direct children once; document order is kept within each list
    foreach (XmlNode child in siNode.ChildNodes)
    {
        string childName = child.LocalName;
        if (childName.Equals("r", StringComparison.OrdinalIgnoreCase))
        {
            textRuns.Add(child);
        }
        else if (childName.Equals("rPh", StringComparison.OrdinalIgnoreCase))
        {
            phoneticRuns.Add(child);
        }
        else if (childName.Equals("phoneticPr", StringComparison.OrdinalIgnoreCase))
        {
            // If the element occurs several times, the last one is used
            phoneticProperties = child;
        }
    }

    bool isPlainEntry = textRuns.Count == 0 && phoneticRuns.Count == 0 && phoneticProperties == null;
    if (isPlainEntry)
    {
        // Simple text node: only the plain text is extracted
        GetTextToken(siNode, ref sb);
        return null;
    }

    FormattedText result = new FormattedText();

    if (textRuns.Count == 0)
    {
        // Phonetic information without runs: the text is only collected in the string builder
        GetTextToken(siNode, ref sb);
    }
    else
    {
        foreach (XmlNode run in textRuns)
        {
            ProcessTextRun(run, result, ref sb);
        }
    }

    foreach (XmlNode phoneticRun in phoneticRuns)
    {
        ProcessPhoneticRun(phoneticRun, result);
    }

    if (phoneticProperties != null)
    {
        ProcessPhoneticProperties(phoneticProperties, result);
    }

    return result;
}
248
/// <summary>
/// Processes a single text run ('r' element). The content of every 't' child is appended to the string builder.
/// A run is only added to the formatted text if the (last) 't' child has a non-empty content
/// </summary>
/// <param name="runNode">The 'r' node to process</param>
/// <param name="formattedText">Formatted text that receives the run</param>
/// <param name="sb">String builder that receives the plain text</param>
private void ProcessTextRun(XmlNode runNode, FormattedText formattedText, ref StringBuilder sb)
{
    string runText = null;
    Font runFont = null;

    foreach (XmlNode element in runNode.ChildNodes)
    {
        string elementName = element.LocalName;
        if (string.Equals(elementName, "rPr", StringComparison.OrdinalIgnoreCase))
        {
            runFont = ParseRunProperties(element);
            continue;
        }
        if (string.Equals(elementName, "t", StringComparison.OrdinalIgnoreCase))
        {
            runText = element.InnerText;
            sb.Append(runText);
        }
    }

    if (string.IsNullOrEmpty(runText))
    {
        return;
    }
    formattedText.AddRun(runText, runFont);
}
278
/// <summary>
/// Parses the run properties ('rPr' element) of a text run into a font object.
/// Toggle elements (b, i, strike, outline, shadow, condense, extend) are enabled if the 'val' attribute
/// is missing, and disabled if it is '0' or 'false'. An underline element with the value 'none'
/// leaves the underline of the font untouched
/// </summary>
/// <param name="rPrNode">The 'rPr' node to process</param>
/// <returns>Font object with all recognized properties applied</returns>
private Font ParseRunProperties(XmlNode rPrNode)
{
    Font font = new Font();

    foreach (XmlNode childNode in rPrNode.ChildNodes)
    {
        string nodeName = childNode.LocalName;

        if (nodeName.Equals("rFont", StringComparison.OrdinalIgnoreCase))
        {
            font.Name = GetAttributeValue(childNode, "val");
        }
        else if (nodeName.Equals("charset", StringComparison.OrdinalIgnoreCase))
        {
            string charsetValue = GetAttributeValue(childNode, "val");
            if (!string.IsNullOrEmpty(charsetValue))
            {
                font.Charset = (Font.CharsetValue)ParserUtils.ParseInt(charsetValue);
            }
        }
        else if (nodeName.Equals("family", StringComparison.OrdinalIgnoreCase))
        {
            string familyValue = GetAttributeValue(childNode, "val");
            if (!string.IsNullOrEmpty(familyValue))
            {
                font.Family = (Font.FontFamilyValue)ParserUtils.ParseInt(familyValue);
            }
        }
        else if (nodeName.Equals("b", StringComparison.OrdinalIgnoreCase))
        {
            font.Bold = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("i", StringComparison.OrdinalIgnoreCase))
        {
            font.Italic = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("strike", StringComparison.OrdinalIgnoreCase))
        {
            font.Strike = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("outline", StringComparison.OrdinalIgnoreCase))
        {
            font.Outline = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("shadow", StringComparison.OrdinalIgnoreCase))
        {
            font.Shadow = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("condense", StringComparison.OrdinalIgnoreCase))
        {
            font.Condense = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("extend", StringComparison.OrdinalIgnoreCase))
        {
            font.Extend = IsBooleanPropertyEnabled(childNode);
        }
        else if (nodeName.Equals("color", StringComparison.OrdinalIgnoreCase))
        {
            font.ColorValue = ParseColor(childNode);
        }
        else if (nodeName.Equals("sz", StringComparison.OrdinalIgnoreCase))
        {
            string sizeValue = GetAttributeValue(childNode, "val");
            if (!string.IsNullOrEmpty(sizeValue))
            {
                font.Size = ParserUtils.ParseFloat(sizeValue);
            }
        }
        else if (nodeName.Equals("u", StringComparison.OrdinalIgnoreCase))
        {
            string underlineValue = GetAttributeValue(childNode, "val");
            if (string.IsNullOrEmpty(underlineValue))
            {
                // An underline element without value means single underline
                font.Underline = Font.UnderlineValue.Single;
            }
            else if (!underlineValue.Equals("none", StringComparison.OrdinalIgnoreCase))
            {
                font.Underline = ParseUnderlineValue(underlineValue);
            }
            // 'none' explicitly disables the underline: the value of the new font object is kept
        }
        else if (nodeName.Equals("vertAlign", StringComparison.OrdinalIgnoreCase))
        {
            string vertAlignValue = GetAttributeValue(childNode, "val");
            if (!string.IsNullOrEmpty(vertAlignValue))
            {
                font.VerticalAlign = ParseVerticalAlignValue(vertAlignValue);
            }
        }
        else if (nodeName.Equals("scheme", StringComparison.OrdinalIgnoreCase))
        {
            string schemeValue = GetAttributeValue(childNode, "val");
            if (!string.IsNullOrEmpty(schemeValue))
            {
                font.Scheme = ParseSchemeValue(schemeValue);
            }
        }
    }

    return font;
}

/// <summary>
/// Evaluates the optional 'val' attribute of a toggle element like 'b' or 'i'
/// </summary>
/// <param name="node">Toggle element</param>
/// <returns>False if the attribute is '0' or 'false', otherwise true (also if the attribute is missing)</returns>
private static bool IsBooleanPropertyEnabled(XmlNode node)
{
    string value = node.Attributes?.GetNamedItem("val")?.InnerText;
    if (string.IsNullOrEmpty(value))
    {
        return true;
    }
    value = value.Trim();
    return !(value == "0" || value.Equals("false", StringComparison.OrdinalIgnoreCase));
}
384
/// <summary>
/// Parses a 'color' element. The color type is determined by the first non-empty attribute in the order
/// auto, indexed, rgb, theme, system. A tint value is applied if a color could be created
/// </summary>
/// <param name="colorNode">The 'color' node to process</param>
/// <returns>Color object or null if none of the color attributes is defined</returns>
private Color ParseColor(XmlNode colorNode)
{
    Color parsed = CreateColorFromAttributes(colorNode);
    if (parsed == null)
    {
        return null;
    }

    string tint = GetAttributeValue(colorNode, "tint");
    if (!string.IsNullOrEmpty(tint))
    {
        parsed.Tint = ParserUtils.ParseFloat(tint);
    }
    return parsed;
}

/// <summary>
/// Creates the color according to the first non-empty color attribute of the node (without tint)
/// </summary>
/// <param name="colorNode">The 'color' node to process</param>
/// <returns>Color object or null if no color attribute is defined</returns>
private Color CreateColorFromAttributes(XmlNode colorNode)
{
    string attributeValue = GetAttributeValue(colorNode, "auto");
    if (!string.IsNullOrEmpty(attributeValue))
    {
        return Color.CreateAuto();
    }

    attributeValue = GetAttributeValue(colorNode, "indexed");
    if (!string.IsNullOrEmpty(attributeValue))
    {
        return Color.CreateIndexed(ParserUtils.ParseInt(attributeValue));
    }

    attributeValue = GetAttributeValue(colorNode, "rgb");
    if (!string.IsNullOrEmpty(attributeValue))
    {
        return Color.CreateRgb(attributeValue);
    }

    attributeValue = GetAttributeValue(colorNode, "theme");
    if (!string.IsNullOrEmpty(attributeValue))
    {
        return Color.CreateTheme(ParserUtils.ParseInt(attributeValue));
    }

    attributeValue = GetAttributeValue(colorNode, "system");
    if (!string.IsNullOrEmpty(attributeValue))
    {
        return Color.CreateSystem(SystemColor.MapStringToValue(attributeValue));
    }

    return null;
}
428
/// <summary>
/// Processes a phonetic run ('rPh' element). The run is only taken over if the text (content of the last 't' child)
/// as well as the attributes 'sb' and 'eb' are not empty. If phonetic characters are captured,
/// the run is additionally remembered for the plain text processing
/// </summary>
/// <param name="rPhNode">The 'rPh' node to process</param>
/// <param name="formattedText">Formatted text that receives the phonetic run</param>
private void ProcessPhoneticRun(XmlNode rPhNode, FormattedText formattedText)
{
    string phoneticText = null;
    foreach (XmlNode element in rPhNode.ChildNodes)
    {
        if (string.Equals(element.LocalName, "t", StringComparison.OrdinalIgnoreCase))
        {
            phoneticText = element.InnerText;
        }
    }

    string startBase = GetAttributeValue(rPhNode, "sb");
    string endBase = GetAttributeValue(rPhNode, "eb");

    bool isIncomplete = string.IsNullOrEmpty(phoneticText) || string.IsNullOrEmpty(startBase) || string.IsNullOrEmpty(endBase);
    if (isIncomplete)
    {
        return;
    }

    uint startIndex = (uint)ParserUtils.ParseInt(startBase);
    uint endIndex = (uint)ParserUtils.ParseInt(endBase);
    formattedText.AddPhoneticRun(phoneticText, startIndex, endIndex);

    // Also capture for plain text processing
    if (capturePhoneticCharacters)
    {
        phoneticsInfo.Add(new PhoneticInfo(phoneticText, startBase, endBase));
    }
}
461
/// <summary>
/// Processes the phonetic properties ('phoneticPr' element) and applies them to the formatted text.
/// A missing 'type' attribute leads to FullwidthKatakana, a missing 'alignment' attribute to Left
/// </summary>
/// <param name="phoneticPrNode">The 'phoneticPr' node to process</param>
/// <param name="formattedText">Formatted text that receives the phonetic properties</param>
private void ProcessPhoneticProperties(XmlNode phoneticPrNode, FormattedText formattedText)
{
    string typeValue = GetAttributeValue(phoneticPrNode, "type");
    string alignmentValue = GetAttributeValue(phoneticPrNode, "alignment");

    // The 'fontId' attribute is not resolved here; a basic font object serves as placeholder for the reference
    Font fontReference = new Font();

    PhoneticRun.PhoneticType type = PhoneticRun.PhoneticType.FullwidthKatakana;
    if (!string.IsNullOrEmpty(typeValue))
    {
        type = ParsePhoneticType(typeValue);
    }

    PhoneticRun.PhoneticAlignment alignment = PhoneticRun.PhoneticAlignment.Left;
    if (!string.IsNullOrEmpty(alignmentValue))
    {
        alignment = ParsePhoneticAlignment(alignmentValue);
    }

    formattedText.SetPhoneticProperties(fontReference, type, alignment);
}
495
/// <summary>
/// Recursively collects the content of all 't' elements of the passed node and its descendants.
/// Phonetic runs ('rPh' elements) are skipped
/// </summary>
/// <param name="node">Node to process</param>
/// <param name="sb">String builder that receives the text</param>
private void GetTextToken(XmlNode node, ref StringBuilder sb)
{
    if (string.Equals(node.LocalName, "t", StringComparison.OrdinalIgnoreCase))
    {
        string content = node.InnerText;
        if (!string.IsNullOrEmpty(content))
        {
            sb.Append(content);
        }
    }

    if (!node.HasChildNodes)
    {
        return;
    }

    foreach (XmlNode child in node.ChildNodes)
    {
        bool isPhoneticRun = string.Equals(child.LocalName, "rPh", StringComparison.OrdinalIgnoreCase);
        if (!isPhoneticRun)
        {
            GetTextToken(child, ref sb);
        }
    }
}
519
/// <summary>
/// Merges the captured phonetic runs into the plain text. The phonetic value of each run is inserted
/// in parentheses behind the end of the text range the run refers to
/// </summary>
/// <param name="sb">String builder that contains the plain text</param>
/// <returns>Text with embedded phonetic values</returns>
private string ProcessPhoneticCharacters(StringBuilder sb)
{
    string baseText = sb.ToString();
    StringBuilder merged = new StringBuilder();
    int cursor = 0;

    for (int i = 0; i < phoneticsInfo.Count; i++)
    {
        PhoneticInfo current = phoneticsInfo[i];
        int segmentEnd = current.StartIndex + current.Length;
        string segment = baseText.Substring(cursor, segmentEnd - cursor);
        merged.Append(segment);
        merged.Append('(');
        merged.Append(current.Value);
        merged.Append(')');
        cursor = segmentEnd;
    }

    // Remaining text behind the last phonetic run
    merged.Append(baseText.Substring(cursor));
    return merged.ToString();
}
540
/// <summary>
/// Gets the value of an attribute of the passed node
/// </summary>
/// <param name="node">Node to read from</param>
/// <param name="attributeName">Name of the attribute</param>
/// <returns>Attribute value or null if the node has no attribute collection or the attribute does not exist</returns>
private string GetAttributeValue(XmlNode node, string attributeName)
{
    XmlAttributeCollection attributes = node.Attributes;
    if (attributes == null)
    {
        return null;
    }
    XmlNode match = attributes.GetNamedItem(attributeName);
    return match == null ? null : match.InnerText;
}
552
/// <summary>
/// Maps an underline value to its enum value (case-insensitive)
/// </summary>
/// <param name="value">Value of the underline attribute</param>
/// <returns>Double, SingleAccounting or DoubleAccounting if matching, otherwise Single</returns>
private Font.UnderlineValue ParseUnderlineValue(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "double")
    {
        return Font.UnderlineValue.Double;
    }
    if (normalized == "singleaccounting")
    {
        return Font.UnderlineValue.SingleAccounting;
    }
    if (normalized == "doubleaccounting")
    {
        return Font.UnderlineValue.DoubleAccounting;
    }
    return Font.UnderlineValue.Single;
}
572
/// <summary>
/// Maps a vertical alignment value to its enum value (case-insensitive)
/// </summary>
/// <param name="value">Value of the vertical alignment attribute</param>
/// <returns>Superscript or Subscript if matching, otherwise Baseline</returns>
private Font.VerticalTextAlignValue ParseVerticalAlignValue(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "superscript")
    {
        return Font.VerticalTextAlignValue.Superscript;
    }
    if (normalized == "subscript")
    {
        return Font.VerticalTextAlignValue.Subscript;
    }
    return Font.VerticalTextAlignValue.Baseline;
}
590
/// <summary>
/// Maps a font scheme value to its enum value (case-insensitive)
/// </summary>
/// <param name="value">Value of the scheme attribute</param>
/// <returns>Major or Minor if matching, otherwise None</returns>
private Font.SchemeValue ParseSchemeValue(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "major")
    {
        return Font.SchemeValue.Major;
    }
    if (normalized == "minor")
    {
        return Font.SchemeValue.Minor;
    }
    return Font.SchemeValue.None;
}
608
/// <summary>
/// Maps a phonetic type value to its enum value (case-insensitive)
/// </summary>
/// <param name="value">Value of the type attribute</param>
/// <returns>HalfwidthKatakana, Hiragana or NoConversion if matching, otherwise FullwidthKatakana</returns>
private PhoneticRun.PhoneticType ParsePhoneticType(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "halfwidthkatakana")
    {
        return PhoneticRun.PhoneticType.HalfwidthKatakana;
    }
    if (normalized == "hiragana")
    {
        return PhoneticRun.PhoneticType.Hiragana;
    }
    if (normalized == "noconversion")
    {
        return PhoneticRun.PhoneticType.NoConversion;
    }
    return PhoneticRun.PhoneticType.FullwidthKatakana;
}
628
/// <summary>
/// Maps a phonetic alignment value to its enum value (case-insensitive)
/// </summary>
/// <param name="value">Value of the alignment attribute</param>
/// <returns>NoControl, Center or Distributed if matching, otherwise Left</returns>
private PhoneticRun.PhoneticAlignment ParsePhoneticAlignment(string value)
{
    string normalized = value.ToLowerInvariant();
    if (normalized == "nocontrol")
    {
        return PhoneticRun.PhoneticAlignment.NoControl;
    }
    if (normalized == "center")
    {
        return PhoneticRun.PhoneticAlignment.Center;
    }
    if (normalized == "distributed")
    {
        return PhoneticRun.PhoneticAlignment.Distributed;
    }
    return PhoneticRun.PhoneticAlignment.Left;
}
648
649 #endregion
650
651 #region sub-classes
/// <summary>
/// Holds the value and the text range of one phonetic run for the plain text processing
/// </summary>
private sealed class PhoneticInfo
{
    /// <summary>
    /// Gets the phonetic text
    /// </summary>
    public string Value { get; private set; }

    /// <summary>
    /// Gets the parsed start index
    /// </summary>
    public int StartIndex { get; private set; }

    /// <summary>
    /// Gets the length, calculated as parsed end index minus start index
    /// </summary>
    public int Length { get; private set; }

    /// <summary>
    /// Creates the info object from the raw attribute values of a phonetic run
    /// </summary>
    /// <param name="value">Phonetic text</param>
    /// <param name="start">Start index as string</param>
    /// <param name="end">End index as string</param>
    public PhoneticInfo(string value, string start, string end)
    {
        int first = ParserUtils.ParseInt(start);
        int last = ParserUtils.ParseInt(end);

        this.Value = value;
        this.StartIndex = first;
        this.Length = last - first;
    }
}
685 #endregion
686 }
687}
Represents a phonetic run that provides pronunciation guidance for text.
PhoneticAlignment
Enumeration for phonetic text alignment.
PhoneticType
Enumeration for phonetic text types.
Represents a formatted text entry in Excel shared strings, supporting rich text with multiple runs an...
override string ToString()
Gets the string representation of the formatted text without formatting (plain text)....
FormattedText AddPhoneticRun(string text, uint startBase, uint endBase)
Adds a phonetic run for pronunciation guidance (Ruby text, like Furigana, Pinyin or Zhuyin).
string PlainText
Gets the plain text content by concatenating all runs.
FormattedText AddRun(string text, Font fontStyle=null)
Adds a text run with the specified style.
FormattedText SetPhoneticProperties(Font fontReference, PhoneticRun.PhoneticType type=PhoneticRun.PhoneticType.FullwidthKatakana, PhoneticRun.PhoneticAlignment alignment=PhoneticRun.PhoneticAlignment.Left)
Sets the phonetic properties for this formatted text, applied to the phonetic run (Ruby text).