jellyfin/Emby.Server.Implementations/Localization/TextLocalizer.cs

64 lines
2.0 KiB
C#
Raw Normal View History

2016-11-05 02:17:18 +00:00
using System;
using System.Globalization;
using System.Linq;
using System.Text;
2016-11-29 19:13:01 +00:00
using System.Text.RegularExpressions;
2016-11-05 02:17:18 +00:00
namespace Emby.Server.Implementations.Localization
2016-11-05 02:17:18 +00:00
{
/// <summary>
/// Unicode normalization helpers that tolerate strings containing invalid UTF-16 sequences.
/// </summary>
public class TextLocalizer : ITextLocalizer
{
    /// <summary>
    /// Matches unpaired UTF-16 surrogates: a high surrogate that is not followed by a low
    /// surrogate, or a low surrogate that is not preceded by a high surrogate.
    /// Cached in a static field so the pattern is parsed once instead of on every call.
    /// </summary>
    private static readonly Regex _unpairedSurrogateRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");

    /// <summary>
    /// Removes diacritics from the text by decomposing it (form D), dropping every
    /// non-spacing mark and recomposing the remainder (form C).
    /// </summary>
    /// <param name="text">The text to process.</param>
    /// <returns>The text without non-spacing marks, normalized to form C.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public string RemoveDiacritics(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        var decomposed = Normalize(text, NormalizationForm.FormD);

        // The result can never be longer than the decomposed input.
        var builder = new StringBuilder(decomposed.Length);
        foreach (var ch in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
            {
                builder.Append(ch);
            }
        }

        return Normalize(builder.ToString(), NormalizationForm.FormC);
    }

    /// <summary>
    /// Normalizes the text to Unicode normalization form KD.
    /// Unpaired surrogates, which make <see cref="string.Normalize(NormalizationForm)"/> throw,
    /// are stripped before retrying, consistent with <see cref="RemoveDiacritics(string)"/>.
    /// </summary>
    /// <param name="text">The text to normalize.</param>
    /// <returns>The normalized text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public string NormalizeFormKD(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        return Normalize(text, NormalizationForm.FormKD);
    }

    /// <summary>
    /// Normalizes the text to the requested form without letting invalid input propagate
    /// an <see cref="ArgumentException"/> to the caller.
    /// </summary>
    /// <param name="text">The text to normalize.</param>
    /// <param name="form">The target normalization form.</param>
    /// <param name="stripStringOnFailure">
    /// When <c>true</c>, a failed attempt is retried once after unpaired surrogates have been removed.
    /// When <c>false</c>, a failed attempt returns <paramref name="text"/> unchanged.
    /// </param>
    /// <returns>
    /// The normalized text, or the (possibly stripped) input when it still cannot be normalized.
    /// </returns>
    private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
    {
        try
        {
            return text.Normalize(form);
        }
        catch (ArgumentException)
        {
            // string.Normalize throws if the input contains invalid unicode chars
            // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
            if (!stripStringOnFailure)
            {
                // Second attempt failed as well: give back the text as it was passed in.
                return text;
            }

            return Normalize(StripInvalidUnicodeCharacters(text), form, false);
        }
    }

    /// <summary>
    /// Removes every unpaired surrogate code unit from the string.
    /// </summary>
    /// <param name="str">The string to clean.</param>
    /// <returns>The string with unpaired surrogates removed.</returns>
    private static string StripInvalidUnicodeCharacters(string str)
    {
        return _unpairedSurrogateRegex.Replace(str, string.Empty);
    }
}
}