2016-11-05 02:17:18 +00:00
|
|
|
|
using System;
|
|
|
|
|
using System.Globalization;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using System.Text;
|
2016-11-29 19:13:01 +00:00
|
|
|
|
using System.Text.RegularExpressions;
|
2016-11-05 02:17:18 +00:00
|
|
|
|
|
2017-08-09 19:56:38 +00:00
|
|
|
|
namespace Emby.Server.Implementations.Localization
{
    /// <summary>
    /// Provides Unicode text helpers: diacritic removal and normalization.
    /// </summary>
    public class TextLocalizer : ITextLocalizer
    {
        /// <summary>
        /// Matches UTF-16 surrogates that are not part of a valid pair: a high surrogate
        /// that is not followed by a low surrogate, or a low surrogate that is not
        /// preceded by a high surrogate. Built once and shared instead of being
        /// re-created on every call.
        /// </summary>
        private static readonly Regex _invalidCharactersRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");

        /// <summary>
        /// Removes diacritics from <paramref name="text"/> by decomposing it (form D),
        /// dropping every character whose Unicode category is
        /// <see cref="UnicodeCategory.NonSpacingMark"/>, and recomposing the rest (form C).
        /// </summary>
        /// <param name="text">The text to process.</param>
        /// <returns>The text without non-spacing marks, normalized to form C.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public string RemoveDiacritics(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            var decomposed = Normalize(text, NormalizationForm.FormD);

            // The result can never be longer than the decomposed input, so presize the buffer.
            var builder = new StringBuilder(decomposed.Length);

            foreach (var ch in decomposed)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
                {
                    builder.Append(ch);
                }
            }

            return Normalize(builder.ToString(), NormalizationForm.FormC);
        }

        /// <summary>
        /// Normalizes <paramref name="text"/> to <paramref name="form"/> without letting
        /// an <see cref="ArgumentException"/> from <see cref="string.Normalize(NormalizationForm)"/> escape.
        /// </summary>
        /// <param name="text">The text to normalize.</param>
        /// <param name="form">The target normalization form.</param>
        /// <param name="stripStringOnFailure">
        /// When <c>true</c>, a failed normalization is retried once after unpaired surrogates
        /// have been stripped; when <c>false</c>, the text is returned unchanged on failure.
        /// </param>
        /// <returns>
        /// The normalized text, or the (possibly stripped) un-normalized text if normalization
        /// keeps failing.
        /// </returns>
        private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
        {
            try
            {
                return text.Normalize(form);
            }
            catch (ArgumentException)
            {
                // will throw if input contains invalid unicode chars
                // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
                if (!stripStringOnFailure)
                {
                    // Already retried (or retry not requested): hand back the text as it is.
                    return text;
                }

                // Retry exactly once on the stripped text; the second attempt does not strip again.
                return Normalize(StripInvalidUnicodeCharacters(text), form, false);
            }
        }

        /// <summary>
        /// Removes every unpaired UTF-16 surrogate from <paramref name="str"/>.
        /// </summary>
        /// <param name="str">The string to clean.</param>
        /// <returns>The string with unpaired surrogates removed.</returns>
        private static string StripInvalidUnicodeCharacters(string str)
        {
            return _invalidCharactersRegex.Replace(str, "");
        }

        /// <summary>
        /// Normalizes <paramref name="text"/> to Unicode normalization form KD.
        /// Handles null input and invalid Unicode the same way as <see cref="RemoveDiacritics"/>:
        /// unpaired surrogates are stripped instead of raising an <see cref="ArgumentException"/>.
        /// </summary>
        /// <param name="text">The text to normalize.</param>
        /// <returns>The text in normalization form KD.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public string NormalizeFormKD(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            return Normalize(text, NormalizationForm.FormKD);
        }
    }
}
|