jellyfin/Emby.Server.Implementations/Localization/TextLocalizer.cs

using System;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Emby.Server.Implementations.Localization
{
    /// <summary>
    /// Provides Unicode normalization helpers for text: diacritic removal and
    /// compatibility decomposition.
    /// </summary>
    public class TextLocalizer : ITextLocalizer
    {
        /// <summary>
        /// Matches UTF-16 surrogates that are not part of a valid pair: a high surrogate
        /// not followed by a low surrogate, or a low surrogate not preceded by a high one.
        /// Built once and reused instead of re-parsing the pattern on every call.
        /// </summary>
        private static readonly Regex _invalidCharactersRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");

        /// <summary>
        /// Removes diacritics from the given text by decomposing it (form D), dropping every
        /// character whose Unicode category is <see cref="UnicodeCategory.NonSpacingMark"/>,
        /// and recomposing the remainder (form C).
        /// </summary>
        /// <param name="text">The text to process.</param>
        /// <returns>The text without non-spacing marks.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public string RemoveDiacritics(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            var decomposed = Normalize(text, NormalizationForm.FormD);

            // The result can never be longer than the decomposed input, so presize the buffer.
            var builder = new StringBuilder(decomposed.Length);
            foreach (var ch in decomposed)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
                {
                    builder.Append(ch);
                }
            }

            return Normalize(builder.ToString(), NormalizationForm.FormC);
        }

        /// <summary>
        /// Normalizes <paramref name="text"/> to the requested form without ever throwing
        /// because of invalid Unicode in the input.
        /// </summary>
        /// <param name="text">The text to normalize.</param>
        /// <param name="form">The target normalization form.</param>
        /// <param name="stripStringOnFailure">
        /// When <c>true</c>, a failed normalization is retried once after unpaired surrogates
        /// have been stripped from the text. When <c>false</c>, the text is returned as-is on failure.
        /// </param>
        /// <returns>
        /// The normalized text, or the (possibly stripped) unnormalized text when normalization
        /// is not possible.
        /// </returns>
        private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
        {
            try
            {
                return text.Normalize(form);
            }
            catch (ArgumentException)
            {
                // will throw if input contains invalid unicode chars
                // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
                if (!stripStringOnFailure)
                {
                    return text;
                }
            }

            text = StripInvalidUnicodeCharacters(text);

            try
            {
                return text.Normalize(form);
            }
            catch (ArgumentException)
            {
                // if it still fails, give up and return the stripped text unnormalized
                return text;
            }
        }

        /// <summary>
        /// Removes every unpaired UTF-16 surrogate from <paramref name="str"/>.
        /// </summary>
        /// <param name="str">The text to clean.</param>
        /// <returns>The text with unpaired surrogates removed.</returns>
        private static string StripInvalidUnicodeCharacters(string str)
        {
            return _invalidCharactersRegex.Replace(str, "");
        }

        /// <summary>
        /// Normalizes the given text to Unicode normalization form KD (compatibility decomposition).
        /// </summary>
        /// <param name="text">The text to normalize.</param>
        /// <returns>The text in normalization form KD.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public string NormalizeFormKD(string text)
        {
            // Fail with the same explicit exception as RemoveDiacritics instead of a NullReferenceException.
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            return text.Normalize(NormalizationForm.FormKD);
        }
    }
}
-												move localization classes

											
										
										
											2016-11-05 02:17:18 +00:00
+								using System;
 								using System.Globalization;
 								using System.Linq;
 								using System.Text;
-												update artist lists

											
										
										
											2016-11-29 19:13:01 +00:00
+								using System.Text.RegularExpressions;
-												move localization classes

											
										
										
											2016-11-05 02:17:18 +00:00
-												consolidate emby.server.core into emby.server.implementations

											
										
										
											2017-08-09 19:56:38 +00:00
+								namespace Emby.Server.Implementations.Localization
-												move localization classes

											
										
										
											2016-11-05 02:17:18 +00:00
+								{
 								    public class TextLocalizer : ITextLocalizer
 								    {
 								        public string RemoveDiacritics(string text)
 								        {
-												update artist lists

											
										
										
											2016-11-29 19:13:01 +00:00
+								            if (text == null)
 								            {
-												Mayor code cleanup

All Argument*Exceptions now use proper nameof operators.

Added exception messages to quite a few Argument*Exceptions.

Fixed rethrowing to use proper syntax.

Added a ton of null checks. (This is only a start; there are about 500 places that need proper null handling.)

Added some TODOs to log certain exceptions.

Fix sln again.

Fixed all AssemblyInfo's and added proper copyright (where I could find them)

We live in *current year*.

Fixed the use of braces.

Fixed a ton of properties, and made a fair amount of functions static that should be and can be static.

Made more Methods that should be static static.

You can now use static to find bad functions!

Removed unused variable. And added one more proper XML comment.

											
										
										
											2019-01-06 20:50:43 +00:00
+								                throw new ArgumentNullException(nameof(text));
-												update artist lists

											
										
										
											2016-11-29 19:13:01 +00:00
+								            }
 								            var chars = Normalize(text, NormalizationForm.FormD)
 								                .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark);
-												Mayor code cleanup

All Argument*Exceptions now use proper nameof operators.

Added exception messages to quite a few Argument*Exceptions.

Fixed rethrowing to use proper syntax.

Added a ton of null checks. (This is only a start; there are about 500 places that need proper null handling.)

Added some TODOs to log certain exceptions.

Fix sln again.

Fixed all AssemblyInfo's and added proper copyright (where I could find them)

We live in *current year*.

Fixed the use of braces.

Fixed a ton of properties, and made a fair amount of functions static that should be and can be static.

Made more Methods that should be static static.

You can now use static to find bad functions!

Removed unused variable. And added one more proper XML comment.

											
										
										
											2019-01-06 20:50:43 +00:00
+								            return Normalize(string.Concat(chars), NormalizationForm.FormC);
-												update artist lists

											
										
										
											2016-11-29 19:13:01 +00:00
+								        }
 								        private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
 								        {
 								            if (stripStringOnFailure)
 								            {
 								                try
 								                {
 								                    return text.Normalize(form);
 								                }
 								                catch (ArgumentException)
 								                {
 								                    // will throw if input contains invalid unicode chars
-												remove trailing whitespace

											
										
										
											2019-01-07 23:27:46 +00:00
+								                    // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
-												update artist lists

											
										
										
											2016-11-29 19:13:01 +00:00
+								                    text = StripInvalidUnicodeCharacters(text);
 								                    return Normalize(text, form, false);
 								                }
 								            }
-												update legacy hdhomerun support

											
										
										
											2017-03-06 02:32:56 +00:00
+								            try
 								            {
 								                return text.Normalize(form);
 								            }
 								            catch (ArgumentException)
 								            {
 								                // if it still fails, return the original text
 								                return text;
 								            }
-												update artist lists

											
										
										
											2016-11-29 19:13:01 +00:00
+								        }
 								        private static string StripInvalidUnicodeCharacters(string str)
 								        {
 								            var invalidCharactersRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");
 								            return invalidCharactersRegex.Replace(str, "");
-												move localization classes

											
										
										
											2016-11-05 02:17:18 +00:00
+								        }
 								        public string NormalizeFormKD(string text)
 								        {
 								            return text.Normalize(NormalizationForm.FormKD);
 								        }
 								    }
 								}