// jellyfin-server/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs
//
// 128 lines
// 3.2 KiB
// C#

using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using NLangDetect.Core.Utils;
using MediaBrowser.Model.Serialization;
using System.Linq;
namespace NLangDetect.Core
{
/// <summary>
/// Process-wide registry of loaded language profiles and factory for <see cref="Detector"/> instances.
/// All state lives in a single private static instance that the static methods read and mutate;
/// no synchronization is performed here.
/// </summary>
public class DetectorFactory
{
    /// <summary>
    /// Maps each n-gram found in a loaded profile to its probability vector.
    /// The vector is indexed by the profile index that was passed to <see cref="AddProfile"/>.
    /// </summary>
    public Dictionary<string, ProbVector> WordLangProbMap;

    /// <summary>
    /// Names of the loaded language profiles, in the order they were added.
    /// </summary>
    public List<string> Langlist;

    private static readonly DetectorFactory _instance = new DetectorFactory();

    #region Constructor(s)

    /// <summary>
    /// Creates the empty registry. Private: the only instance is the static singleton.
    /// </summary>
    private DetectorFactory()
    {
        WordLangProbMap = new Dictionary<string, ProbVector>();
        Langlist = new List<string>();
    }

    #endregion

    #region Public methods

    /// <summary>
    /// Loads every language profile embedded in this assembly as a manifest resource whose
    /// name contains <c>NLangDetect.Profiles</c>, registering them with indices 0, 1, 2, ...
    /// in enumeration order.
    /// </summary>
    /// <param name="json">Serializer used to deserialize each resource stream into a <see cref="LangProfile"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="json"/> is <c>null</c>.</exception>
    /// <exception cref="NLangDetectException">A profile with an already registered name is encountered.</exception>
    public static void LoadProfiles(IJsonSerializer json)
    {
        if (json == null)
        {
            throw new ArgumentNullException(nameof(json));
        }

        var assembly = typeof(DetectorFactory).Assembly;

        // Enumerated exactly once below, so no intermediate list is needed.
        var profileResourceNames = assembly.GetManifestResourceNames()
            .Where(i => i.IndexOf("NLangDetect.Profiles", StringComparison.Ordinal) != -1);

        // NOTE(review): indices always start at 0 here regardless of what is already in
        // Langlist; presumably callers load profiles once into an empty factory - confirm.
        var index = 0;
        foreach (var name in profileResourceNames)
        {
            using (var stream = assembly.GetManifestResourceStream(name))
            {
                var langProfile = (LangProfile)json.DeserializeFromStream(stream, typeof(LangProfile));
                AddProfile(langProfile, index);
            }

            index++;
        }
    }

    /// <summary>
    /// Creates a detector backed by the currently loaded profiles.
    /// </summary>
    /// <returns>A new <see cref="Detector"/>.</returns>
    /// <exception cref="NLangDetectException">No profiles have been loaded.</exception>
    public static Detector Create()
    {
        return CreateDetector();
    }

    /// <summary>
    /// Creates a detector backed by the currently loaded profiles and applies
    /// <paramref name="alpha"/> to it through <c>Detector.SetAlpha</c>.
    /// </summary>
    /// <param name="alpha">Value forwarded to <c>Detector.SetAlpha</c>.</param>
    /// <returns>A new <see cref="Detector"/>.</returns>
    /// <exception cref="NLangDetectException">No profiles have been loaded.</exception>
    public static Detector Create(double alpha)
    {
        Detector detector = CreateDetector();
        detector.SetAlpha(alpha);
        return detector;
    }

    /// <summary>
    /// Stores <paramref name="seed"/> on the shared factory instance (see <see cref="Seed"/>).
    /// </summary>
    /// <param name="seed">Seed value, or <c>null</c> to clear it.</param>
    public static void SetSeed(int? seed)
    {
        _instance.Seed = seed;
    }

    #endregion

    #region Internal methods

    /// <summary>
    /// Registers a language profile: appends its name to <see cref="Langlist"/> and writes, for
    /// every n-gram in the profile, <c>freq[word] / n_words[word.Length - 1]</c> into slot
    /// <paramref name="index"/> of that n-gram's probability vector.
    /// </summary>
    /// <param name="profile">Profile to register.</param>
    /// <param name="index">Slot in each <see cref="ProbVector"/> that receives this profile's probabilities.</param>
    /// <exception cref="ArgumentNullException"><paramref name="profile"/> is <c>null</c>.</exception>
    /// <exception cref="NLangDetectException">A profile with the same name is already registered.</exception>
    internal static void AddProfile(LangProfile profile, int index)
    {
        if (profile == null)
        {
            throw new ArgumentNullException(nameof(profile));
        }

        var lang = profile.name;
        if (_instance.Langlist.Contains(lang))
        {
            throw new NLangDetectException("duplicate the same language profile", ErrorCode.DuplicateLangError);
        }

        _instance.Langlist.Add(lang);

        var wordLangProbMap = _instance.WordLangProbMap;
        foreach (string word in profile.freq.Keys)
        {
            // Single lookup; create the vector on first sight of this n-gram.
            ProbVector probabilities;
            if (!wordLangProbMap.TryGetValue(word, out probabilities))
            {
                probabilities = new ProbVector();
                wordLangProbMap.Add(word, probabilities);
            }

            // NOTE(review): n_words is indexed by n-gram length minus one, so an empty key or
            // one longer than n_words throws here - confirm profiles never contain such keys.
            double prob = (double)profile.freq[word] / profile.n_words[word.Length - 1];
            probabilities[index] = prob;
        }
    }

    /// <summary>
    /// Removes all registered languages and n-gram probabilities. <see cref="Seed"/> is left untouched.
    /// </summary>
    internal static void Clear()
    {
        _instance.Langlist.Clear();
        _instance.WordLangProbMap.Clear();
    }

    #endregion

    #region Private helper methods

    /// <summary>
    /// Builds a <see cref="Detector"/> over the shared factory instance.
    /// </summary>
    /// <exception cref="NLangDetectException">No profiles have been loaded.</exception>
    private static Detector CreateDetector()
    {
        if (_instance.Langlist.Count == 0)
        {
            throw new NLangDetectException("need to load profiles", ErrorCode.NeedLoadProfileError);
        }

        return new Detector(_instance);
    }

    #endregion

    #region Properties

    /// <summary>
    /// Seed last set through <see cref="SetSeed"/>, or <c>null</c> if none was set.
    /// </summary>
    public int? Seed { get; private set; }

    #endregion
}
}