Improve series name matching

Add a series path resolver that attempts to extract only the series
name from a path that contains more information than just the name.
This commit is contained in:
Fredrik Lindberg 2021-08-26 20:01:56 +02:00
parent e15fea5dad
commit ea439c5ccf
8 changed files with 233 additions and 3 deletions

View File

@ -368,6 +368,20 @@ namespace Emby.Naming.Common
IsOptimistic = true,
IsNamed = true
},
// Series and season only expression
// "the show/season 1", "the show/s01"
new EpisodeExpression(@"(.*(\\|\/))*(?<seriesname>.+)\/[Ss](eason)?[\. _\-]*(?<seasonnumber>[0-9]+)")
{
IsNamed = true
},
// Series and season only expression
// "the show S01", "the show season 1"
new EpisodeExpression(@"(.*(\\|\/))*(?<seriesname>.+)[\. _\-]+[sS](eason)?[\. _\-]*(?<seasonnumber>[0-9]+)")
{
IsNamed = true
},
};
EpisodeWithoutSeasonExpressions = new[]

View File

@ -0,0 +1,29 @@
namespace Emby.Naming.TV
{
    /// <summary>
    /// Simple container pairing a series path with the series name derived from it.
    /// </summary>
    public class SeriesInfo
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="SeriesInfo"/> class.
        /// </summary>
        /// <param name="path">The path this series information belongs to.</param>
        public SeriesInfo(string path) => Path = path;

        /// <summary>
        /// Gets or sets the path the information was created for.
        /// </summary>
        /// <value>The path, as handed to the constructor unless reassigned later.</value>
        public string Path { get; set; }

        /// <summary>
        /// Gets or sets the series name.
        /// </summary>
        /// <value>The series name, or <c>null</c> until a name has been assigned.</value>
        public string? Name { get; set; }
    }
}

View File

@ -0,0 +1,61 @@
using System.Globalization;
using Emby.Naming.Common;
namespace Emby.Naming.TV
{
    /// <summary>
    /// Extracts series information from paths that carry more than just the series name.
    /// It walks the expressions in <see cref="NamingOptions.EpisodeExpressions"/> but applies its own
    /// success criteria: a match only counts when both a series name and a season number were captured.
    /// </summary>
    public static class SeriesPathParser
    {
        /// <summary>
        /// Parses series information from a path.
        /// </summary>
        /// <param name="options"><see cref="NamingOptions"/> whose EpisodeExpressions are tried in order.</param>
        /// <param name="path">The path to inspect.</param>
        /// <returns>
        /// The <see cref="SeriesPathParserResult"/> of the first expression that succeeds, with separator
        /// characters trimmed from both ends of the series name; otherwise an empty, unsuccessful result.
        /// </returns>
        public static SeriesPathParserResult Parse(NamingOptions options, string path)
        {
            foreach (var expression in options.EpisodeExpressions)
            {
                var candidate = Parse(path, expression);
                if (!candidate.Success)
                {
                    continue;
                }

                // First successful expression wins; strip leftover separators around the name.
                if (!string.IsNullOrEmpty(candidate.SeriesName))
                {
                    candidate.SeriesName = candidate.SeriesName.Trim(' ', '_', '.', '-');
                }

                return candidate;
            }

            return new SeriesPathParserResult();
        }

        /// <summary>
        /// Applies a single expression to the given name.
        /// </summary>
        /// <param name="name">The text to match.</param>
        /// <param name="expression">The expression to apply.</param>
        /// <returns>A result that is successful only if the "seriesname" and "seasonnumber" groups are both non-empty.</returns>
        private static SeriesPathParserResult Parse(string name, EpisodeExpression expression)
        {
            var outcome = new SeriesPathParserResult();

            var match = expression.Regex.Match(name);
            if (!match.Success || match.Groups.Count < 3 || !expression.IsNamed)
            {
                return outcome;
            }

            // Groups that did not take part in the match yield an empty Value, never null.
            var seriesName = match.Groups["seriesname"].Value;
            var seasonNumber = match.Groups["seasonnumber"].Value;

            outcome.SeriesName = seriesName;
            outcome.Success = !string.IsNullOrEmpty(seriesName) && !string.IsNullOrEmpty(seasonNumber);
            return outcome;
        }
    }
}

View File

@ -0,0 +1,19 @@
namespace Emby.Naming.TV
{
    /// <summary>
    /// Carries the outcome of a <see cref="SeriesPathParser"/> run.
    /// </summary>
    public class SeriesPathParserResult
    {
        /// <summary>
        /// Gets or sets the series name that was extracted.
        /// </summary>
        /// <value>The extracted series name, or <c>null</c> when none has been set.</value>
        public string? SeriesName { get; set; }

        /// <summary>
        /// Gets or sets a value indicating whether the parse succeeded.
        /// </summary>
        /// <value><c>true</c> when parsing succeeded; <c>false</c> (the default) otherwise.</value>
        public bool Success { get; set; } = false;
    }
}

View File

@ -0,0 +1,49 @@
using System.IO;
using System.Text.RegularExpressions;
using Emby.Naming.Common;
namespace Emby.Naming.TV
{
    /// <summary>
    /// Resolves series information from a path.
    /// </summary>
    public static class SeriesResolver
    {
        /// <summary>
        /// Matches runs of two or more characters that are neither a dot nor an underscore, together with
        /// the dot/underscore separators next to them. Replacing each match with "${a} ${b}" turns
        /// "The_show" into "The show", while names built from single characters such as "S.H.O.W"
        /// are left untouched.
        /// </summary>
        private static readonly Regex _seriesNameRegex = new Regex(@"((?<a>[^\._]{2,})[\._]*)|([\._](?<b>[^\._]{2,}))");

        /// <summary>
        /// Resolves series information from a path.
        /// </summary>
        /// <param name="options"><see cref="NamingOptions"/> forwarded to <see cref="SeriesPathParser"/>.</param>
        /// <param name="path">The series path.</param>
        /// <returns>
        /// A <see cref="SeriesInfo"/> for <paramref name="path"/> whose name is the parsed series name when
        /// parsing succeeds, or the file name portion of the path otherwise, with word separators replaced by spaces.
        /// </returns>
        public static SeriesInfo Resolve(NamingOptions options, string path)
        {
            // The last path segment is the fallback when the parser finds nothing better.
            string fallbackName = Path.GetFileName(path);
            SeriesPathParserResult parsed = SeriesPathParser.Parse(options, path);

            string seriesName = parsed.Success && !string.IsNullOrEmpty(parsed.SeriesName)
                ? parsed.SeriesName
                : fallbackName;

            if (!string.IsNullOrEmpty(seriesName))
            {
                seriesName = _seriesNameRegex.Replace(seriesName, "${a} ${b}").Trim();
            }

            var info = new SeriesInfo(path);
            info.Name = seriesName;
            return info;
        }
    }
}

View File

@ -55,6 +55,8 @@ namespace Emby.Server.Implementations.Library.Resolvers.TV
return null;
}
var seriesInfo = Naming.TV.SeriesResolver.Resolve(_libraryManager.GetNamingOptions(), args.Path);
var collectionType = args.GetCollectionType();
if (string.Equals(collectionType, CollectionType.TvShows, StringComparison.OrdinalIgnoreCase))
{
@ -64,7 +66,7 @@ namespace Emby.Server.Implementations.Library.Resolvers.TV
return new Series
{
Path = args.Path,
Name = Path.GetFileName(args.Path)
Name = seriesInfo.Name
};
}
}
@ -81,7 +83,7 @@ namespace Emby.Server.Implementations.Library.Resolvers.TV
return new Series
{
Path = args.Path,
Name = Path.GetFileName(args.Path)
Name = seriesInfo.Name
};
}
@ -95,7 +97,7 @@ namespace Emby.Server.Implementations.Library.Resolvers.TV
return new Series
{
Path = args.Path,
Name = Path.GetFileName(args.Path)
Name = seriesInfo.Name
};
}
}

View File

@ -0,0 +1,28 @@
using Emby.Naming.Common;
using Emby.Naming.TV;
using Xunit;
namespace Jellyfin.Naming.Tests.TV
{
    /// <summary>
    /// Tests for <see cref="SeriesPathParser"/>.
    /// </summary>
    public class SeriesPathParserTest
    {
        /// <summary>
        /// Checks that the parser succeeds and yields the expected series name for each path.
        /// </summary>
        /// <param name="path">The path handed to the parser.</param>
        /// <param name="name">The series name the parser is expected to extract.</param>
        [Theory]
        [InlineData("The.Show.S01", "The.Show")]
        [InlineData("/The.Show.S01", "The.Show")]
        [InlineData("/some/place/The.Show.S01", "The.Show")]
        [InlineData("/something/The.Show.S01", "The.Show")]
        [InlineData("The Show Season 10", "The Show")]
        [InlineData("The Show S01E01", "The Show")]
        [InlineData("The Show S01E01 Episode", "The Show")]
        [InlineData("/something/The Show/Season 1", "The Show")]
        [InlineData("/something/The Show/S01", "The Show")]
        public void SeriesPathParserParseTest(string path, string name)
        {
            var namingOptions = new NamingOptions();

            SeriesPathParserResult parsed = SeriesPathParser.Parse(namingOptions, path);

            Assert.Equal(name, parsed.SeriesName);
            Assert.True(parsed.Success);
        }
    }
}

View File

@ -0,0 +1,28 @@
using Emby.Naming.Common;
using Emby.Naming.TV;
using Xunit;
namespace Jellyfin.Naming.Tests.TV
{
    /// <summary>
    /// Tests for <see cref="SeriesResolver"/>.
    /// </summary>
    public class SeriesResolverTests
    {
        /// <summary>
        /// Checks that the resolver produces the expected series name for each path.
        /// </summary>
        /// <param name="path">The path handed to the resolver.</param>
        /// <param name="name">The series name the resolver is expected to produce.</param>
        [Theory]
        [InlineData("The.Show.S01", "The Show")]
        [InlineData("The.Show.S01.COMPLETE", "The Show")]
        [InlineData("S.H.O.W.S01", "S.H.O.W")]
        [InlineData("The.Show.P.I.S01", "The Show P.I")]
        [InlineData("The_Show_Season_1", "The Show")]
        [InlineData("/something/The_Show/Season 10", "The Show")]
        [InlineData("The Show", "The Show")]
        [InlineData("/some/path/The Show", "The Show")]
        [InlineData("/some/path/The Show s02e10 720p hdtv", "The Show")]
        [InlineData("/some/path/The Show s02e10 the episode 720p hdtv", "The Show")]
        public void SeriesResolverResolveTest(string path, string name)
        {
            var namingOptions = new NamingOptions();

            SeriesInfo resolved = SeriesResolver.Resolve(namingOptions, path);

            Assert.Equal(name, resolved.Name);
        }
    }
}