Made CleanStringParser more robust

It can now handle a bracketed tag ([...]) at the beginning of the string.
This commit is contained in:
sushilicious 2021-08-03 13:46:56 -07:00
parent 0c9f824d0a
commit 26f8b501e7
3 changed files with 48 additions and 11 deletions

View File

@ -137,8 +137,11 @@ namespace Emby.Naming.Common
CleanStrings = new[] CleanStrings = new[]
{ {
@"[ _\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)", @"^\s*(?<cleaned>.+?)[ _\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)",
@"(\[.*\])" @"^(?<cleaned>.+?)(\[.*\])",
@"^\s*(?<cleaned>.+?)\WE\d+(-|~)E?\d+(\W|$)",
@"^\s*\[[^\]]+\](?!\.\w+$)\s*(?<cleaned>.+)",
@"^\s*(?<cleaned>.+?)\s+-\s+\d+\s*$"
}; };
SubtitleFileExtensions = new[] SubtitleFileExtensions = new[]

View File

@ -25,26 +25,54 @@ namespace Emby.Naming.Video
return false; return false;
} }
var len = expressions.Count; // Iteratively remove extra cruft until we're left with the string
for (int i = 0; i < len; i++) // we want.
newName = ReadOnlySpan<char>.Empty;
const int maxTries = 100; // This is just a precautionary
// measure. Should not be necessary.
var loopCounter = 0;
for (; loopCounter < maxTries; loopCounter++)
{ {
if (TryClean(name, expressions[i], out newName)) bool cleaned = false;
var len = expressions.Count;
for (int i = 0; i < len; i++)
{ {
return true; if (TryClean(name, expressions[i], out newName))
{
cleaned = true;
name = newName.ToString();
break;
}
}
if (!cleaned)
{
break;
} }
} }
newName = ReadOnlySpan<char>.Empty; if (loopCounter > 0)
return false; {
newName = name.AsSpan();
}
return newName != ReadOnlySpan<char>.Empty;
} }
private static bool TryClean(string name, Regex expression, out ReadOnlySpan<char> newName) private static bool TryClean(string name, Regex expression, out ReadOnlySpan<char> newName)
{ {
var match = expression.Match(name); var match = expression.Match(name);
int index = match.Index; int index = match.Index;
if (match.Success && index != 0) if (match.Success)
{ {
newName = name.AsSpan().Slice(0, match.Index); var found = match.Groups.TryGetValue("cleaned", out var cleaned);
if (!found || cleaned == null)
{
newName = ReadOnlySpan<char>.Empty;
return false;
}
newName = name.AsSpan().Slice(cleaned.Index, cleaned.Length);
return true; return true;
} }

View File

@ -1,4 +1,4 @@
using System; using System;
using Emby.Naming.Common; using Emby.Naming.Common;
using Emby.Naming.Video; using Emby.Naming.Video;
using Xunit; using Xunit;
@ -23,6 +23,12 @@ namespace Jellyfin.Naming.Tests.Video
[InlineData("Crouching.Tiger.Hidden.Dragon.BDrip.mkv", "Crouching.Tiger.Hidden.Dragon")] [InlineData("Crouching.Tiger.Hidden.Dragon.BDrip.mkv", "Crouching.Tiger.Hidden.Dragon")]
[InlineData("Crouching.Tiger.Hidden.Dragon.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")] [InlineData("Crouching.Tiger.Hidden.Dragon.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")]
[InlineData("Crouching.Tiger.Hidden.Dragon.4K.UltraHD.HDR.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")] [InlineData("Crouching.Tiger.Hidden.Dragon.4K.UltraHD.HDR.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")]
[InlineData("[HorribleSubs] Made in Abyss - 13 [720p].mkv", "Made in Abyss")]
[InlineData("[Tsundere] Kore wa Zombie Desu ka of the Dead [BDRip h264 1920x1080 FLAC]", "Kore wa Zombie Desu ka of the Dead")]
[InlineData("[Erai-raws] Jujutsu Kaisen - 03 [720p][Multiple Subtitle].mkv", "Jujutsu Kaisen")]
[InlineData("[OCN] 720p-NEXT", " ")]
[InlineData("[tvN] .E01-E16.720p-NEXT", "")]
[InlineData("[tvN] E01~E16 END HDTV.H264.720p-WITH", " ")]
// FIXME: [InlineData("After The Sunset - [0004].mkv", "After The Sunset")] // FIXME: [InlineData("After The Sunset - [0004].mkv", "After The Sunset")]
public void CleanStringTest_NeedsCleaning_Success(string input, string expectedName) public void CleanStringTest_NeedsCleaning_Success(string input, string expectedName)
{ {