Made CleanStringParser more robust

It can now handle [...] at the beginning of the string
This commit is contained in:
sushilicious 2021-08-03 13:46:56 -07:00
parent 0c9f824d0a
commit 26f8b501e7
3 changed files with 48 additions and 11 deletions

View File

@ -137,8 +137,11 @@ namespace Emby.Naming.Common
CleanStrings = new[]
{
@"[ _\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)",
@"(\[.*\])"
@"^\s*(?<cleaned>.+?)[ _\,\.\(\)\[\]\-](3d|sbs|tab|hsbs|htab|mvc|HDR|HDC|UHD|UltraHD|4k|ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|cd[1-9]|r3|r5|bd5|bd|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|2160p|hrhd|hrhdtv|hddvd|bluray|blu-ray|x264|x265|h264|h265|xvid|xvidvd|xxx|www.www|AAC|DTS|\[.*\])([ _\,\.\(\)\[\]\-]|$)",
@"^(?<cleaned>.+?)(\[.*\])",
@"^\s*(?<cleaned>.+?)\WE\d+(-|~)E?\d+(\W|$)",
@"^\s*\[[^\]]+\](?!\.\w+$)\s*(?<cleaned>.+)",
@"^\s*(?<cleaned>.+?)\s+-\s+\d+\s*$"
};
SubtitleFileExtensions = new[]

View File

@ -25,26 +25,54 @@ namespace Emby.Naming.Video
return false;
}
var len = expressions.Count;
for (int i = 0; i < len; i++)
// Iteratively remove extra cruft until we're left with the string
// we want.
newName = ReadOnlySpan<char>.Empty;
const int maxTries = 100; // This is just a precautionary
// measure. Should not be necessary.
var loopCounter = 0;
for (; loopCounter < maxTries; loopCounter++)
{
if (TryClean(name, expressions[i], out newName))
bool cleaned = false;
var len = expressions.Count;
for (int i = 0; i < len; i++)
{
return true;
if (TryClean(name, expressions[i], out newName))
{
cleaned = true;
name = newName.ToString();
break;
}
}
if (!cleaned)
{
break;
}
}
newName = ReadOnlySpan<char>.Empty;
return false;
if (loopCounter > 0)
{
newName = name.AsSpan();
}
return newName != ReadOnlySpan<char>.Empty;
}
private static bool TryClean(string name, Regex expression, out ReadOnlySpan<char> newName)
{
var match = expression.Match(name);
int index = match.Index;
if (match.Success && index != 0)
if (match.Success)
{
newName = name.AsSpan().Slice(0, match.Index);
var found = match.Groups.TryGetValue("cleaned", out var cleaned);
if (!found || cleaned == null)
{
newName = ReadOnlySpan<char>.Empty;
return false;
}
newName = name.AsSpan().Slice(cleaned.Index, cleaned.Length);
return true;
}

View File

@ -1,4 +1,4 @@
using System;
using System;
using Emby.Naming.Common;
using Emby.Naming.Video;
using Xunit;
@ -23,6 +23,12 @@ namespace Jellyfin.Naming.Tests.Video
[InlineData("Crouching.Tiger.Hidden.Dragon.BDrip.mkv", "Crouching.Tiger.Hidden.Dragon")]
[InlineData("Crouching.Tiger.Hidden.Dragon.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")]
[InlineData("Crouching.Tiger.Hidden.Dragon.4K.UltraHD.HDR.BDrip-HDC.mkv", "Crouching.Tiger.Hidden.Dragon")]
[InlineData("[HorribleSubs] Made in Abyss - 13 [720p].mkv", "Made in Abyss")]
[InlineData("[Tsundere] Kore wa Zombie Desu ka of the Dead [BDRip h264 1920x1080 FLAC]", "Kore wa Zombie Desu ka of the Dead")]
[InlineData("[Erai-raws] Jujutsu Kaisen - 03 [720p][Multiple Subtitle].mkv", "Jujutsu Kaisen")]
[InlineData("[OCN] 720p-NEXT", " ")]
[InlineData("[tvN] .E01-E16.720p-NEXT", "")]
[InlineData("[tvN] E01~E16 END HDTV.H264.720p-WITH", " ")]
// FIXME: [InlineData("After The Sunset - [0004].mkv", "After The Sunset")]
public void CleanStringTest_NeedsCleaning_Success(string input, string expectedName)
{