From f2237b858ad4defe47d7671cffdf3febeff3ad00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20Miguel=20Alm=C3=A1nzar?= Date: Sat, 10 May 2014 22:29:34 -0400 Subject: [PATCH] implemented SRT Parser --- .../Subtitles/SrtParser.cs | 56 ++++++++- MediaBrowser.Tests/MediaBrowser.Tests.csproj | 10 ++ .../MediaEncoding/Subtitles/SrtParserTests.cs | 108 ++++++++++++++++++ .../Subtitles/TestSubtitles/unit.srt | 44 +++++++ 4 files changed, 212 insertions(+), 6 deletions(-) create mode 100644 MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs create mode 100644 MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt diff --git a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs index 410c0bbdd..89676ba7e 100644 --- a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs +++ b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs @@ -1,17 +1,61 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Text.RegularExpressions; namespace MediaBrowser.MediaEncoding.Subtitles { public class SrtParser : ISubtitleParser { - public SubtitleTrackInfo Parse(Stream stream) - { - throw new NotImplementedException(); + /* Culture handed to TimeSpan.TryParseExact by GetTicks (defined after Parse), which tries the hh:mm:ss.fff layout first, then hh:mm:ss,fff, and returns 0 ticks when neither matches. */ private readonly CultureInfo _usCulture = new CultureInfo("en-US"); + /* Reads the stream line by line and returns a SubtitleTrackInfo whose TrackEvents holds one SubtitleTrackEvent per cue. A cue is: one non-blank line stored as Id, one timing line with start and end separated by an arrow, then text lines up to the next empty line or end of input. The StreamReader is created in a using block, so it is disposed when parsing ends. */ public SubtitleTrackInfo Parse(Stream stream) { + var trackInfo = new SubtitleTrackInfo(); + using ( var reader = new StreamReader(stream)) + { + string line; + while ((line = reader.ReadLine()) != null) + { + /* Blank or whitespace-only lines between cues are skipped. */ if (string.IsNullOrWhiteSpace(line)) + { + continue; + } + /* The first non-blank line is taken verbatim as the cue Id. */ var subEvent = new SubtitleTrackEvent {Id = line}; + /* Next line is expected to be the timing line. NOTE(review): there is no null check, so input that ends right after an Id line passes null to Regex.Split, which throws ArgumentNullException. */ line = reader.ReadLine(); + var time = Regex.Split(line, @"[\t ]*-->[\t ]*"); + subEvent.StartPositionTicks = GetTicks(time[0]); + /* NOTE(review): indexing time[1] assumes the arrow separator was present; a timing line without it yields a one-element array and this throws IndexOutOfRangeException. */ var endTime = time[1]; + /* Keep only the part of the end timestamp before the first space, dropping any trailing fields on the timing line. */ var idx = endTime.IndexOf(" ", StringComparison.Ordinal); + if (idx > 0) + endTime = endTime.Substring(0, idx); + subEvent.EndPositionTicks = 
GetTicks(endTime); + /* Gather the text lines of this cue; an empty line or end of input terminates the cue. NOTE(review): List has no type argument in this copy of the patch; presumably List of string was intended and the angle-bracket part was lost in extraction. Confirm against the repository. */ var multiline = new List(); + while ((line = reader.ReadLine()) != null) + { + if (string.IsNullOrEmpty(line)) + { + break; + } + multiline.Add(line); + } + /* Join the lines with the two-character marker backslash-N; the final Replace below converts that marker into the output line separator. */ subEvent.Text = string.Join(@"\N", multiline); + /* Delete brace-delimited blocks made of one or more backslash-prefixed tags, each optionally followed by parenthesised, comma-separated arguments. */ subEvent.Text = Regex.Replace(subEvent.Text, "\\{(\\\\[\\w]+\\(?([\\w\\d]+,?)+\\)?)+\\}", string.Empty, RegexOptions.IgnoreCase); + /* NOTE(review): as written, this call and the next replace the less-than and greater-than signs with themselves, which changes nothing. The replacement strings were presumably HTML entities that got unescaped when this patch text was extracted. Confirm against the repository. */ subEvent.Text = Regex.Replace(subEvent.Text, "<", "<", RegexOptions.IgnoreCase); + subEvent.Text = Regex.Replace(subEvent.Text, ">", ">", RegexOptions.IgnoreCase); + /* Matches a tag named font, b, u, i or s (case-insensitive), optionally closing, optionally with attributes, and re-emits it from group 1 (slash plus name), group 3 (attributes) and group 7 (trailing slash). NOTE(review): presumably the pattern originally matched the entity-escaped form produced by the two calls above, so that only these tags are restored. Confirm. */ subEvent.Text = Regex.Replace(subEvent.Text, "<(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)>", "<$1$3$7>", RegexOptions.IgnoreCase); + /* Convert every backslash-N marker to the replacement string; IgnoreCase means a lowercase backslash-n sequence in the cue text is converted too. In this copy the replacement literal is split across a line break. */ subEvent.Text = Regex.Replace(subEvent.Text, @"\\N", "
",RegexOptions.IgnoreCase); + trackInfo.TrackEvents.Add(subEvent); + } + } + return trackInfo; + } + + long GetTicks(string time) { + TimeSpan span; + return TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out span) + ? span.Ticks + : (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out span) + ? span.Ticks : 0); } } } diff --git a/MediaBrowser.Tests/MediaBrowser.Tests.csproj b/MediaBrowser.Tests/MediaBrowser.Tests.csproj index 6ae7544b8..46f748130 100644 --- a/MediaBrowser.Tests/MediaBrowser.Tests.csproj +++ b/MediaBrowser.Tests/MediaBrowser.Tests.csproj @@ -50,6 +50,7 @@ + @@ -61,6 +62,10 @@ {17e1f4e6-8abd-4fe5-9ecf-43d4b6087ba2} MediaBrowser.Controller + + {0BD82FA6-EB8A-4452-8AF5-74F9C3849451} + MediaBrowser.MediaEncoding + {7eeeb4bb-f3e8-48fc-b4c5-70f0fff8329b} MediaBrowser.Model @@ -77,6 +82,11 @@ + + + Always + + diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs new file mode 100644 index 000000000..0d86fbdcd --- /dev/null +++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs @@ -0,0 +1,108 @@ +using System; +using System.Collections.Generic; +using System.IO; +using MediaBrowser.MediaEncoding.Subtitles; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace MediaBrowser.Tests.MediaEncoding.Subtitles { + + [TestClass] + public class SrtParserTests { + + [TestMethod] + public void TestParse() { + + var expectedSubs = + new SubtitleTrackInfo { + TrackEvents = new List { + new SubtitleTrackEvent { + Id = "1", + StartPositionTicks = 24000000, + EndPositionTicks = 52000000, + Text = + "[Background Music Playing]" + }, + new SubtitleTrackEvent { + Id = "2", + StartPositionTicks = 157120000, + EndPositionTicks = 173990000, + Text = + "Oh my god, Watch out!
It's coming!!" + }, + new SubtitleTrackEvent { + Id = "3", + StartPositionTicks = 257120000, + EndPositionTicks = 303990000, + Text = "[Bird noises]" + }, + new SubtitleTrackEvent { + Id = "4", + StartPositionTicks = 310000000, + EndPositionTicks = 319990000, + Text = + "This text is RED and has not been positioned." + }, + new SubtitleTrackEvent { + Id = "5", + StartPositionTicks = 320000000, + EndPositionTicks = 329990000, + Text = + "This is a
new line, as is
this" + }, + new SubtitleTrackEvent { + Id = "6", + StartPositionTicks = 330000000, + EndPositionTicks = 339990000, + Text = + "This contains nested bold, italic, underline and strike-through HTML tags" + }, + new SubtitleTrackEvent { + Id = "7", + StartPositionTicks = 340000000, + EndPositionTicks = 349990000, + Text = + "Unclosed but supported HTML tags are left in, SSA italics aren't" + }, + new SubtitleTrackEvent { + Id = "8", + StartPositionTicks = 350000000, + EndPositionTicks = 359990000, + Text = + "<ggg>Unsupported</ggg> HTML tags are escaped and left in, even if <hhh>not closed." + }, + new SubtitleTrackEvent { + Id = "9", + StartPositionTicks = 360000000, + EndPositionTicks = 369990000, + Text = + "Multiple SSA tags are stripped" + }, + new SubtitleTrackEvent { + Id = "10", + StartPositionTicks = 370000000, + EndPositionTicks = 379990000, + Text = + "Greater than (<) and less than (>) are shown" + } + } + }; + + var sut = new SrtParser(); + + var stream = File.OpenRead(@"MediaEncoding\Subtitles\TestSubtitles\unit.srt"); + + var result = sut.Parse(stream); + + Assert.IsNotNull(result); + Assert.AreEqual(expectedSubs.TrackEvents.Count,result.TrackEvents.Count); + for (int i = 0; i < expectedSubs.TrackEvents.Count; i++) + { + Assert.AreEqual(expectedSubs.TrackEvents[i].Id, result.TrackEvents[i].Id); + Assert.AreEqual(expectedSubs.TrackEvents[i].StartPositionTicks, result.TrackEvents[i].StartPositionTicks); + Assert.AreEqual(expectedSubs.TrackEvents[i].EndPositionTicks, result.TrackEvents[i].EndPositionTicks); + Assert.AreEqual(expectedSubs.TrackEvents[i].Text, result.TrackEvents[i].Text); + } + + } + } +} \ No newline at end of file diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt new file mode 100644 index 000000000..5f6e5636e --- /dev/null +++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt @@ -0,0 +1,44 @@ + + +1 +00:00:02.400 --> 
00:00:05.200 +[Background Music Playing] + +2 +00:00:15,712 --> 00:00:17,399 X1:000 X2:000 Y1:050 Y2:100 +Oh my god, Watch out! +It's coming!! + +3 +00:00:25,712 --> 00:00:30,399 +[Bird noises] + +4 +00:00:31,000 --> 00:00:31,999 +This text is RED and has not been {\pos(142,120)}positioned. + +5 +00:00:32,000 --> 00:00:32,999 +This is a\nnew line, as is\Nthis + +6 +00:00:33,000 --> 00:00:33,999 +This contains nested bold, italic, underline and strike-through HTML tags + +7 +00:00:34,000 --> 00:00:34,999 +Unclosed but supported HTML tags are left in, {\i1} SSA italics aren't + +8 +00:00:35,000 --> 00:00:35,999 +Unsupported HTML tags are escaped and left in, even if not closed. + +9 +00:00:36,000 --> 00:00:36,999 +Multiple {\pos(142,120)\b1}SSA tags are stripped + +10 +00:00:37,000 --> 00:00:37,999 +Greater than (<) and less than (>) are shown + +