2020-07-22 11:34:51 +00:00
|
|
|
#pragma warning disable CS1591
|
|
|
|
|
2019-01-13 20:02:23 +00:00
|
|
|
using System;
|
2018-12-14 09:40:55 +00:00
|
|
|
using System.Collections.Generic;
|
|
|
|
using System.Globalization;
|
|
|
|
using System.IO;
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
using System.Threading;
|
|
|
|
using MediaBrowser.Model.MediaInfo;
|
2019-01-13 19:26:04 +00:00
|
|
|
using Microsoft.Extensions.Logging;
|
2018-12-14 09:40:55 +00:00
|
|
|
|
|
|
|
namespace MediaBrowser.MediaEncoding.Subtitles
|
|
|
|
{
|
|
|
|
/// <summary>
/// Parses SubRip (.srt) subtitle text read from a stream into a <see cref="SubtitleTrackInfo"/>.
/// </summary>
public class SrtParser : ISubtitleParser
{
    // Separator between the start and end time stamps on a cue's timing line.
    private static readonly Regex _timeSeparatorRegex = new Regex(@"[\t ]*-->[\t ]*");

    // Brace-delimited override blocks such as {\an8} or {\c&HFFFFFF&}; these are removed from the cue text.
    private static readonly Regex _overrideBlockRegex = new Regex(
        @"\{(?:\\\d?[\w.-]+(?:\([^\)]*\)|&H?[0-9A-Fa-f]+&|))+\}",
        RegexOptions.IgnoreCase);

    // Entity-escaped font/b/u/i/s tags (opening, closing or self-closing, with optional attributes).
    // Unquoted attribute values exclude '&' so they cannot run past the escaped closing bracket.
    private static readonly Regex _escapedAllowedTagRegex = new Regex(
        "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\"&\\s]+))?)+\\s*|\\s*)(\\/?)&gt;",
        RegexOptions.IgnoreCase);

    private readonly ILogger _logger;

    private readonly CultureInfo _usCulture = new CultureInfo("en-US");

    /// <summary>
    /// Initializes a new instance of the <see cref="SrtParser"/> class.
    /// </summary>
    /// <param name="logger">Logger used to report timing lines that cannot be recognized.</param>
    public SrtParser(ILogger logger)
    {
        _logger = logger;
    }

    /// <inheritdoc />
    public SubtitleTrackInfo Parse(Stream stream, CancellationToken cancellationToken)
    {
        var trackInfo = new SubtitleTrackInfo();
        var trackEvents = new List<SubtitleTrackEvent>();

        using (var reader = new StreamReader(stream))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Blank lines between cues carry no information.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                // The first non-blank line of a cue is stored as its id.
                var subEvent = new SubtitleTrackEvent { Id = line };

                // The line after the id is expected to be "start --> end".
                line = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var time = _timeSeparatorRegex.Split(line);
                if (time.Length < 2)
                {
                    // This occurs when subtitle text has an empty line as part of the text.
                    // Need to adjust the break statement below to resolve this.
                    _logger.LogWarning("Unrecognized line in srt: {0}", line);
                    continue;
                }

                subEvent.StartPositionTicks = GetTicks(time[0]);

                // Anything after the first space following the end time is ignored.
                var endTime = time[1].AsSpan();
                var idx = endTime.IndexOf(' ');
                if (idx > 0)
                {
                    endTime = endTime.Slice(0, idx);
                }

                subEvent.EndPositionTicks = GetTicks(endTime);

                // Collect text lines until the first empty line or the end of the stream.
                var multiline = new List<string>();
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        break;
                    }

                    multiline.Add(line);
                }

                subEvent.Text = CleanText(string.Join(ParserValues.NewLine, multiline));
                trackEvents.Add(subEvent);
            }
        }

        trackInfo.TrackEvents = trackEvents;
        return trackInfo;
    }

    /// <summary>
    /// Normalizes the raw text of a cue: converts literal \N markers to line breaks,
    /// removes brace-delimited override blocks and keeps only font, b, u, i and s tags as markup.
    /// </summary>
    /// <param name="text">The joined text lines of a single cue.</param>
    /// <returns>The cleaned cue text.</returns>
    private static string CleanText(string text)
    {
        text = text.Replace(@"\N", ParserValues.NewLine, StringComparison.OrdinalIgnoreCase);
        text = _overrideBlockRegex.Replace(text, string.Empty);

        // Escape every angle bracket first, then restore only the whitelisted tags.
        // Replacing "<" with "<" (and ">" with ">") would be a no-op and would let
        // every other tag through untouched.
        text = text.Replace("<", "&lt;", StringComparison.Ordinal);
        text = text.Replace(">", "&gt;", StringComparison.Ordinal);

        // $1 = optional slash + tag name, $3 = attributes/whitespace, $7 = optional self-closing slash.
        return _escapedAllowedTagRegex.Replace(text, "<$1$3$7>");
    }

    /// <summary>
    /// Converts a time stamp in hh:mm:ss.fff or hh:mm:ss,fff form to ticks.
    /// </summary>
    /// <param name="time">The time stamp text.</param>
    /// <returns>The tick count of the parsed time, or 0 when neither format matches.</returns>
    private long GetTicks(ReadOnlySpan<char> time)
    {
        // Try the dot-separated milliseconds form first.
        if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out var span))
        {
            return span.Ticks;
        }

        // Fall back to the comma-separated milliseconds form.
        if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out span))
        {
            return span.Ticks;
        }

        return 0;
    }
}
|
|
|
|
}
|