// jellyfin/MediaBrowser.MediaEncoding/Subtitles/SsaParser.cs
// 473 lines, 19 KiB, C#

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using System.Threading;
using MediaBrowser.Model.MediaInfo;
namespace MediaBrowser.MediaEncoding.Subtitles
{
/// <summary>
/// Credit to https://github.com/SubtitleEdit/subtitleedit/blob/a299dc4407a31796364cc6ad83f0d3786194ba22/src/Logic/SubtitleFormats/SubStationAlpha.cs
/// </summary>
public class SsaParser : ISubtitleParser
{
/// <summary>
/// Reads a SubStation Alpha (SSA/ASS) subtitle stream and returns the events found in its [Events] section.
/// </summary>
/// <param name="stream">The stream to read; it is wrapped in a <see cref="StreamReader"/> that is disposed before returning.</param>
/// <param name="cancellationToken">Checked once for every line read.</param>
/// <returns>A <see cref="SubtitleTrackInfo"/> whose <c>TrackEvents</c> holds one entry per event line that could be parsed.</returns>
public SubtitleTrackInfo Parse(Stream stream, CancellationToken cancellationToken)
{
    const string FormatPrefix = "format:";
    const string DialoguePrefix = "dialogue:";

    var trackInfo = new SubtitleTrackInfo();
    var trackEvents = new List<SubtitleTrackEvent>();

    using (var reader = new StreamReader(stream))
    {
        bool eventsStarted = false;

        // Column positions used until a "Format:" line overrides them. They follow the layout
        // "Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text".
        int indexLayer = 0;
        int indexStart = 1;
        int indexEnd = 2;
        int indexStyle = 3;
        int indexName = 4; // a "Format:" line never changes this one
        int indexEffect = 8;
        int indexText = 9;

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            string trimmed = line.Trim();
            string lowered = trimmed.ToLowerInvariant();

            if (lowered == "[events]")
            {
                eventsStarted = true;
                continue;
            }

            if (trimmed.StartsWith(";", StringComparison.Ordinal))
            {
                // skip comment lines
                continue;
            }

            if (!eventsStarted || trimmed.Length == 0)
            {
                continue;
            }

            if (lowered.StartsWith(FormatPrefix, StringComparison.Ordinal))
            {
                // Slice after the prefix of the trimmed line so that leading whitespace or a
                // missing space after the colon cannot shift the column names.
                string[] columns = lowered.Substring(FormatPrefix.Length).Split(',');
                for (int i = 0; i < columns.Length; i++)
                {
                    switch (columns[i].Trim())
                    {
                        case "layer":
                            indexLayer = i;
                            break;
                        case "start":
                            indexStart = i;
                            break;
                        case "end":
                            indexEnd = i;
                            break;
                        case "text":
                            indexText = i;
                            break;
                        case "effect":
                            indexEffect = i;
                            break;
                        case "style":
                            indexStyle = i;
                            break;
                    }
                }

                continue;
            }

            string payload = line;
            if (lowered.StartsWith(DialoguePrefix, StringComparison.Ordinal))
            {
                // Strip "Dialogue:" plus at most one following space. Slicing at a fixed offset
                // of 10 threw on a bare "Dialogue:" line and cut into the first field when the
                // line was indented or had no space after the colon.
                payload = line.TrimStart().Substring(DialoguePrefix.Length);
                if (payload.Length > 0 && payload[0] == ' ')
                {
                    payload = payload.Substring(1);
                }
            }

            string[] fields = payload.Split(',');
            string start = string.Empty;
            string end = string.Empty;
            string text = string.Empty;

            // The order of these checks decides which column wins when two indexes coincide.
            for (int i = 0; i < fields.Length; i++)
            {
                if (i == indexStart)
                {
                    start = fields[i].Trim();
                }
                else if (i == indexEnd)
                {
                    end = fields[i].Trim();
                }
                else if (i == indexLayer || i == indexEffect)
                {
                    // Value is not needed, but the column must not be mistaken for text.
                }
                else if (i == indexText)
                {
                    text = fields[i];
                }
                else if (i == indexStyle || i == indexName)
                {
                    // Value is not needed, but the column must not be mistaken for text.
                }
                else if (i > indexText)
                {
                    // The text itself contained commas; put them back.
                    text += "," + fields[i];
                }
            }

            try
            {
                trackEvents.Add(new SubtitleTrackEvent
                {
                    StartPositionTicks = GetTimeCodeFromString(start),
                    EndPositionTicks = GetTimeCodeFromString(end),
                    Text = GetFormattedText(text)
                });
            }
            catch (Exception ex) when (ex is FormatException
                || ex is OverflowException
                || ex is IndexOutOfRangeException
                || ex is ArgumentException)
            {
                // Best effort: a line whose time codes or override tags cannot be parsed
                // is skipped so the rest of the file still loads.
            }
        }
    }

    trackInfo.TrackEvents = trackEvents.ToArray();
    return trackInfo;
}
/// <summary>
/// Converts an SSA time code of the form <c>h:mm:ss.cc</c> into ticks.
/// </summary>
/// <param name="time">The time code; it is split on ':' and '.' and must yield at least four integer parts.</param>
/// <returns>The number of ticks of the resulting <see cref="TimeSpan"/>.</returns>
/// <exception cref="FormatException">A part is not an integer.</exception>
/// <exception cref="IndexOutOfRangeException">Fewer than four parts are present.</exception>
private static long GetTimeCodeFromString(string time)
{
    // h:mm:ss.cc
    string[] parts = time.Split(':', '.');

    // Subtitle files are machine-readable, so parse independently of the current culture.
    int hours = int.Parse(parts[0], CultureInfo.InvariantCulture);
    int minutes = int.Parse(parts[1], CultureInfo.InvariantCulture);
    int seconds = int.Parse(parts[2], CultureInfo.InvariantCulture);
    int hundredths = int.Parse(parts[3], CultureInfo.InvariantCulture);

    // The last part is multiplied by 10 to turn hundredths of a second into milliseconds.
    return new TimeSpan(0, hours, minutes, seconds, hundredths * 10).Ticks;
}
/// <summary>
/// Rewrites the SSA override tags in an event text as HTML-like markup.
/// </summary>
/// <remarks>
/// <c>\n</c>/<c>\N</c> become <c>ParserValues.NewLine</c>; <c>{\fn…}</c>, <c>{\fs…}</c>, <c>{\c…}</c> and
/// <c>{\1c…}</c> become <c>&lt;font …&gt;</c> elements; <c>{\i…}</c>, <c>{\u…}</c> and <c>{\b…}</c> become
/// <c>&lt;i&gt;</c>, <c>&lt;u&gt;</c> and <c>&lt;b&gt;</c>. A missing closing tag is appended at the end.
/// </remarks>
/// <param name="text">The raw text column of an event line.</param>
/// <returns>The text with the supported override tags converted.</returns>
public static string GetFormattedText(string text)
{
    text = text.Replace("\\n", ParserValues.NewLine, StringComparison.OrdinalIgnoreCase);

    // All searches below are ordinal. string.Contains(string) is ordinal, so a culture-sensitive
    // IndexOf could fail to find what Contains just reported and hand back -1.
    for (int i = 0; i < 10; i++) // just look ten times...
    {
        if (text.Contains(@"{\fn"))
        {
            int start = text.IndexOf(@"{\fn", StringComparison.Ordinal);
            int end = text.IndexOf('}', start);
            if (end > 0 && !text.Substring(start).StartsWith("{\\fn}", StringComparison.Ordinal))
            {
                string fontName = text.Substring(start + 4, end - (start + 4));
                string extraTags = string.Empty;
                CheckAndAddSubTags(ref fontName, ref extraTags, out bool italic);

                string openTag = "<font face=\"" + fontName + "\"" + extraTags + ">" + (italic ? "<i>" : string.Empty);
                text = WrapInFontTag(text, start, end, openTag, "{\\fn}");
            }
        }

        if (text.Contains(@"{\fs"))
        {
            int start = text.IndexOf(@"{\fs", StringComparison.Ordinal);
            int end = text.IndexOf('}', start);
            if (end > 0 && !text.Substring(start).StartsWith("{\\fs}", StringComparison.Ordinal))
            {
                string fontSize = text.Substring(start + 4, end - (start + 4));
                string extraTags = string.Empty;
                CheckAndAddSubTags(ref fontSize, ref extraTags, out bool italic);

                // Tags that merely start with "fs" do not carry an integer and are left untouched.
                if (IsInteger(fontSize))
                {
                    string openTag = "<font size=\"" + fontSize + "\"" + extraTags + ">" + (italic ? "<i>" : string.Empty);
                    text = WrapInFontTag(text, start, end, openTag, "{\\fs}");
                }
            }
        }

        if (text.Contains(@"{\c"))
        {
            int start = text.IndexOf(@"{\c", StringComparison.Ordinal);
            int end = text.IndexOf('}', start);
            if (end > 0 && !text.Substring(start).StartsWith("{\\c}", StringComparison.Ordinal))
            {
                // The value is read from start + 4, one character past the three-character "{\c".
                string color = text.Substring(start + 4, end - (start + 4));
                string extraTags = string.Empty;
                CheckAndAddSubTags(ref color, ref extraTags, out bool italic);

                string openTag = "<font color=\"" + ToHtmlColor(color) + "\"" + extraTags + ">" + (italic ? "<i>" : string.Empty);
                text = WrapInFontTag(text, start, end, openTag, "{\\c}");
            }
        }

        if (text.Contains(@"{\1c")) // "1" specifies primary color
        {
            int start = text.IndexOf(@"{\1c", StringComparison.Ordinal);
            int end = text.IndexOf('}', start);
            if (end > 0 && !text.Substring(start).StartsWith("{\\1c}", StringComparison.Ordinal))
            {
                string color = text.Substring(start + 5, end - (start + 5));
                string extraTags = string.Empty;
                CheckAndAddSubTags(ref color, ref extraTags, out bool italic);

                string openTag = "<font color=\"" + ToHtmlColor(color) + "\"" + extraTags + ">" + (italic ? "<i>" : string.Empty);

                // No closing marker is searched for here; the font element always runs to the end.
                text = WrapInFontTag(text, start, end, openTag, null);
            }
        }
    }

    text = text.Replace(@"{\i1}", "<i>");
    text = text.Replace(@"{\i0}", "</i>");
    text = text.Replace(@"{\i}", "</i>");
    if (CountTagInText(text, "<i>") > CountTagInText(text, "</i>"))
    {
        text += "</i>";
    }

    text = text.Replace(@"{\u1}", "<u>");
    text = text.Replace(@"{\u0}", "</u>");
    text = text.Replace(@"{\u}", "</u>");
    if (CountTagInText(text, "<u>") > CountTagInText(text, "</u>"))
    {
        text += "</u>";
    }

    text = text.Replace(@"{\b1}", "<b>");
    text = text.Replace(@"{\b0}", "</b>");
    text = text.Replace(@"{\b}", "</b>");
    if (CountTagInText(text, "<b>") > CountTagInText(text, "</b>"))
    {
        text += "</b>";
    }

    return text;
}

/// <summary>
/// Replaces the override block at [start, end] with <paramref name="openTag"/> and closes the font element,
/// either where <paramref name="closeMarker"/> occurs after it or at the end of the text.
/// </summary>
private static string WrapInFontTag(string text, int start, int end, string openTag, string closeMarker)
{
    text = text.Remove(start, end - start + 1).Insert(start, openTag);

    int closeIndex = closeMarker == null ? -1 : text.IndexOf(closeMarker, start, StringComparison.Ordinal);
    if (closeIndex > 0)
    {
        return text.Remove(closeIndex, closeMarker.Length).Insert(closeIndex, "</font>");
    }

    return text + "</font>";
}

/// <summary>
/// Turns an SSA colour value into a lower-case "#…" string by removing '&amp;' and leading 'H',
/// left-padding to six digits and reversing the order of the last three digit pairs.
/// </summary>
private static string ToHtmlColor(string ssaColor)
{
    string hex = ssaColor.Replace("&", string.Empty).TrimStart('H').PadLeft(6, '0');

    // switch to rrggbb from bbggrr
    hex = "#" + hex.Remove(hex.Length - 6) + hex.Substring(hex.Length - 2, 2) + hex.Substring(hex.Length - 4, 2) + hex.Substring(hex.Length - 6, 2);
    return hex.ToLowerInvariant();
}
/// <summary>
/// Tells whether <paramref name="s"/> can be parsed as a 32-bit integer.
/// </summary>
/// <param name="s">The candidate string; <c>null</c> yields <c>false</c>.</param>
/// <returns><c>true</c> when <see cref="int.TryParse(string, out int)"/> succeeds.</returns>
private static bool IsInteger(string s) => int.TryParse(s, out _);
/// <summary>
/// Counts the occurrences of <paramref name="tag"/> in <paramref name="text"/>.
/// </summary>
/// <param name="text">The text to search.</param>
/// <param name="tag">The exact character sequence to look for.</param>
/// <returns>The number of positions at which <paramref name="tag"/> starts.</returns>
private static int CountTagInText(string text, string tag)
{
    int count = 0;

    // Ordinal search: a markup tag is an exact character sequence, so a culture-sensitive match
    // that skips ignorable characters must not count.
    int index = text.IndexOf(tag, StringComparison.Ordinal);
    while (index >= 0)
    {
        count++;

        // Only an empty tag can match at text.Length; stop so the next start index stays in range.
        if (index == text.Length)
        {
            return count;
        }

        index = text.IndexOf(tag, index + 1, StringComparison.Ordinal);
    }

    return count;
}
/// <summary>
/// Splits extra override tags off a tag value and converts the supported ones into font attributes.
/// </summary>
/// <remarks>
/// Everything from the first backslash in <paramref name="tagName"/> onwards is treated as a chain of
/// further tags: <c>fs</c> adds <c>size</c>, <c>fn</c> adds <c>face</c>, <c>c</c> adds <c>color</c>,
/// <c>i1</c> sets <paramref name="italic"/>; any other tag is dropped. At most ten passes are made.
/// </remarks>
/// <param name="tagName">In: the value, possibly followed by more tags. Out: the value up to the first backslash.</param>
/// <param name="extraTags">Receives the attributes, each appended with a leading space.</param>
/// <param name="italic">Set to <c>true</c> when an <c>i1</c> tag was found, otherwise <c>false</c>.</param>
private static void CheckAndAddSubTags(ref string tagName, ref string extraTags, out bool italic)
{
    italic = false;

    int firstSplit = tagName.IndexOf('\\');
    if (firstSplit <= 0)
    {
        return;
    }

    string rest = tagName.Substring(firstSplit).TrimStart('\\');
    tagName = tagName.Remove(firstSplit);

    for (int i = 0; i < 10 && rest.Length > 0; i++)
    {
        if (rest.StartsWith("fs", StringComparison.Ordinal) && rest.Length > 2)
        {
            string fontSize = TakeNextSubTag(ref rest);
            extraTags += " size=\"" + fontSize.Substring(2) + "\"";
        }
        else if (rest.StartsWith("fn", StringComparison.Ordinal) && rest.Length > 2)
        {
            string fontName = TakeNextSubTag(ref rest);
            extraTags += " face=\"" + fontName.Substring(2) + "\"";
        }
        else if (rest.StartsWith("c", StringComparison.Ordinal) && rest.Length > 2)
        {
            string fontColor = TakeNextSubTag(ref rest);

            // A bare "c" directly followed by another tag carries no value. Skip it instead of
            // letting Substring(2) throw.
            if (fontColor.Length >= 2)
            {
                string color = fontColor.Substring(2);
                color = color.Replace("&", string.Empty).TrimStart('H');
                color = color.PadLeft(6, '0');

                // switch to rrggbb from bbggrr
                color = "#" + color.Remove(color.Length - 6) + color.Substring(color.Length - 2, 2) + color.Substring(color.Length - 4, 2) + color.Substring(color.Length - 6, 2);
                color = color.ToLowerInvariant();
                extraTags += " color=\"" + color + "\"";
            }
        }
        else if (rest.StartsWith("i1", StringComparison.Ordinal))
        {
            italic = true;
            TakeNextSubTag(ref rest);
        }
        else if (rest.IndexOf('\\') >= 0)
        {
            // Unsupported tag followed by more tags: drop it and carry on with the next one.
            TakeNextSubTag(ref rest);
        }
        else
        {
            // Unsupported last tag: nothing left that another pass could change.
            break;
        }
    }
}

/// <summary>
/// Removes the leading tag (everything up to the next backslash) from <paramref name="rest"/> and returns it.
/// </summary>
private static string TakeNextSubTag(ref string rest)
{
    int nextSplit = rest.IndexOf('\\');
    if (nextSplit > 0)
    {
        string head = rest.Substring(0, nextSplit);
        rest = rest.Substring(nextSplit).TrimStart('\\');
        return head;
    }

    string last = rest;
    rest = string.Empty;
    return last;
}
}
}