jellyfin/MediaBrowser.Controller/Providers/BaseItemXmlParser.cs

730 lines
25 KiB
C#
Raw Normal View History

2013-04-22 17:35:42 +00:00
using System.Globalization;
using System.IO;
2013-06-06 15:11:51 +00:00
using System.Text;
2013-05-26 19:43:29 +00:00
using System.Text.RegularExpressions;
2013-04-22 17:35:42 +00:00
using MediaBrowser.Controller.Entities;
2013-02-21 01:33:05 +00:00
using MediaBrowser.Model.Entities;
2013-02-21 20:26:35 +00:00
using MediaBrowser.Model.Logging;
2013-02-21 01:33:05 +00:00
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Xml;
namespace MediaBrowser.Controller.Providers
{
/// <summary>
/// Provides a base class for parsing metadata xml
/// </summary>
/// <typeparam name="T"></typeparam>
public class BaseItemXmlParser<T>
where T : BaseItem, new()
{
2013-02-21 20:26:35 +00:00
/// <summary>
/// The logger
/// </summary>
protected ILogger Logger { get; private set; }
/// <summary>
/// Initializes a new instance of the <see cref="BaseItemXmlParser{T}" /> class.
/// </summary>
/// <param name="logger">The logger.</param>
public BaseItemXmlParser(ILogger logger)
{
Logger = logger;
}
2013-02-21 01:33:05 +00:00
/// <summary>
/// Fetches metadata for an item from one xml file
/// </summary>
/// <param name="item">The item.</param>
/// <param name="metadataFile">The metadata file.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <exception cref="System.ArgumentNullException"></exception>
public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
{
if (item == null)
{
throw new ArgumentNullException();
}
if (string.IsNullOrEmpty(metadataFile))
{
throw new ArgumentNullException();
}
2013-04-15 18:45:58 +00:00
var settings = new XmlReaderSettings
{
CheckCharacters = false,
IgnoreProcessingInstructions = true,
IgnoreComments = true,
ValidationType = ValidationType.None
};
item.Taglines.Clear();
item.Studios.Clear();
item.Genres.Clear();
item.People.Clear();
2013-06-06 15:11:51 +00:00
// Use european encoding as it will accept more characters
using (var streamReader = new StreamReader(metadataFile, Encoding.GetEncoding("ISO-8859-1")))
2013-02-21 01:33:05 +00:00
{
2013-06-06 15:11:51 +00:00
// Use XmlReader for best performance
using (var reader = XmlReader.Create(streamReader, settings))
2013-02-21 01:33:05 +00:00
{
2013-06-06 15:11:51 +00:00
reader.MoveToContent();
2013-02-21 01:33:05 +00:00
2013-06-06 15:11:51 +00:00
// Loop through each element
while (reader.Read())
2013-02-21 01:33:05 +00:00
{
2013-06-06 15:11:51 +00:00
cancellationToken.ThrowIfCancellationRequested();
if (reader.NodeType == XmlNodeType.Element)
{
FetchDataFromXmlNode(reader, item);
}
2013-02-21 01:33:05 +00:00
}
}
}
}
2013-04-22 17:35:42 +00:00
private readonly CultureInfo _usCulture = new CultureInfo("en-US");
2013-02-21 01:33:05 +00:00
/// <summary>
/// Fetches metadata from one Xml Element
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="item">The item.</param>
protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
{
switch (reader.Name)
{
// DateCreated
case "Added":
DateTime added;
if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
{
item.DateCreated = added.ToUniversalTime();
}
break;
case "LocalTitle":
item.Name = reader.ReadElementContentAsString();
break;
case "Type":
{
var type = reader.ReadElementContentAsString();
2013-04-15 18:45:58 +00:00
if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
2013-02-21 01:33:05 +00:00
{
item.DisplayMediaType = type;
}
2013-04-22 17:35:42 +00:00
break;
}
case "Budget":
{
var text = reader.ReadElementContentAsString();
double value;
if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
{
item.Budget = value;
}
break;
}
case "Revenue":
{
var text = reader.ReadElementContentAsString();
double value;
if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
{
item.Revenue = value;
}
2013-02-21 01:33:05 +00:00
break;
}
case "SortTitle":
item.ForcedSortName = reader.ReadElementContentAsString();
2013-02-21 01:33:05 +00:00
break;
case "Overview":
case "Description":
2013-05-26 19:43:29 +00:00
{
2013-06-01 01:19:38 +00:00
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
item.Overview = val;
}
break;
2013-05-26 19:43:29 +00:00
}
2013-02-21 01:33:05 +00:00
case "TagLine":
{
var tagline = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(tagline))
{
item.AddTagline(tagline);
}
break;
}
case "Website":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
item.HomePageUrl = val;
}
break;
}
2013-02-21 01:33:05 +00:00
case "TagLines":
{
FetchFromTaglinesNode(reader.ReadSubtree(), item);
break;
}
case "ContentRating":
case "certification":
case "MPAARating":
{
var rating = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(rating))
{
item.OfficialRating = rating;
}
break;
}
case "CustomRating":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
item.CustomRating = val;
}
break;
}
case "Runtime":
case "RunningTime":
{
var text = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(text))
{
int runtime;
if (int.TryParse(text.Split(' ')[0], out runtime))
{
item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
}
}
break;
}
case "Genre":
{
foreach (var name in SplitNames(reader.ReadElementContentAsString()))
{
if (string.IsNullOrWhiteSpace(name))
{
continue;
}
item.AddGenre(name);
}
break;
}
case "AspectRatio":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
item.AspectRatio = val;
}
break;
}
case "Network":
{
foreach (var name in SplitNames(reader.ReadElementContentAsString()))
{
if (string.IsNullOrWhiteSpace(name))
{
continue;
}
item.AddStudio(name);
}
break;
}
case "Director":
{
foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new PersonInfo { Name = v, Type = PersonType.Director }))
{
if (string.IsNullOrWhiteSpace(p.Name))
{
continue;
}
item.AddPerson(p);
}
break;
}
case "Writer":
{
foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new PersonInfo { Name = v, Type = PersonType.Writer }))
{
if (string.IsNullOrWhiteSpace(p.Name))
{
continue;
}
item.AddPerson(p);
}
break;
}
case "Actors":
{
var actors = reader.ReadInnerXml();
if (actors.Contains("<"))
2013-02-21 01:33:05 +00:00
{
// This is one of the mis-named "Actors" full nodes created by MB2
// Create a reader and pass it to the persons node processor
FetchDataFromPersonsNode(new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")), item);
}
else
{
// Old-style piped string
foreach (var p in SplitNames(actors).Select(v => new PersonInfo { Name = v.Trim(), Type = PersonType.Actor }))
2013-02-21 01:33:05 +00:00
{
if (string.IsNullOrWhiteSpace(p.Name))
{
continue;
}
item.AddPerson(p);
2013-02-21 01:33:05 +00:00
}
}
break;
}
2013-04-13 23:43:41 +00:00
case "GuestStars":
{
foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new PersonInfo { Name = v.Trim(), Type = PersonType.GuestStar }))
{
if (string.IsNullOrWhiteSpace(p.Name))
{
continue;
}
item.AddPerson(p);
}
break;
}
2013-02-21 01:33:05 +00:00
case "Trailer":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
//item.AddTrailerUrl(val);
2013-02-21 01:33:05 +00:00
}
break;
}
case "ProductionYear":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
int productionYear;
if (int.TryParse(val, out productionYear) && productionYear > 1850)
2013-02-21 01:33:05 +00:00
{
item.ProductionYear = productionYear;
2013-02-21 01:33:05 +00:00
}
}
break;
}
case "Rating":
case "IMDBrating":
{
var rating = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(rating))
{
float val;
// All external meta is saving this as '.' for decimal I believe...but just to be sure
if (float.TryParse(rating.Replace(',','.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val))
2013-02-21 01:33:05 +00:00
{
item.CommunityRating = val;
}
}
break;
}
case "FirstAired":
{
var firstAired = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(firstAired))
{
DateTime airDate;
if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
{
item.PremiereDate = airDate.ToUniversalTime();
item.ProductionYear = airDate.Year;
}
}
break;
}
case "TMDbId":
var tmdb = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(tmdb))
{
item.SetProviderId(MetadataProviders.Tmdb, tmdb);
}
break;
case "TVcomId":
var TVcomId = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(TVcomId))
{
item.SetProviderId(MetadataProviders.Tvcom, TVcomId);
}
break;
case "IMDB_ID":
case "IMDB":
case "IMDbId":
var IMDbId = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(IMDbId))
{
item.SetProviderId(MetadataProviders.Imdb, IMDbId);
}
break;
case "Genres":
FetchFromGenresNode(reader.ReadSubtree(), item);
break;
case "Persons":
FetchDataFromPersonsNode(reader.ReadSubtree(), item);
break;
case "ParentalRating":
FetchFromParentalRatingNode(reader.ReadSubtree(), item);
break;
case "Studios":
FetchFromStudiosNode(reader.ReadSubtree(), item);
break;
default:
reader.Skip();
break;
}
}
/// <summary>
/// Fetches from taglines node.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="item">The item.</param>
private void FetchFromTaglinesNode(XmlReader reader, T item)
{
reader.MoveToContent();
while (reader.Read())
{
if (reader.NodeType == XmlNodeType.Element)
{
switch (reader.Name)
{
case "Tagline":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
item.AddTagline(val);
}
break;
}
default:
reader.Skip();
break;
}
}
}
}
/// <summary>
/// Fetches from genres node.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="item">The item.</param>
private void FetchFromGenresNode(XmlReader reader, T item)
{
reader.MoveToContent();
while (reader.Read())
{
if (reader.NodeType == XmlNodeType.Element)
{
switch (reader.Name)
{
case "Genre":
{
var genre = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(genre))
{
item.AddGenre(genre);
}
break;
}
default:
reader.Skip();
break;
}
}
}
}
/// <summary>
/// Fetches the data from persons node.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="item">The item.</param>
private void FetchDataFromPersonsNode(XmlReader reader, T item)
{
reader.MoveToContent();
while (reader.Read())
{
if (reader.NodeType == XmlNodeType.Element)
{
switch (reader.Name)
{
case "Person":
case "Actor":
2013-02-21 01:33:05 +00:00
{
foreach (var person in GetPersonsFromXmlNode(reader.ReadSubtree()))
{
item.AddPerson(person);
}
2013-02-21 01:33:05 +00:00
break;
}
default:
reader.Skip();
break;
}
}
}
}
/// <summary>
/// Fetches from studios node.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="item">The item.</param>
private void FetchFromStudiosNode(XmlReader reader, T item)
{
reader.MoveToContent();
while (reader.Read())
{
if (reader.NodeType == XmlNodeType.Element)
{
switch (reader.Name)
{
case "Studio":
{
var studio = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(studio))
{
item.AddStudio(studio);
}
break;
}
default:
reader.Skip();
break;
}
}
}
}
/// <summary>
/// Fetches from parental rating node.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="item">The item.</param>
private void FetchFromParentalRatingNode(XmlReader reader, T item)
{
reader.MoveToContent();
while (reader.Read())
{
if (reader.NodeType == XmlNodeType.Element)
{
switch (reader.Name)
{
case "Value":
{
var ratingString = reader.ReadElementContentAsString();
int rating = 7;
if (!string.IsNullOrWhiteSpace(ratingString))
{
int.TryParse(ratingString, out rating);
}
switch (rating)
{
case -1:
item.OfficialRating = "NR";
break;
case 0:
item.OfficialRating = "UR";
break;
case 1:
item.OfficialRating = "G";
break;
case 3:
item.OfficialRating = "PG";
break;
case 4:
item.OfficialRating = "PG-13";
break;
case 5:
item.OfficialRating = "NC-17";
break;
case 6:
item.OfficialRating = "R";
break;
}
break;
}
default:
reader.Skip();
break;
}
}
}
}
/// <summary>
/// Gets the persons from XML node.
/// </summary>
/// <param name="reader">The reader.</param>
/// <returns>IEnumerable{PersonInfo}.</returns>
private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
{
var names = new List<string>();
var type = "Actor"; // If type is not specified assume actor
2013-02-21 01:33:05 +00:00
var role = string.Empty;
reader.MoveToContent();
while (reader.Read())
{
if (reader.NodeType == XmlNodeType.Element)
{
switch (reader.Name)
{
case "Name":
names.AddRange(SplitNames(reader.ReadElementContentAsString()));
break;
case "Type":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
type = val;
}
break;
}
case "Role":
{
var val = reader.ReadElementContentAsString();
if (!string.IsNullOrWhiteSpace(val))
{
role = val;
}
break;
}
default:
reader.Skip();
break;
}
}
}
return names.Select(n => new PersonInfo { Name = n, Role = role, Type = type });
}
/// <summary>
/// Used to split names of comma or pipe delimeted genres and people
/// </summary>
/// <param name="value">The value.</param>
/// <returns>IEnumerable{System.String}.</returns>
private IEnumerable<string> SplitNames(string value)
{
value = value ?? string.Empty;
// Only split by comma if there is no pipe in the string
// We have to be careful to not split names like Matthew, Jr.
var separator = value.IndexOf('|') == -1 ? ',' : '|';
value = value.Trim().Trim(separator);
return string.IsNullOrWhiteSpace(value) ? new string[] { } : Split(value, separator, StringSplitOptions.RemoveEmptyEntries);
2013-02-21 01:33:05 +00:00
}
/// <summary>
/// Provides an additional overload for string.split
/// </summary>
/// <param name="val">The val.</param>
/// <param name="separator">The separator.</param>
/// <param name="options">The options.</param>
/// <returns>System.String[][].</returns>
private static string[] Split(string val, char separator, StringSplitOptions options)
{
return val.Split(new[] { separator }, options);
}
2013-02-21 01:33:05 +00:00
}
}