using MediaBrowser.Controller.Entities;
using MediaBrowser.Controller.Entities.Audio;
using MediaBrowser.Controller.Persistence;
using MediaBrowser.Model.Entities;
using MediaBrowser.Model.Logging;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Xml;

namespace MediaBrowser.Controller.Providers
{
    /// <summary>
    /// Provides a base class for parsing metadata xml
    /// </summary>
    /// <typeparam name="T">The item type that is populated from the xml.</typeparam>
    public class BaseItemXmlParser<T>
        where T : BaseItem, new()
    {
        /// <summary>
        /// Culture used for the numeric values stored in the xml.
        /// </summary>
        private readonly CultureInfo _usCulture = new CultureInfo("en-US");

        /// <summary>
        /// The logger
        /// </summary>
        protected ILogger Logger { get; private set; }

        /// <summary>
        /// Initializes a new instance of the <see cref="BaseItemXmlParser{T}" /> class.
        /// </summary>
        /// <param name="logger">The logger.</param>
        public BaseItemXmlParser(ILogger logger)
        {
            Logger = logger;
        }

        /// <summary>
        /// Fetches metadata for an item from one xml file
        /// </summary>
        /// <param name="item">The item.</param>
        /// <param name="metadataFile">The metadata file.</param>
        /// <param name="cancellationToken">The cancellation token.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
        /// </exception>
        public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            if (string.IsNullOrEmpty(metadataFile))
            {
                throw new ArgumentNullException("metadataFile");
            }

            var settings = new XmlReaderSettings
            {
                CheckCharacters = false,
                IgnoreProcessingInstructions = true,
                IgnoreComments = true,
                ValidationType = ValidationType.None
            };

            // These collections are only ever appended to while parsing, so they are
            // emptied first.
            item.Taglines.Clear();
            item.Studios.Clear();
            item.Genres.Clear();
            item.People.Clear();
            item.Tags.Clear();
            item.RemoteTrailers.Clear();

            Fetch(item, metadataFile, settings, Encoding.UTF8, cancellationToken);
        }

        /// <summary>
        /// Fetches the specified item.
        /// </summary>
        /// <param name="item">The item.</param>
        /// <param name="metadataFile">The metadata file.</param>
        /// <param name="settings">The settings.</param>
        /// <param name="encoding">The encoding.</param>
        /// <param name="cancellationToken">The cancellation token.</param>
        private void Fetch(T item, string metadataFile, XmlReaderSettings settings, Encoding encoding, CancellationToken cancellationToken)
        {
            using (var streamReader = new StreamReader(metadataFile, encoding))
            {
                // Use XmlReader for best performance
                using (var reader = XmlReader.Create(streamReader, settings))
                {
                    reader.MoveToContent();

                    // Loop through each element
                    // NOTE(review): handlers that call ReadElementContentAsString leave the
                    // reader on the node after the element, and this Read() then advances once
                    // more. Presumably the files are indented so that node is whitespace - confirm.
                    while (reader.Read())
                    {
                        cancellationToken.ThrowIfCancellationRequested();

                        if (reader.NodeType == XmlNodeType.Element)
                        {
                            FetchDataFromXmlNode(reader, item);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Fetches metadata from one Xml Element
        /// </summary>
        /// <param name="reader">The reader, positioned on the element to process.</param>
        /// <param name="item">The item.</param>
        protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
        {
            string text;

            switch (reader.Name)
            {
                // DateCreated
                case "Added":
                    {
                        DateTime added;
                        if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
                        {
                            item.DateCreated = added.ToUniversalTime();
                        }
                        break;
                    }

                case "LocalTitle":
                    item.Name = reader.ReadElementContentAsString();
                    break;

                case "Type":
                    if (TryReadText(reader, out text) && !text.Equals("none", StringComparison.OrdinalIgnoreCase))
                    {
                        item.DisplayMediaType = text;
                    }
                    break;

                case "CriticRating":
                    {
                        var hasCriticRating = item as IHasCriticRating;
                        float criticRating;

                        // The element is always read so the reader advances even when the
                        // item does not support critic ratings.
                        if (TryReadText(reader, out text)
                            && hasCriticRating != null
                            && float.TryParse(text, NumberStyles.Any, _usCulture, out criticRating))
                        {
                            hasCriticRating.CriticRating = criticRating;
                        }
                        break;
                    }

                case "Budget":
                    {
                        double budget;
                        if (double.TryParse(reader.ReadElementContentAsString(), NumberStyles.Any, _usCulture, out budget))
                        {
                            item.Budget = budget;
                        }
                        break;
                    }

                case "Revenue":
                    {
                        double revenue;
                        if (double.TryParse(reader.ReadElementContentAsString(), NumberStyles.Any, _usCulture, out revenue))
                        {
                            item.Revenue = revenue;
                        }
                        break;
                    }

                case "SortTitle":
                    if (TryReadText(reader, out text))
                    {
                        item.ForcedSortName = text;
                    }
                    break;

                case "Overview":
                case "Description":
                    if (TryReadText(reader, out text))
                    {
                        item.Overview = text;
                    }
                    break;

                case "CriticRatingSummary":
                    if (TryReadText(reader, out text))
                    {
                        var hasCriticRating = item as IHasCriticRating;
                        if (hasCriticRating != null)
                        {
                            hasCriticRating.CriticRatingSummary = text;
                        }
                    }
                    break;

                case "TagLine":
                    if (TryReadText(reader, out text))
                    {
                        item.AddTagline(text);
                    }
                    break;

                case "PlaceOfBirth":
                    if (TryReadText(reader, out text))
                    {
                        item.ProductionLocations = new List<string> { text };
                    }
                    break;

                case "Website":
                    if (TryReadText(reader, out text))
                    {
                        item.HomePageUrl = text;
                    }
                    break;

                case "LockedFields":
                    {
                        // The list is assigned even when the element is blank, which clears
                        // any previously locked fields.
                        var fields = new List<MetadataFields>();

                        if (TryReadText(reader, out text))
                        {
                            foreach (var fieldName in text.Split('|'))
                            {
                                MetadataFields field;
                                if (Enum.TryParse(fieldName, true, out field))
                                {
                                    fields.Add(field);
                                }
                            }
                        }

                        item.LockedFields = fields;
                        break;
                    }

                case "TagLines":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromTaglinesNode(subtree, item);
                    }
                    break;

                case "ContentRating":
                case "certification":
                case "MPAARating":
                case "ESRBRating":
                    if (TryReadText(reader, out text))
                    {
                        item.OfficialRating = text;
                    }
                    break;

                case "MPAADescription":
                    if (TryReadText(reader, out text))
                    {
                        item.OfficialRatingDescription = text;
                    }
                    break;

                case "CustomRating":
                    if (TryReadText(reader, out text))
                    {
                        item.CustomRating = text;
                    }
                    break;

                case "Runtime":
                case "RunningTime":
                    {
                        int minutes;

                        // Only the first space-separated token is parsed, as whole minutes.
                        if (TryReadText(reader, out text)
                            && int.TryParse(text.Split(' ')[0], NumberStyles.Integer, _usCulture, out minutes))
                        {
                            var ticks = TimeSpan.FromMinutes(minutes).Ticks;

                            // For audio and video don't replace ffmpeg data
                            if (item is Video || item is Audio)
                            {
                                item.OriginalRunTimeTicks = ticks;
                            }
                            else
                            {
                                item.RunTimeTicks = ticks;
                            }
                        }
                        break;
                    }

                case "Genre":
                    foreach (var genre in SplitNames(reader.ReadElementContentAsString()))
                    {
                        if (!string.IsNullOrWhiteSpace(genre))
                        {
                            item.AddGenre(genre);
                        }
                    }
                    break;

                case "AspectRatio":
                    {
                        var hasAspectRatio = item as IHasAspectRatio;
                        if (TryReadText(reader, out text) && hasAspectRatio != null)
                        {
                            hasAspectRatio.AspectRatio = text;
                        }
                        break;
                    }

                case "LockData":
                    if (TryReadText(reader, out text))
                    {
                        item.DontFetchMeta = string.Equals("true", text, StringComparison.OrdinalIgnoreCase);
                    }
                    break;

                case "Network":
                    foreach (var studio in SplitNames(reader.ReadElementContentAsString()))
                    {
                        if (!string.IsNullOrWhiteSpace(studio))
                        {
                            item.AddStudio(studio);
                        }
                    }
                    break;

                case "Director":
                    AddPeople(item, SplitNames(reader.ReadElementContentAsString()), PersonType.Director);
                    break;

                case "Writer":
                    AddPeople(item, SplitNames(reader.ReadElementContentAsString()), PersonType.Writer);
                    break;

                case "Actors":
                    {
                        var actors = reader.ReadInnerXml();

                        if (actors.Contains("<"))
                        {
                            // This is one of the mis-named "Actors" full nodes created by MB2.
                            // The inner xml can hold several sibling elements, so it is wrapped in
                            // a single root element before being handed to the persons node processor.
                            using (var personsReader = new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")))
                            {
                                FetchDataFromPersonsNode(personsReader, item);
                            }
                        }
                        else
                        {
                            // Old-style piped string
                            AddPeople(item, SplitNames(actors), PersonType.Actor);
                        }
                        break;
                    }

                case "GuestStars":
                    AddPeople(item, SplitNames(reader.ReadElementContentAsString()), PersonType.GuestStar);
                    break;

                case "Trailer":
                    if (TryReadText(reader, out text))
                    {
                        item.AddTrailerUrl(text, false);
                    }
                    break;

                case "Trailers":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchDataFromTrailersNode(subtree, item);
                    }
                    break;

                case "ReleaseYear":
                case "ProductionYear":
                    {
                        int productionYear;
                        if (TryReadText(reader, out text)
                            && int.TryParse(text, NumberStyles.Integer, _usCulture, out productionYear)
                            && productionYear > 1850)
                        {
                            item.ProductionYear = productionYear;
                        }
                        break;
                    }

                case "Rating":
                case "IMDBrating":
                case "TGDBRating":
                    {
                        float communityRating;

                        // All external meta is saving this as '.' for decimal I believe...but just to be sure
                        if (TryReadText(reader, out text)
                            && float.TryParse(text.Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out communityRating))
                        {
                            item.CommunityRating = communityRating;
                        }
                        break;
                    }

                case "BirthDate":
                case "PremiereDate":
                case "FirstAired":
                    {
                        DateTime premiereDate;
                        if (TryReadDate(reader, out premiereDate))
                        {
                            item.PremiereDate = premiereDate.ToUniversalTime();
                            item.ProductionYear = premiereDate.Year;
                        }
                        break;
                    }

                case "DeathDate":
                case "EndDate":
                    {
                        DateTime endDate;
                        if (TryReadDate(reader, out endDate))
                        {
                            item.EndDate = endDate.ToUniversalTime();
                        }
                        break;
                    }

                case "TvDbId":
                    ReadProviderId(reader, item, MetadataProviders.Tvdb);
                    break;

                case "VoteCount":
                    {
                        int voteCount;
                        if (TryReadText(reader, out text)
                            && int.TryParse(text, NumberStyles.Integer, _usCulture, out voteCount))
                        {
                            item.VoteCount = voteCount;
                        }
                        break;
                    }

                case "MusicbrainzId":
                    ReadProviderId(reader, item, MetadataProviders.Musicbrainz);
                    break;

                case "MusicBrainzReleaseGroupId":
                    ReadProviderId(reader, item, MetadataProviders.MusicBrainzReleaseGroup);
                    break;

                case "RottenTomatoesId":
                    ReadProviderId(reader, item, MetadataProviders.RottenTomatoes);
                    break;

                case "TMDbId":
                    ReadProviderId(reader, item, MetadataProviders.Tmdb);
                    break;

                case "TMDbCollectionId":
                    ReadProviderId(reader, item, MetadataProviders.TmdbCollection);
                    break;

                case "TVcomId":
                    ReadProviderId(reader, item, MetadataProviders.Tvcom);
                    break;

                case "Zap2ItId":
                    ReadProviderId(reader, item, MetadataProviders.Zap2It);
                    break;

                case "IMDB_ID":
                case "IMDB":
                case "IMDbId":
                    ReadProviderId(reader, item, MetadataProviders.Imdb);
                    break;

                case "Genres":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromGenresNode(subtree, item);
                    }
                    break;

                case "Tags":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromTagsNode(subtree, item);
                    }
                    break;

                case "Persons":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchDataFromPersonsNode(subtree, item);
                    }
                    break;

                case "ParentalRating":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromParentalRatingNode(subtree, item);
                    }
                    break;

                case "Studios":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromStudiosNode(subtree, item);
                    }
                    break;

                case "MediaInfo":
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromMediaInfoNode(subtree, item);
                    }
                    break;

                default:
                    reader.Skip();
                    break;
            }
        }

        /// <summary>
        /// Reads the current element's content as a string.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element to read.</param>
        /// <param name="text">The element content exactly as read.</param>
        /// <returns><c>true</c> if the content is not null, empty or whitespace, <c>false</c> otherwise.</returns>
        private static bool TryReadText(XmlReader reader, out string text)
        {
            text = reader.ReadElementContentAsString();

            return !string.IsNullOrWhiteSpace(text);
        }

        /// <summary>
        /// Reads the current element as a yyyy-MM-dd date, assumed to be local time.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element to read.</param>
        /// <param name="date">The parsed date, not converted to universal time.</param>
        /// <returns><c>true</c> if the content parsed and its year is after 1850, <c>false</c> otherwise.</returns>
        private static bool TryReadDate(XmlReader reader, out DateTime date)
        {
            date = default(DateTime);

            var raw = reader.ReadElementContentAsString();

            return !string.IsNullOrWhiteSpace(raw)
                && DateTime.TryParseExact(raw, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out date)
                && date.Year > 1850;
        }

        /// <summary>
        /// Reads the current element and, when it is not blank, stores it as a provider id.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element to read.</param>
        /// <param name="item">The item.</param>
        /// <param name="provider">The provider the id belongs to.</param>
        private static void ReadProviderId(XmlReader reader, T item, MetadataProviders provider)
        {
            string id;
            if (TryReadText(reader, out id))
            {
                item.SetProviderId(provider, id);
            }
        }

        /// <summary>
        /// Adds one person of the given type for every non-blank name.
        /// </summary>
        /// <param name="item">The item.</param>
        /// <param name="names">The names; each is trimmed before use.</param>
        /// <param name="personType">The person type assigned to every added person.</param>
        private static void AddPeople(T item, IEnumerable<string> names, string personType)
        {
            foreach (var rawName in names)
            {
                var name = rawName.Trim();

                if (string.IsNullOrWhiteSpace(name))
                {
                    continue;
                }

                item.AddPerson(new PersonInfo { Name = name, Type = personType });
            }
        }

        /// <summary>
        /// Walks a container node and passes the non-blank content of every child element
        /// with the given name to a callback. All other elements are skipped.
        /// </summary>
        /// <param name="reader">The reader over the container node.</param>
        /// <param name="elementName">The name of the child elements to read.</param>
        /// <param name="handleValue">The callback invoked with each non-blank value.</param>
        private static void ReadChildElementValues(XmlReader reader, string elementName, Action<string> handleValue)
        {
            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (reader.Name == elementName)
                {
                    var value = reader.ReadElementContentAsString();

                    if (!string.IsNullOrWhiteSpace(value))
                    {
                        handleValue(value);
                    }
                }
                else
                {
                    reader.Skip();
                }
            }
        }

        /// <summary>
        /// Fetches from media info node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromMediaInfoNode(XmlReader reader, T item)
        {
            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (reader.Name == "Video")
                {
                    using (var subtree = reader.ReadSubtree())
                    {
                        FetchFromMediaInfoVideoNode(subtree, item);
                    }
                }
                else
                {
                    reader.Skip();
                }
            }
        }

        /// <summary>
        /// Fetches from media info video node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromMediaInfoVideoNode(XmlReader reader, T item)
        {
            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (reader.Name != "Format3D")
                {
                    reader.Skip();
                    continue;
                }

                // The value is only read when the item is a video; the comparison is
                // case-sensitive.
                var video = item as Video;
                if (video != null)
                {
                    switch (reader.ReadElementContentAsString())
                    {
                        case "HSBS":
                            video.Video3DFormat = Video3DFormat.HalfSideBySide;
                            break;
                        case "HTAB":
                            video.Video3DFormat = Video3DFormat.HalfTopAndBottom;
                            break;
                        case "FTAB":
                            video.Video3DFormat = Video3DFormat.FullTopAndBottom;
                            break;
                        case "FSBS":
                            video.Video3DFormat = Video3DFormat.FullSideBySide;
                            break;
                    }
                }
            }
        }

        /// <summary>
        /// Fetches from taglines node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromTaglinesNode(XmlReader reader, T item)
        {
            ReadChildElementValues(reader, "Tagline", value => item.AddTagline(value));
        }

        /// <summary>
        /// Fetches from genres node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromGenresNode(XmlReader reader, T item)
        {
            ReadChildElementValues(reader, "Genre", value => item.AddGenre(value));
        }

        /// <summary>
        /// Fetches from tags node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromTagsNode(XmlReader reader, T item)
        {
            ReadChildElementValues(reader, "Tag", value => item.AddTag(value));
        }

        /// <summary>
        /// Fetches the data from persons node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchDataFromPersonsNode(XmlReader reader, T item)
        {
            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                switch (reader.Name)
                {
                    case "Person":
                    case "Actor":
                        using (var subtree = reader.ReadSubtree())
                        {
                            foreach (var person in GetPersonsFromXmlNode(subtree))
                            {
                                item.AddPerson(person);
                            }
                        }
                        break;

                    default:
                        reader.Skip();
                        break;
                }
            }
        }

        /// <summary>
        /// Fetches the data from trailers node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchDataFromTrailersNode(XmlReader reader, T item)
        {
            ReadChildElementValues(reader, "Trailer", value => item.AddTrailerUrl(value, false));
        }

        /// <summary>
        /// Reads chapters from an xml node and saves them to the repository.
        /// </summary>
        /// <param name="item">The item the chapters belong to.</param>
        /// <param name="reader">The reader over the chapters node. It is disposed by this method.</param>
        /// <param name="repository">The repository the chapters are saved to.</param>
        /// <param name="cancellationToken">The cancellation token.</param>
        /// <returns>Task.</returns>
        protected async Task FetchChaptersFromXmlNode(BaseItem item, XmlReader reader, IItemRepository repository, CancellationToken cancellationToken)
        {
            // With no known runtime the upper bound is 0, so every chapter is filtered out.
            var runtime = item.RunTimeTicks ?? 0;

            using (reader)
            {
                var chapters = GetChaptersFromXmlNode(reader)
                    .Where(i => i.StartPositionTicks >= 0 && i.StartPositionTicks < runtime);

                await repository.SaveChapters(item.Id, chapters, cancellationToken).ConfigureAwait(false);
            }
        }

        /// <summary>
        /// Gets the chapters from XML node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <returns>IEnumerable{ChapterInfo}.</returns>
        private IEnumerable<ChapterInfo> GetChaptersFromXmlNode(XmlReader reader)
        {
            var chapters = new List<ChapterInfo>();

            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (reader.Name == "Chapter")
                {
                    using (var subtree = reader.ReadSubtree())
                    {
                        chapters.Add(GetChapterInfoFromXmlNode(subtree));
                    }
                }
                else
                {
                    reader.Skip();
                }
            }

            return chapters;
        }

        /// <summary>
        /// Gets the chapter info from XML node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <returns>ChapterInfo.</returns>
        private ChapterInfo GetChapterInfoFromXmlNode(XmlReader reader)
        {
            var chapter = new ChapterInfo();

            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                switch (reader.Name)
                {
                    case "StartPositionMs":
                        {
                            // A malformed value leaves the start position untouched instead of
                            // aborting the whole parse, like the other numeric fields in this class.
                            long ms;
                            if (long.TryParse(reader.ReadElementContentAsString(), NumberStyles.Integer, _usCulture, out ms))
                            {
                                chapter.StartPositionTicks = TimeSpan.FromMilliseconds(ms).Ticks;
                            }
                            break;
                        }

                    case "Name":
                        chapter.Name = reader.ReadElementContentAsString();
                        break;

                    default:
                        reader.Skip();
                        break;
                }
            }

            return chapter;
        }

        /// <summary>
        /// Fetches from studios node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromStudiosNode(XmlReader reader, T item)
        {
            ReadChildElementValues(reader, "Studio", value => item.AddStudio(value));
        }

        /// <summary>
        /// Fetches from parental rating node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <param name="item">The item.</param>
        private void FetchFromParentalRatingNode(XmlReader reader, T item)
        {
            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    // Removed support for "Value" tag as it conflicted with MPAA rating but leaving this function for possible
                    // future support of "Description" -ebr
                    reader.Skip();
                }
            }
        }

        /// <summary>
        /// Gets the persons from XML node.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <returns>IEnumerable{PersonInfo}.</returns>
        private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
        {
            var name = string.Empty;
            var type = "Actor";  // If type is not specified assume actor
            var role = string.Empty;
            int? sortOrder = null;

            reader.MoveToContent();

            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                switch (reader.Name)
                {
                    case "Name":
                        name = reader.ReadElementContentAsString() ?? string.Empty;
                        break;

                    case "Type":
                        {
                            var val = reader.ReadElementContentAsString();
                            if (!string.IsNullOrWhiteSpace(val))
                            {
                                type = val;
                            }
                            break;
                        }

                    case "Role":
                        {
                            var val = reader.ReadElementContentAsString();
                            if (!string.IsNullOrWhiteSpace(val))
                            {
                                role = val;
                            }
                            break;
                        }

                    case "SortOrder":
                        {
                            var val = reader.ReadElementContentAsString();
                            int intVal;
                            if (!string.IsNullOrWhiteSpace(val)
                                && int.TryParse(val, NumberStyles.Integer, _usCulture, out intVal))
                            {
                                sortOrder = intVal;
                            }
                            break;
                        }

                    default:
                        reader.Skip();
                        break;
                }
            }

            var personInfo = new PersonInfo
            {
                Name = name.Trim(),
                Role = role,
                Type = type,
                SortOrder = sortOrder
            };

            return new[] { personInfo };
        }

        /// <summary>
        /// Used to split names of comma or pipe delimited genres and people
        /// </summary>
        /// <param name="value">The value.</param>
        /// <returns>IEnumerable{System.String}.</returns>
        private IEnumerable<string> SplitNames(string value)
        {
            value = value ?? string.Empty;

            // Only split by comma if there is no pipe in the string
            // We have to be careful to not split names like Matthew, Jr.
            var separator = value.IndexOf('|') == -1 && value.IndexOf(';') == -1
                ? new[] { ',' }
                : new[] { '|', ';' };

            value = value.Trim().Trim(separator);

            return string.IsNullOrWhiteSpace(value)
                ? new string[] { }
                : Split(value, separator, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>
        /// Provides an additional overload for string.split
        /// </summary>
        /// <param name="val">The val.</param>
        /// <param name="separators">The separators.</param>
        /// <param name="options">The options.</param>
        /// <returns>System.String[][].</returns>
        private static string[] Split(string val, char[] separators, StringSplitOptions options)
        {
            return val.Split(separators, options);
        }
    }
}