using System; using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; using System.Text; using System.Threading; using System.Xml; using MediaBrowser.Controller.Entities; using MediaBrowser.Controller.Providers; using MediaBrowser.Model.Entities; using MediaBrowser.Model.IO; using MediaBrowser.Model.Logging; using MediaBrowser.Model.Xml; namespace MediaBrowser.LocalMetadata.Parsers { /// /// Provides a base class for parsing metadata xml /// /// public class BaseItemXmlParser where T : BaseItem { /// /// The logger /// protected ILogger Logger { get; private set; } protected IProviderManager ProviderManager { get; private set; } private Dictionary _validProviderIds; protected IXmlReaderSettingsFactory XmlReaderSettingsFactory { get; private set; } protected IFileSystem FileSystem { get; private set; } /// /// Initializes a new instance of the class. /// /// The logger. public BaseItemXmlParser(ILogger logger, IProviderManager providerManager, IXmlReaderSettingsFactory xmlReaderSettingsFactory, IFileSystem fileSystem) { Logger = logger; ProviderManager = providerManager; XmlReaderSettingsFactory = xmlReaderSettingsFactory; FileSystem = fileSystem; } /// /// Fetches metadata for an item from one xml file /// /// The item. /// The metadata file. /// The cancellation token. /// public void Fetch(MetadataResult item, string metadataFile, CancellationToken cancellationToken) { if (item == null) { throw new ArgumentNullException(); } if (string.IsNullOrEmpty(metadataFile)) { throw new ArgumentNullException(); } var settings = XmlReaderSettingsFactory.Create(false); settings.CheckCharacters = false; settings.IgnoreProcessingInstructions = true; settings.IgnoreComments = true; _validProviderIds = _validProviderIds = new Dictionary(StringComparer.OrdinalIgnoreCase); var idInfos = ProviderManager.GetExternalIdInfos(item.Item); foreach (var info in idInfos) { var id = info.Key + "Id"; if (!_validProviderIds.ContainsKey(id)) { _validProviderIds.Add(id, info.Key); } } //Additional Mappings _validProviderIds.Add("IMDB", "Imdb"); //Fetch(item, metadataFile, settings, Encoding.GetEncoding("ISO-8859-1"), cancellationToken); Fetch(item, metadataFile, settings, Encoding.UTF8, cancellationToken); } /// /// Fetches the specified item. /// /// The item. /// The metadata file. /// The settings. /// The encoding. /// The cancellation token. private void Fetch(MetadataResult item, string metadataFile, XmlReaderSettings settings, Encoding encoding, CancellationToken cancellationToken) { item.ResetPeople(); using (Stream fileStream = FileSystem.OpenRead(metadataFile)) { using (var streamReader = new StreamReader(fileStream, encoding)) { // Use XmlReader for best performance using (var reader = XmlReader.Create(streamReader, settings)) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { cancellationToken.ThrowIfCancellationRequested(); if (reader.NodeType == XmlNodeType.Element) { FetchDataFromXmlNode(reader, item); } else { reader.Read(); } } } } } } private readonly CultureInfo _usCulture = new CultureInfo("en-US"); /// /// Fetches metadata from one Xml Element /// /// The reader. /// The item result. protected virtual void FetchDataFromXmlNode(XmlReader reader, MetadataResult itemResult) { var item = itemResult.Item; switch (reader.Name) { // DateCreated case "Added": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { DateTime added; if (DateTime.TryParse(val, out added)) { item.DateCreated = added.ToUniversalTime(); } else { Logger.Warn("Invalid Added value found: " + val); } } break; } case "OriginalTitle": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrEmpty(val)) { item.OriginalTitle = val; } break; } case "LocalTitle": item.Name = reader.ReadElementContentAsString(); break; case "Type": { var type = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase)) { item.DisplayMediaType = type; } break; } case "CriticRating": { var text = reader.ReadElementContentAsString(); if (!string.IsNullOrEmpty(text)) { float value; if (float.TryParse(text, NumberStyles.Any, _usCulture, out value)) { item.CriticRating = value; } } break; } case "AwardSummary": { var text = reader.ReadElementContentAsString(); var hasAwards = item as IHasAwards; if (hasAwards != null) { if (!string.IsNullOrWhiteSpace(text)) { hasAwards.AwardSummary = text; } } break; } case "SortTitle": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.ForcedSortName = val; } break; } case "Overview": case "Description": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.Overview = val; } break; } case "Language": { var val = reader.ReadElementContentAsString(); item.PreferredMetadataLanguage = val; break; } case "CountryCode": { var val = reader.ReadElementContentAsString(); item.PreferredMetadataCountryCode = val; break; } case "PlaceOfBirth": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { var person = item as Person; if (person != null) { person.ProductionLocations = new List { val }; } } break; } case "Website": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.HomePageUrl = val; } break; } case "LockedFields": { var fields = new List(); var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { var list = val.Split('|').Select(i => { MetadataFields field; if (Enum.TryParse(i, true, out field)) { return (MetadataFields?)field; } return null; }).Where(i => i.HasValue).Select(i => i.Value); fields.AddRange(list); } item.LockedFields = fields; break; } case "TagLines": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromTaglinesNode(subtree, item); } } else { reader.Read(); } break; } case "Countries": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromCountriesNode(subtree, item); } } else { reader.Read(); } break; } case "ContentRating": case "MPAARating": { var rating = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(rating)) { item.OfficialRating = rating; } break; } case "MPAADescription": { var rating = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(rating)) { item.OfficialRatingDescription = rating; } break; } case "CustomRating": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.CustomRating = val; } break; } case "RunningTime": { var text = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(text)) { int runtime; if (int.TryParse(text.Split(' ')[0], NumberStyles.Integer, _usCulture, out runtime)) { item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks; } } break; } case "AspectRatio": { var val = reader.ReadElementContentAsString(); var hasAspectRatio = item as IHasAspectRatio; if (!string.IsNullOrWhiteSpace(val) && hasAspectRatio != null) { hasAspectRatio.AspectRatio = val; } break; } case "LockData": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.IsLocked = string.Equals("true", val, StringComparison.OrdinalIgnoreCase); } break; } case "Network": { foreach (var name in SplitNames(reader.ReadElementContentAsString())) { if (string.IsNullOrWhiteSpace(name)) { continue; } item.AddStudio(name); } break; } case "Director": { foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new Controller.Entities.PersonInfo { Name = v.Trim(), Type = PersonType.Director })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } break; } case "Writer": { foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new Controller.Entities.PersonInfo { Name = v.Trim(), Type = PersonType.Writer })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } break; } case "Actors": { var actors = reader.ReadInnerXml(); if (actors.Contains("<")) { // This is one of the mis-named "Actors" full nodes created by MB2 // Create a reader and pass it to the persons node processor FetchDataFromPersonsNode(XmlReader.Create(new StringReader("" + actors + "")), itemResult); } else { // Old-style piped string foreach (var p in SplitNames(actors).Select(v => new Controller.Entities.PersonInfo { Name = v.Trim(), Type = PersonType.Actor })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } } break; } case "GuestStars": { foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new Controller.Entities.PersonInfo { Name = v.Trim(), Type = PersonType.GuestStar })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } break; } case "Trailer": { var val = reader.ReadElementContentAsString(); var hasTrailers = item as IHasTrailers; if (hasTrailers != null) { if (!string.IsNullOrWhiteSpace(val)) { hasTrailers.AddTrailerUrl(val, false); } } break; } case "DisplayOrder": { var val = reader.ReadElementContentAsString(); var hasDisplayOrder = item as IHasDisplayOrder; if (hasDisplayOrder != null) { if (!string.IsNullOrWhiteSpace(val)) { hasDisplayOrder.DisplayOrder = val; } } break; } case "Trailers": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { var hasTrailers = item as IHasTrailers; if (hasTrailers != null) { FetchDataFromTrailersNode(subtree, hasTrailers); } } } else { reader.Read(); } break; } case "ProductionYear": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { int productionYear; if (int.TryParse(val, out productionYear) && productionYear > 1850) { item.ProductionYear = productionYear; } } break; } case "Rating": case "IMDBrating": { var rating = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(rating)) { float val; // All external meta is saving this as '.' for decimal I believe...but just to be sure if (float.TryParse(rating.Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val)) { item.CommunityRating = val; } } break; } case "BirthDate": case "PremiereDate": case "FirstAired": { var firstAired = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(firstAired)) { DateTime airDate; if (DateTime.TryParseExact(firstAired, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out airDate) && airDate.Year > 1850) { item.PremiereDate = airDate.ToUniversalTime(); item.ProductionYear = airDate.Year; } } break; } case "DeathDate": case "EndDate": { var firstAired = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(firstAired)) { DateTime airDate; if (DateTime.TryParseExact(firstAired, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out airDate) && airDate.Year > 1850) { item.EndDate = airDate.ToUniversalTime(); } } break; } case "VoteCount": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { int num; if (int.TryParse(val, NumberStyles.Integer, _usCulture, out num)) { item.VoteCount = num; } } break; } case "CollectionNumber": var tmdbCollection = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(tmdbCollection)) { item.SetProviderId(MetadataProviders.TmdbCollection, tmdbCollection); } break; case "Genres": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromGenresNode(subtree, item); } } else { reader.Read(); } break; } case "Tags": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromTagsNode(subtree, item); } } else { reader.Read(); } break; } case "PlotKeywords": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromKeywordsNode(subtree, item); } } else { reader.Read(); } break; } case "Persons": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchDataFromPersonsNode(subtree, itemResult); } } else { reader.Read(); } break; } case "Studios": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromStudiosNode(subtree, item); } } else { reader.Read(); } break; } case "Shares": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { var hasShares = item as IHasShares; if (hasShares != null) { FetchFromSharesNode(subtree, hasShares); } } } else { reader.Read(); } break; } case "Format3D": { var val = reader.ReadElementContentAsString(); var video = item as Video; if (video != null) { if (string.Equals("HSBS", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.HalfSideBySide; } else if (string.Equals("HTAB", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.HalfTopAndBottom; } else if (string.Equals("FTAB", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.FullTopAndBottom; } else if (string.Equals("FSBS", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.FullSideBySide; } else if (string.Equals("MVC", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.MVC; } } break; } default: { string readerName = reader.Name; string providerIdValue; if (_validProviderIds.TryGetValue(readerName, out providerIdValue)) { var id = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(id)) { item.SetProviderId(providerIdValue, id); } } else { reader.Skip(); } break; } } } private void FetchFromSharesNode(XmlReader reader, IHasShares item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Share": { if (reader.IsEmptyElement) { reader.Read(); continue; } using (var subtree = reader.ReadSubtree()) { var share = GetShareFromNode(subtree); if (share != null) { item.Shares.Add(share); } } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } private Share GetShareFromNode(XmlReader reader) { var share = new Share(); reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "UserId": { share.UserId = reader.ReadElementContentAsString(); break; } case "CanEdit": { share.CanEdit = string.Equals(reader.ReadElementContentAsString(), true.ToString(), StringComparison.OrdinalIgnoreCase); break; } default: reader.Skip(); break; } } else { reader.Read(); } } return share; } private void FetchFromCountriesNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Country": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches from taglines node. /// /// The reader. /// The item. private void FetchFromTaglinesNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Tagline": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.Tagline = val; } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches from genres node. /// /// The reader. /// The item. private void FetchFromGenresNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Genre": { var genre = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(genre)) { item.AddGenre(genre); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } private void FetchFromTagsNode(XmlReader reader, BaseItem item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Tag": { var tag = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(tag)) { item.AddTag(tag); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } private void FetchFromKeywordsNode(XmlReader reader, BaseItem item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "PlotKeyword": { var tag = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(tag)) { item.AddKeyword(tag); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches the data from persons node. /// /// The reader. /// The item. private void FetchDataFromPersonsNode(XmlReader reader, MetadataResult item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Person": case "Actor": { if (reader.IsEmptyElement) { reader.Read(); continue; } using (var subtree = reader.ReadSubtree()) { foreach (var person in GetPersonsFromXmlNode(subtree)) { if (string.IsNullOrWhiteSpace(person.Name)) { continue; } item.AddPerson(person); } } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } private void FetchDataFromTrailersNode(XmlReader reader, IHasTrailers item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Trailer": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.AddTrailerUrl(val, false); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches from studios node. /// /// The reader. /// The item. private void FetchFromStudiosNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Studio": { var studio = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(studio)) { item.AddStudio(studio); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Gets the persons from XML node. /// /// The reader. /// IEnumerable{PersonInfo}. private IEnumerable GetPersonsFromXmlNode(XmlReader reader) { var name = string.Empty; var type = PersonType.Actor; // If type is not specified assume actor var role = string.Empty; int? sortOrder = null; reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Name": name = reader.ReadElementContentAsString() ?? string.Empty; break; case "Type": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { type = val; } break; } case "Role": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { role = val; } break; } case "SortOrder": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { int intVal; if (int.TryParse(val, NumberStyles.Integer, _usCulture, out intVal)) { sortOrder = intVal; } } break; } default: reader.Skip(); break; } } else { reader.Read(); } } var personInfo = new PersonInfo { Name = name.Trim(), Role = role, Type = type, SortOrder = sortOrder }; return new[] { personInfo }; } protected LinkedChild GetLinkedChild(XmlReader reader) { var linkedItem = new LinkedChild { Type = LinkedChildType.Manual }; reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Path": { linkedItem.Path = reader.ReadElementContentAsString(); break; } default: reader.Skip(); break; } } else { reader.Read(); } } // This is valid if (!string.IsNullOrWhiteSpace(linkedItem.Path)) { return linkedItem; } return null; } protected Share GetShare(XmlReader reader) { var item = new Share(); reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "UserId": { item.UserId = reader.ReadElementContentAsString(); break; } case "CanEdit": { item.CanEdit = string.Equals(reader.ReadElementContentAsString(), "true", StringComparison.OrdinalIgnoreCase); break; } default: { reader.Skip(); break; } } } else { reader.Read(); } } // This is valid if (!string.IsNullOrWhiteSpace(item.UserId)) { return item; } return null; } /// /// Used to split names of comma or pipe delimeted genres and people /// /// The value. /// IEnumerable{System.String}. private IEnumerable SplitNames(string value) { value = value ?? string.Empty; // Only split by comma if there is no pipe in the string // We have to be careful to not split names like Matthew, Jr. var separator = value.IndexOf('|') == -1 && value.IndexOf(';') == -1 ? new[] { ',' } : new[] { '|', ';' }; value = value.Trim().Trim(separator); return string.IsNullOrWhiteSpace(value) ? new string[] { } : Split(value, separator, StringSplitOptions.RemoveEmptyEntries); } /// /// Provides an additional overload for string.split /// /// The val. /// The separators. /// The options. /// System.String[][]. private static string[] Split(string val, char[] separators, StringSplitOptions options) { return val.Split(separators, options); } } }