using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Numerics;
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Model.Tasks;
using Microsoft.Extensions.Logging;

namespace ConfusedPolarBear.Plugin.IntroSkipper;

/// <summary>
/// Fingerprint and analyze all queued episodes for common audio sequences.
/// </summary>
public class FingerprinterTask : IScheduledTask
{
    /// <summary>
    /// Minimum time (in seconds) for a contiguous time range to be considered an introduction.
    /// </summary>
    private const int MinimumIntroDuration = 15;

    /// <summary>
    /// Maximum number of bits (out of 32 total) that can be different between segments before they are considered dissimilar.
    /// </summary>
    private const double MaximumDifferences = 3;

    /// <summary>
    /// Maximum time (in seconds) permitted between timestamps before they are considered non-contiguous.
    /// </summary>
    private const double MaximumDistance = 3.25;

    /// <summary>
    /// Seconds of audio in one fingerprint point. This value is defined by the Chromaprint library and should not be changed.
    /// </summary>
    private const double SamplesToSeconds = 0.128;

    /// <summary>
    /// Bucket size used in the reanalysis histogram.
    /// </summary>
    private const int ReanalysisBucketWidth = 5;

    /// <summary>
    /// Maximum time (in seconds) that an intro's duration can be different from a typical intro's duration before marking it for reanalysis.
    /// </summary>
    private const double ReanalysisTolerance = ReanalysisBucketWidth * 1.5;

    /// <summary>
    /// Number of failed episodes after which a season that has not yet produced any intro is abandoned.
    /// </summary>
    // TODO: make configurable
    private const int MaximumFailedEpisodes = 6;

    // NOTE(review): the logger's generic argument is assumed to be this class (the usual convention) - confirm.
    private readonly ILogger<FingerprinterTask> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="FingerprinterTask"/> class.
    /// </summary>
    /// <param name="logger">Logger.</param>
    public FingerprinterTask(ILogger<FingerprinterTask> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Gets the task name.
    /// </summary>
    public string Name => "Analyze episodes";

    /// <summary>
    /// Gets the task category.
    /// </summary>
    public string Category => "Intro Skipper";

    /// <summary>
    /// Gets the task description.
    /// </summary>
    public string Description => "Analyzes the audio of all television episodes to find introduction sequences.";

    /// <summary>
    /// Gets the task key.
    /// </summary>
    public string Key => "CPBIntroSkipperRunFingerprinter";

    /// <summary>
    /// Analyze all episodes in the queue.
    /// </summary>
    /// <param name="progress">Task progress. Receives a percentage after each season has been processed.</param>
    /// <param name="cancellationToken">Cancellation token. When cancelled, no further seasons are started.</param>
    /// <returns>An already completed task; all work is done synchronously.</returns>
    // NOTE(review): IProgress<double> is assumed from the reported percentage value - confirm against IScheduledTask.
    public Task ExecuteAsync(IProgress<double> progress, CancellationToken cancellationToken)
    {
        var queue = Plugin.Instance!.AnalysisQueue;
        var totalProcessed = 0;

        foreach (var season in queue)
        {
            // Stop before starting (and saving timestamps for) seasons that would be skipped anyway.
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            AnalyzeSeason(season.Value, cancellationToken);

            // TODO: report progress on a per episode basis
            totalProcessed += season.Value.Count;
            progress.Report((totalProcessed * 100) / Plugin.Instance!.TotalQueued);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Fingerprints the episodes of one season in pairs, stores the discovered intros and then
    /// runs <see cref="CheckSeason"/> over the season if at least one intro was found.
    /// </summary>
    /// <param name="season">Queued episodes of a single season. This list is not modified.</param>
    /// <param name="cancellationToken">Cancellation token, checked before each pair of episodes.</param>
    private void AnalyzeSeason(List<QueuedEpisode> season, CancellationToken cancellationToken)
    {
        /* Don't analyze seasons with an insufficient number of episodes.
         * A season with only 1 episode can't be analyzed as it would compare the episode to itself,
         * which would result in the entire episode being marked as an introduction, as the audio is identical.
         * The count is checked before touching the first element so an empty season cannot throw.
         */
        if (season.Count < 2)
        {
            return;
        }

        // Don't analyze specials (season 0).
        var first = season[0];
        if (first.SeasonNumber == 0)
        {
            return;
        }

        _logger.LogInformation(
            "Analyzing {Count} episodes from {Name} season {Season}",
            season.Count,
            first.SeriesName,
            first.SeasonNumber);

        // Ensure there are an even number of episodes by pairing the last episode with its predecessor.
        // The padding is applied to a private copy so the shared queue (and the progress count derived
        // from it) never contains a duplicated episode.
        var episodes = new List<QueuedEpisode>(season);
        if (episodes.Count % 2 != 0)
        {
            episodes.Add(episodes[episodes.Count - 2]);
        }

        // Analyze each pair of episodes in the current season
        var everFoundIntro = false;
        var failures = 0;

        for (var i = 0; i < episodes.Count; i += 2)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            var lhs = episodes[i];
            var rhs = episodes[i + 1];

            // Give up on seasons that keep failing without ever producing an intro.
            if (!everFoundIntro && failures >= MaximumFailedEpisodes)
            {
                _logger.LogWarning(
                    "Failed to find an introduction in {Series} season {Season}",
                    lhs.SeriesName,
                    lhs.SeasonNumber);

                break;
            }

            // FIXME: add retry logic
            var alreadyDone = Plugin.Instance!.Intros;
            if (alreadyDone.ContainsKey(lhs.EpisodeId) && alreadyDone.ContainsKey(rhs.EpisodeId))
            {
                _logger.LogDebug(
                    "Episodes {LHS} and {RHS} have both already been fingerprinted",
                    lhs.EpisodeId,
                    rhs.EpisodeId);

                // TODO: bring back per-pair progress reporting (totalProcessed += 2)
                continue;
            }

            try
            {
                _logger.LogDebug("Analyzing {LHS} and {RHS}", lhs.Path, rhs.Path);

                var (lhsIntro, rhsIntro) = FingerprintEpisodes(lhs, rhs);

                // Results are stored even when invalid, which marks the pair as already processed.
                Plugin.Instance.Intros![lhsIntro.EpisodeId] = lhsIntro;
                Plugin.Instance.Intros![rhsIntro.EpisodeId] = rhsIntro;

                if (!lhsIntro.Valid)
                {
                    failures += 2;
                    continue;
                }

                everFoundIntro = true;
            }
            catch (FingerprintException ex)
            {
                // A pair that cannot be fingerprinted is skipped; nothing is stored for it.
                _logger.LogError("Caught fingerprint error: {Ex}", ex);
            }

            // TODO: bring back per-pair progress reporting (totalProcessed += 2)
        }

        Plugin.Instance!.SaveTimestamps();

        if (cancellationToken.IsCancellationRequested || !everFoundIntro)
        {
            return;
        }

        // Reanalyze this season to check for (and hopefully correct) outliers and failed episodes.
        CheckSeason(season);
    }

    /// <summary>
    /// Analyze two episodes to find an introduction sequence shared between them.
    /// </summary>
    /// <param name="lhsEpisode">First episode to analyze.</param>
    /// <param name="rhsEpisode">Second episode to analyze.</param>
    /// <returns>Intros for the first and second episodes.</returns>
    public (Intro Lhs, Intro Rhs) FingerprintEpisodes(QueuedEpisode lhsEpisode, QueuedEpisode rhsEpisode)
    {
        var lhsFingerprint = FPCalc.Fingerprint(lhsEpisode);
        var rhsFingerprint = FPCalc.Fingerprint(rhsEpisode);

        return FingerprintEpisodes(
            lhsEpisode.EpisodeId,
            lhsFingerprint,
            rhsEpisode.EpisodeId,
            rhsFingerprint);
    }

    /// <summary>
    /// Analyze two episodes to find an introduction sequence shared between them.
    /// </summary>
    /// <param name="lhsId">First episode id.</param>
    /// <param name="lhs">Fingerprint of the first episode.</param>
    /// <param name="rhsId">Second episode id.</param>
    /// <param name="rhs">Fingerprint of the second episode.</param>
    /// <returns>
    /// Intros for the first and second episodes. When no shared sequence is found,
    /// both intros are created with a start and end of 0.
    /// </returns>
    public (Intro Lhs, Intro Rhs) FingerprintEpisodes(
        Guid lhsId,
        ReadOnlyCollection<uint> lhs,
        Guid rhsId,
        ReadOnlyCollection<uint> rhs)
    {
        var lhsRanges = new List<TimeRange>();
        var rhsRanges = new List<TimeRange>();

        // Compare all elements of the shortest fingerprint to the other fingerprint.
        var limit = Math.Min(lhs.Count, rhs.Count);

        // First, test if an intro can be found within the first 5 seconds of the episodes (±5/0.128 = ±40 samples).
        var (lhsContiguous, rhsContiguous) = ShiftEpisodes(lhs, rhs, -40, 40);
        lhsRanges.AddRange(lhsContiguous);
        rhsRanges.AddRange(rhsContiguous);

        // If no valid ranges were found, re-analyze the episodes considering all possible shifts.
        if (lhsRanges.Count == 0)
        {
            _logger.LogDebug("using full scan");

            (lhsContiguous, rhsContiguous) = ShiftEpisodes(lhs, rhs, -1 * limit, limit);
            lhsRanges.AddRange(lhsContiguous);
            rhsRanges.AddRange(rhsContiguous);
        }
        else
        {
            _logger.LogDebug("intro found with quick scan");
        }

        if (lhsRanges.Count == 0)
        {
            _logger.LogDebug(
                "Unable to find a shared introduction sequence {LHS} and {RHS}",
                lhsId,
                rhsId);

            return (new Intro(lhsId, 0, 0), new Intro(rhsId, 0, 0));
        }

        // After comparing both episodes at all possible shift positions, store the longest time range as the intro.
        // NOTE(review): this relies on TimeRange's own sort order placing the longest range first - confirm in TimeRange.
        lhsRanges.Sort();
        rhsRanges.Sort();

        var lhsIntro = lhsRanges[0];
        var rhsIntro = rhsRanges[0];

        // If the intro starts early in the episode, move it to the beginning.
        if (lhsIntro.Start <= 5)
        {
            lhsIntro.Start = 0;
        }

        if (rhsIntro.Start <= 5)
        {
            rhsIntro.Start = 0;
        }

        return (
            new Intro(lhsId, lhsIntro.Start, lhsIntro.End),
            new Intro(rhsId, rhsIntro.Start, rhsIntro.End));
    }

    /// <summary>
    /// Shifts episodes through the range of provided shift amounts and returns discovered contiguous time ranges.
    /// </summary>
    /// <param name="lhs">First episode fingerprint.</param>
    /// <param name="rhs">Second episode fingerprint.</param>
    /// <param name="lower">Lower end of the shift range (inclusive).</param>
    /// <param name="upper">Upper end of the shift range (inclusive).</param>
    /// <returns>The time ranges found for each episode, one entry per shift amount that produced a range.</returns>
    private static (List<TimeRange> Lhs, List<TimeRange> Rhs) ShiftEpisodes(
        ReadOnlyCollection<uint> lhs,
        ReadOnlyCollection<uint> rhs,
        int lower,
        int upper)
    {
        var lhsRanges = new List<TimeRange>();
        var rhsRanges = new List<TimeRange>();

        for (int amount = lower; amount <= upper; amount++)
        {
            var (lRange, rRange) = FindContiguous(lhs, rhs, amount);

            // Ranges whose End is 0 on both sides mean nothing was found at this shift.
            if (lRange.End == 0 && rRange.End == 0)
            {
                continue;
            }

            lhsRanges.Add(lRange);
            rhsRanges.Add(rRange);
        }

        return (lhsRanges, rhsRanges);
    }

    /// <summary>
    /// Finds the longest contiguous region of similar audio between two fingerprints using the provided shift amount.
    /// </summary>
    /// <param name="lhs">First fingerprint to compare.</param>
    /// <param name="rhs">Second fingerprint to compare.</param>
    /// <param name="shiftAmount">Amount to shift one fingerprint by. Negative values offset the first fingerprint, positive values the second.</param>
    /// <returns>
    /// The matching time range in each episode, or two default-constructed ranges when no range of at least
    /// <see cref="MinimumIntroDuration"/> seconds was found.
    /// </returns>
    private static (TimeRange Lhs, TimeRange Rhs) FindContiguous(
        ReadOnlyCollection<uint> lhs,
        ReadOnlyCollection<uint> rhs,
        int shiftAmount)
    {
        var leftOffset = 0;
        var rightOffset = 0;

        // Calculate the offsets for the left and right hand sides.
        if (shiftAmount < 0)
        {
            leftOffset -= shiftAmount;
        }
        else
        {
            rightOffset += shiftAmount;
        }

        // Store similar times for both LHS and RHS.
        var lhsTimes = new List<double>();
        var rhsTimes = new List<double>();
        var upperLimit = Math.Min(lhs.Count, rhs.Count) - Math.Abs(shiftAmount);

        // XOR all elements in LHS and RHS, using the shift amount from above.
        for (var i = 0; i < upperLimit; i++)
        {
            // XOR both samples at the current position.
            var lhsPosition = i + leftOffset;
            var rhsPosition = i + rightOffset;
            var diff = lhs[lhsPosition] ^ rhs[rhsPosition];

            // If the difference between the samples is small, flag both times as similar.
            if (CountBits(diff) > MaximumDifferences)
            {
                continue;
            }

            var lhsTime = lhsPosition * SamplesToSeconds;
            var rhsTime = rhsPosition * SamplesToSeconds;

            lhsTimes.Add(lhsTime);
            rhsTimes.Add(rhsTime);
        }

        // Ensure the last timestamp is checked
        lhsTimes.Add(double.MaxValue);
        rhsTimes.Add(double.MaxValue);

        // Now that both fingerprints have been compared at this shift, see if there's a contiguous time range.
        var lContiguous = TimeRangeHelpers.FindContiguous(lhsTimes.ToArray(), MaximumDistance);
        if (lContiguous is null || lContiguous.Duration < MinimumIntroDuration)
        {
            return (new TimeRange(), new TimeRange());
        }

        // Since LHS had a contiguous time range, RHS must have one also.
        var rContiguous = TimeRangeHelpers.FindContiguous(rhsTimes.ToArray(), MaximumDistance)!;

        // Tweak the end timestamps just a bit to ensure as little content as possible is skipped over.
        if (lContiguous.Duration >= 90)
        {
            lContiguous.End -= 2 * MaximumDistance;
            rContiguous.End -= 2 * MaximumDistance;
        }
        else if (lContiguous.Duration >= 35)
        {
            lContiguous.End -= MaximumDistance;
            rContiguous.End -= MaximumDistance;
        }

        return (lContiguous, rContiguous);
    }

    /// <summary>
    /// Count the number of bits that are set in the provided number.
    /// </summary>
    /// <param name="number">Number to count bits in.</param>
    /// <returns>Number of bits that are equal to 1.</returns>
    public static int CountBits(uint number)
    {
        return BitOperations.PopCount(number);
    }

    /// <summary>
    /// Reanalyze the most recently analyzed season.
    /// Looks for and fixes intro durations that were either not found or are statistical outliers.
    /// </summary>
    /// <param name="episodes">List of episodes that was just analyzed.</param>
    private void CheckSeason(List<QueuedEpisode> episodes)
    {
        var intros = Plugin.Instance!.Intros;

        var totalCount = episodes.Count;
        if (totalCount == 0)
        {
            return;
        }

        // First, assert that at least half of the episodes in this season have an intro.
        // Episodes without a stored intro (e.g. their pair failed to fingerprint) count as invalid.
        var validCount = 0;

        foreach (var episode in episodes)
        {
            if (intros.TryGetValue(episode.EpisodeId, out var intro) && intro.Valid)
            {
                validCount++;
            }
        }

        var percentValid = (validCount * 100) / totalCount;
        _logger.LogDebug("Found intros in {Valid}/{Total} ({Percent}%) of episodes", validCount, totalCount, percentValid);
        if (percentValid < 50)
        {
            return;
        }

        // Create a histogram of all episode durations
        var histogram = new Dictionary<int, SeasonHistogram>();
        foreach (var episode in episodes)
        {
            var id = episode.EpisodeId;
            var duration = GetIntroDuration(id);

            if (duration < MinimumIntroDuration)
            {
                continue;
            }

            // Bucket the duration into equally sized groups
            var bucket = Convert.ToInt32(Math.Floor(duration / ReanalysisBucketWidth)) * ReanalysisBucketWidth;

            // TryAdd returns true when the key was successfully added (i.e. for newly created buckets).
            // Newly created buckets are initialized with the provided episode ID, so only existing buckets need the id appended.
            if (!histogram.TryAdd(bucket, new SeasonHistogram(id)))
            {
                histogram[bucket].Episodes.Add(id);
            }
        }

        // Find the bucket that was seen most often, as this is likely to be the true intro length.
        var maxDuration = 0;
        var maxBucket = new SeasonHistogram(Guid.Empty);
        foreach (var entry in histogram)
        {
            if (entry.Value.Count > maxBucket.Count)
            {
                maxDuration = entry.Key;
                maxBucket = entry.Value;
            }
        }

        // Ensure that the most frequently seen bucket has a majority
        // TODO: change to debug
        percentValid = (maxBucket.Count * 100) / validCount;
        _logger.LogInformation(
            "Intro duration {Duration} appeared {Frequency} times ({Percent}%)",
            maxDuration,
            maxBucket.Count,
            percentValid);

        // A first entry of Guid.Empty means the placeholder bucket was never replaced.
        if (percentValid < 50 || maxBucket.Episodes[0].Equals(Guid.Empty))
        {
            return;
        }

        _logger.LogInformation("Reanalyzing {Count} episodes", totalCount - maxBucket.Count);

        // TODO: pick two episodes at random
        // Cache the fingerprint of the first episode in the max bucket to save CPU cycles
        var referenceId = maxBucket.Episodes[0];
        var lhs = episodes.Find(x => x.EpisodeId == referenceId);
        if (lhs is null)
        {
            _logger.LogError("Reanalysis failed to get episode from bucket");
            return;
        }

        ReadOnlyCollection<uint> lhsFingerprint;
        try
        {
            lhsFingerprint = FPCalc.Fingerprint(lhs);
        }
        catch (FingerprintException ex)
        {
            _logger.LogWarning("Skipping reanalysis of {Show} season {Season}: {Exception}", lhs.SeriesName, lhs.SeasonNumber, ex);
            return;
        }

        var lhsDuration = GetIntroDuration(lhs.EpisodeId);
        var (lowTargetDuration, highTargetDuration) = (
            lhsDuration - ReanalysisTolerance,
            lhsDuration + ReanalysisTolerance);

        foreach (var episode in episodes)
        {
            // Don't reanalyze episodes from the max bucket
            if (maxBucket.Episodes.Contains(episode.EpisodeId))
            {
                continue;
            }

            var oldDuration = GetIntroDuration(episode.EpisodeId);

            // TODO: remove
            // Log at most the last 40 characters of the path; shorter paths are logged in full.
            var shortPath = episode.Path.Substring(Math.Max(0, episode.Path.Length - 40));

            // If the episode's intro duration is close enough to the targeted bucket, leave it alone.
            if (Math.Abs(lhsDuration - oldDuration) <= ReanalysisTolerance)
            {
                _logger.LogInformation(
                    "Not reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
                    shortPath,
                    Math.Round(oldDuration, 2),
                    maxDuration);

                continue;
            }

            _logger.LogDebug(
                "Reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
                shortPath,
                Math.Round(oldDuration, 2),
                maxDuration);

            // A single episode that cannot be fingerprinted must not abort reanalysis of the rest of the season.
            ReadOnlyCollection<uint> rhsFingerprint;
            try
            {
                rhsFingerprint = FPCalc.Fingerprint(episode);
            }
            catch (FingerprintException ex)
            {
                _logger.LogWarning("Skipping reanalysis of episode {Path}: {Exception}", shortPath, ex);
                continue;
            }

            // Analyze the episode again, ignoring whatever is returned for the known good episode.
            var (_, newRhs) = FingerprintEpisodes(
                lhs.EpisodeId,
                lhsFingerprint,
                episode.EpisodeId,
                rhsFingerprint);

            // Ensure that the new intro duration is within the targeted bucket and longer than what was found previously.
            var newDuration = Math.Round(newRhs.IntroEnd - newRhs.IntroStart, 2);
            if (newDuration < oldDuration || newDuration < lowTargetDuration || newDuration > highTargetDuration)
            {
                _logger.LogInformation(
                    "Ignoring reanalysis for {Path} (was {Initial}, now is {New})",
                    shortPath,
                    oldDuration,
                    newDuration);

                continue;
            }

            // TODO: change to debug
            _logger.LogInformation(
                "Reanalysis succeeded for {Path} (was {Initial}, now is {New})",
                shortPath,
                oldDuration,
                newDuration);

            Plugin.Instance!.Intros[episode.EpisodeId] = newRhs;
        }
    }

    /// <summary>
    /// Gets the duration of the stored intro for an episode.
    /// </summary>
    /// <param name="id">Episode id.</param>
    /// <returns>
    /// Intro duration in seconds, rounded to two decimal places, or 0 when the episode
    /// has no stored intro or its intro is not valid.
    /// </returns>
    private double GetIntroDuration(Guid id)
    {
        if (!Plugin.Instance!.Intros.TryGetValue(id, out var episode) || !episode.Valid)
        {
            return 0;
        }

        return Math.Round(episode.IntroEnd - episode.IntroStart, 2);
    }

    /// <summary>
    /// Get task triggers.
    /// </summary>
    /// <returns>Task triggers.</returns>
    public IEnumerable<TaskTriggerInfo> GetDefaultTriggers()
    {
        return new[]
        {
            new TaskTriggerInfo
            {
                Type = TaskTriggerInfo.TriggerDaily,

                // NOTE(review): this was TimeSpan.FromDays(24) (24 days), which is not a time of day.
                // Midnight is assumed to be the intended trigger time - confirm.
                TimeOfDayTicks = TimeSpan.FromHours(0).Ticks
            }
        };
    }
}