610 lines
21 KiB
C#
Raw Normal View History

2022-05-01 00:33:22 -05:00
using System;
using System.Collections.Generic;
2022-05-03 01:09:50 -05:00
using System.Collections.ObjectModel;
2022-05-01 00:33:22 -05:00
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Model.Tasks;
using Microsoft.Extensions.Logging;
namespace ConfusedPolarBear.Plugin.IntroSkipper;
/// <summary>
/// Fingerprint and analyze all queued episodes for common audio sequences.
/// </summary>
public class FingerprinterTask : IScheduledTask
{
2022-05-01 00:33:22 -05:00
/// <summary>
/// Shortest contiguous time range (in seconds) that is accepted as an introduction.
/// </summary>
private const int MinimumIntroDuration = 15;

/// <summary>
/// Largest number of differing bits (out of the 32 in a fingerprint point) at which two points still count as similar.
/// </summary>
private const double MaximumDifferences = 3;

/// <summary>
/// Largest gap (in seconds) allowed between two timestamps before they are treated as non-contiguous.
/// </summary>
private const double MaximumDistance = 3.25;

/// <summary>
/// Seconds of audio covered by one fingerprint point. This value is defined by the Chromaprint library and should not be changed.
/// </summary>
private const double SamplesToSeconds = 0.128;

/// <summary>
/// Width (in seconds) of each bucket in the reanalysis histogram.
/// </summary>
private const int ReanalysisBucketWidth = 5;

/// <summary>
/// Largest difference (in seconds) between an intro's duration and the typical intro duration before the episode is marked for reanalysis.
/// </summary>
private const double ReanalysisTolerance = 1.5 * ReanalysisBucketWidth;

/// <summary>
/// Logger used for all diagnostics emitted by this task.
/// </summary>
private readonly ILogger<FingerprinterTask> _logger;
/// <summary>
/// Initializes a new instance of the <see cref="FingerprinterTask"/> class.
/// </summary>
/// <param name="logger">Logger that receives this task's diagnostics.</param>
public FingerprinterTask(ILogger<FingerprinterTask> logger) => _logger = logger;
/// <summary>
/// Gets the display name of this task.
/// </summary>
public string Name
{
    get { return "Analyze episodes"; }
}

/// <summary>
/// Gets the category this task is grouped under.
/// </summary>
public string Category
{
    get { return "Intro Skipper"; }
}

/// <summary>
/// Gets a human readable description of what this task does.
/// </summary>
public string Description
{
    get { return "Analyzes the audio of all television episodes to find introduction sequences."; }
}

/// <summary>
/// Gets the unique key that identifies this task.
/// </summary>
public string Key
{
    get { return "CPBIntroSkipperRunFingerprinter"; }
}
/// <summary>
/// Analyze all episodes in the queue.
/// </summary>
/// <remarks>
/// Each season is processed in pairs of episodes. Seasons with fewer than two episodes and
/// specials (season number 0) are skipped. After the pairwise pass, seasons in which at least
/// one intro was found are handed to <see cref="CheckSeason"/> for reanalysis.
/// </remarks>
/// <param name="progress">Task progress, reported as a percentage in the range 0-100.</param>
/// <param name="cancellationToken">Cancellation token. When cancelled, work stops after the current pair.</param>
/// <returns>A completed task; all work is performed synchronously.</returns>
public Task ExecuteAsync(IProgress<double> progress, CancellationToken cancellationToken)
{
    var plugin = Plugin.Instance!;
    var queue = plugin.AnalysisQueue;
    var totalProcessed = 0;

    // Counts one more pair as processed and reports the new percentage.
    // Floating point division avoids truncation (and a DivideByZeroException when nothing is queued);
    // the clamp is needed because padding odd seasons can push the processed count past the queued total.
    void ReportPairProcessed()
    {
        totalProcessed += 2;
        progress.Report(Math.Min(100, (totalProcessed * 100.0) / plugin.TotalQueued));
    }

    foreach (var season in queue)
    {
        /* Don't analyze specials or seasons with an insufficient number of episodes.
         * A season with only 1 episode can't be analyzed as it would compare the episode to itself,
         * which would result in the entire episode being marked as an introduction, as the audio is identical.
         * The count is checked before indexing so an empty season cannot throw.
         */
        if (season.Value.Count < 2)
        {
            continue;
        }

        var first = season.Value[0];
        if (first.SeasonNumber == 0)
        {
            continue;
        }

        _logger.LogInformation(
            "Analyzing {Count} episodes from {Name} season {Season}",
            season.Value.Count,
            first.SeriesName,
            first.SeasonNumber);

        // Ensure there are an even number of episodes by re-using the second to last one.
        // This is done on a copy so the shared analysis queue never gains a duplicate entry.
        var episodes = new List<QueuedEpisode>(season.Value);
        if (episodes.Count % 2 != 0)
        {
            episodes.Add(episodes[episodes.Count - 2]);
        }

        // Analyze each pair of episodes in the current season
        var everFoundIntro = false;
        var failures = 0;
        for (var i = 0; i < episodes.Count; i += 2)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            var lhs = episodes[i];
            var rhs = episodes[i + 1];

            // TODO: make configurable
            if (!everFoundIntro && failures >= 6)
            {
                _logger.LogWarning(
                    "Failed to find an introduction in {Series} season {Season}",
                    lhs.SeriesName,
                    lhs.SeasonNumber);
                break;
            }

            // FIXME: add retry logic
            var alreadyDone = plugin.Intros;
            if (alreadyDone.ContainsKey(lhs.EpisodeId) && alreadyDone.ContainsKey(rhs.EpisodeId))
            {
                _logger.LogDebug(
                    "Episodes {LHS} and {RHS} have both already been fingerprinted",
                    lhs.EpisodeId,
                    rhs.EpisodeId);

                ReportPairProcessed();
                continue;
            }

            try
            {
                _logger.LogDebug("Analyzing {LHS} and {RHS}", lhs.Path, rhs.Path);

                var (lhsIntro, rhsIntro) = FingerprintEpisodes(lhs, rhs);
                plugin.Intros[lhsIntro.EpisodeId] = lhsIntro;
                plugin.Intros[rhsIntro.EpisodeId] = rhsIntro;

                if (!lhsIntro.Valid)
                {
                    failures += 2;
                    continue;
                }

                everFoundIntro = true;
            }
            catch (FingerprintException ex)
            {
                _logger.LogError("Caught fingerprint error: {Ex}", ex);
            }
            finally
            {
                // Runs for successes, failures and the "continue" above alike.
                ReportPairProcessed();
            }
        }

        plugin.SaveTimestamps();

        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        if (!everFoundIntro)
        {
            continue;
        }

        // Reanalyze this season to check for (and hopefully correct) outliers and failed episodes.
        CheckSeason(season.Value);

        // CheckSeason may replace entries in the intro collection, so save again.
        // NOTE(review): assumes SaveTimestamps persists the current intro collection - confirm.
        plugin.SaveTimestamps();
    }

    return Task.CompletedTask;
}
/// <summary>
/// Fingerprints two queued episodes and searches them for a shared introduction sequence.
/// </summary>
/// <param name="lhsEpisode">First episode to analyze.</param>
/// <param name="rhsEpisode">Second episode to analyze.</param>
/// <returns>Intros for the first and second episodes.</returns>
public (Intro Lhs, Intro Rhs) FingerprintEpisodes(QueuedEpisode lhsEpisode, QueuedEpisode rhsEpisode)
{
    // Fingerprint the left episode first, then the right one, before delegating to the
    // overload that works on raw fingerprints.
    ReadOnlyCollection<uint> leftPoints = FPCalc.Fingerprint(lhsEpisode);
    ReadOnlyCollection<uint> rightPoints = FPCalc.Fingerprint(rhsEpisode);

    return FingerprintEpisodes(lhsEpisode.EpisodeId, leftPoints, rhsEpisode.EpisodeId, rightPoints);
}
/// <summary>
/// Searches two episode fingerprints for an introduction sequence shared between them.
/// </summary>
/// <param name="lhsId">First episode id.</param>
/// <param name="lhs">Fingerprint of the first episode.</param>
/// <param name="rhsId">Second episode id.</param>
/// <param name="rhs">Fingerprint of the second episode.</param>
/// <returns>
/// Intros for the first and second episodes. When nothing is found both intros are
/// created with a start and end of 0.
/// </returns>
public (Intro Lhs, Intro Rhs) FingerprintEpisodes(
    Guid lhsId,
    ReadOnlyCollection<uint> lhs,
    Guid rhsId,
    ReadOnlyCollection<uint> rhs)
{
    // Moves a range that begins within the first 5 seconds to the very start of the episode.
    static TimeRange SnapToBeginning(TimeRange range)
    {
        if (range.Start <= 5)
        {
            range.Start = 0;
        }

        return range;
    }

    // Only as many points as the shorter fingerprint has can ever be compared.
    var maxShift = Math.Min(lhs.Count, rhs.Count);

    // Quick scan: only try shifts of at most 5 seconds (±5/0.128 = ±40 samples).
    var (lhsRanges, rhsRanges) = ShiftEpisodes(lhs, rhs, -40, 40);

    if (lhsRanges.Count > 0)
    {
        _logger.LogDebug("intro found with quick scan");
    }
    else
    {
        // Nothing found nearby, so retry with every possible shift.
        _logger.LogDebug("using full scan");
        (lhsRanges, rhsRanges) = ShiftEpisodes(lhs, rhs, -maxShift, maxShift);
    }

    if (lhsRanges.Count == 0)
    {
        _logger.LogDebug(
            "Unable to find a shared introduction sequence {LHS} and {RHS}",
            lhsId,
            rhsId);

        return (new Intro(lhsId, 0, 0), new Intro(rhsId, 0, 0));
    }

    // Sort both sides and take the first element of each as the intro.
    // NOTE(review): this relies on TimeRange's comparison ordering the longest range first - confirm.
    lhsRanges.Sort();
    rhsRanges.Sort();

    var lhsBest = SnapToBeginning(lhsRanges[0]);
    var rhsBest = SnapToBeginning(rhsRanges[0]);

    return (
        new Intro(lhsId, lhsBest.Start, lhsBest.End),
        new Intro(rhsId, rhsBest.Start, rhsBest.End));
}
/// <summary>
/// Tries every shift amount in the inclusive range [lower, upper] and collects the contiguous
/// time ranges that were discovered.
/// </summary>
/// <param name="lhs">First episode fingerprint.</param>
/// <param name="rhs">Second episode fingerprint.</param>
/// <param name="lower">Lower end of the shift range.</param>
/// <param name="upper">Upper end of the shift range.</param>
/// <returns>Matching ranges for both episodes; entries at the same index come from the same shift.</returns>
private static (List<TimeRange> Lhs, List<TimeRange> Rhs) ShiftEpisodes(
    ReadOnlyCollection<uint> lhs,
    ReadOnlyCollection<uint> rhs,
    int lower,
    int upper)
{
    var leftMatches = new List<TimeRange>();
    var rightMatches = new List<TimeRange>();

    for (var shift = lower; shift <= upper; shift++)
    {
        var match = FindContiguous(lhs, rhs, shift);

        // A pair whose ends are both 0 means nothing was found at this shift.
        if (match.Lhs.End != 0 || match.Rhs.End != 0)
        {
            leftMatches.Add(match.Lhs);
            rightMatches.Add(match.Rhs);
        }
    }

    return (leftMatches, rightMatches);
}
/// <summary>
/// Finds the longest contiguous region of similar audio between two fingerprints using the provided shift amount.
/// </summary>
/// <param name="lhs">First fingerprint to compare.</param>
/// <param name="rhs">Second fingerprint to compare.</param>
/// <param name="shiftAmount">Amount to shift one fingerprint by. Negative values offset the first fingerprint, other values offset the second.</param>
/// <returns>
/// The matching time range in each fingerprint, or two default-constructed ranges when no
/// sufficiently long region was found.
/// </returns>
private static (TimeRange Lhs, TimeRange Rhs) FindContiguous(
    ReadOnlyCollection<uint> lhs,
    ReadOnlyCollection<uint> rhs,
    int shiftAmount)
{
    // Exactly one side is offset, depending on the sign of the shift.
    var leftOffset = shiftAmount < 0 ? -shiftAmount : 0;
    var rightOffset = shiftAmount < 0 ? 0 : shiftAmount;

    // Times (in seconds) at which the two fingerprints were found to be similar.
    var lhsTimes = new List<double>();
    var rhsTimes = new List<double>();

    var comparable = Math.Min(lhs.Count, rhs.Count) - Math.Abs(shiftAmount);
    for (var step = 0; step < comparable; step++)
    {
        var leftIndex = step + leftOffset;
        var rightIndex = step + rightOffset;

        // XOR leaves a bit set wherever the two points disagree; few set bits means similar audio.
        var differingBits = CountBits(lhs[leftIndex] ^ rhs[rightIndex]);
        if (differingBits <= MaximumDifferences)
        {
            lhsTimes.Add(leftIndex * SamplesToSeconds);
            rhsTimes.Add(rightIndex * SamplesToSeconds);
        }
    }

    // Ensure the last timestamp is checked
    lhsTimes.Add(double.MaxValue);
    rhsTimes.Add(double.MaxValue);

    // With every point compared at this shift, look for a contiguous time range on the left side.
    var leftRange = TimeRangeHelpers.FindContiguous(lhsTimes.ToArray(), MaximumDistance);
    if (leftRange is null || leftRange.Duration < MinimumIntroDuration)
    {
        return (new TimeRange(), new TimeRange());
    }

    // Since LHS had a contiguous time range, RHS must have one also.
    var rightRange = TimeRangeHelpers.FindContiguous(rhsTimes.ToArray(), MaximumDistance)!;

    // Pull the end timestamps back a little so as little content as possible is skipped over.
    // Longer matches are trimmed by twice as much as medium ones; short matches are left alone.
    double trim = leftRange.Duration >= 90
        ? 2 * MaximumDistance
        : leftRange.Duration >= 35
            ? MaximumDistance
            : 0;

    if (trim > 0)
    {
        leftRange.End -= trim;
        rightRange.End -= trim;
    }

    return (leftRange, rightRange);
}
/// <summary>
/// Count the number of bits that are set in the provided number.
/// </summary>
/// <param name="number">Number to count bits in.</param>
/// <returns>Number of bits that are equal to 1 (between 0 and 32 inclusive).</returns>
public static int CountBits(uint number)
{
    // Use the framework's population count instead of testing all 32 bits by hand.
    // Fully qualified so no additional using directive is required.
    return System.Numerics.BitOperations.PopCount(number);
}
/// <summary>
/// Reanalyze the most recently analyzed season.
/// Looks for and fixes intro durations that were either not found or are statistical outliers.
/// </summary>
/// <remarks>
/// Nothing is changed unless at least half of the episodes have a valid intro and the most
/// common intro duration bucket holds at least half of those valid intros. Episodes that have
/// no stored intro (for example because fingerprinting them failed) are treated as having no
/// valid intro instead of aborting the reanalysis.
/// </remarks>
/// <param name="episodes">List of episodes that was just analyzed.</param>
private void CheckSeason(List<QueuedEpisode> episodes)
{
    var intros = Plugin.Instance!.Intros;

    var totalCount = episodes.Count;
    if (totalCount == 0)
    {
        // Nothing to check, and the percentage below would divide by zero.
        return;
    }

    // First, assert that at least half of the episodes in this season have an intro.
    var validCount = 0;
    foreach (var episode in episodes)
    {
        if (intros.TryGetValue(episode.EpisodeId, out var intro) && intro.Valid)
        {
            validCount++;
        }
    }

    var percentValid = (validCount * 100) / totalCount;
    _logger.LogDebug("Found intros in {Valid}/{Total} ({Percent}%) of episodes", validCount, totalCount, percentValid);
    if (percentValid < 50)
    {
        return;
    }

    // Create a histogram of all episode durations
    var histogram = new Dictionary<int, SeasonHistogram>();
    foreach (var episode in episodes)
    {
        var id = episode.EpisodeId;
        var duration = GetIntroDuration(id);

        if (duration < MinimumIntroDuration)
        {
            continue;
        }

        // Bucket the duration into equally sized groups
        var bucket = Convert.ToInt32(Math.Floor(duration / ReanalysisBucketWidth)) * ReanalysisBucketWidth;

        // TryAdd returns true when the key was successfully added (i.e. for newly created buckets).
        // Newly created buckets are initialized with the provided episode ID, so nothing else needs to be done for them.
        if (histogram.TryAdd(bucket, new SeasonHistogram(id)))
        {
            continue;
        }

        histogram[bucket].Episodes.Add(id);
    }

    // Find the bucket that was seen most often, as this is likely to be the true intro length.
    var maxDuration = 0;
    var maxBucket = new SeasonHistogram(Guid.Empty);
    foreach (var entry in histogram)
    {
        if (entry.Value.Count > maxBucket.Count)
        {
            maxDuration = entry.Key;
            maxBucket = entry.Value;
        }
    }

    // Ensure that the most frequently seen bucket has a majority.
    // validCount is at least 1 here because the 50% check above passed.
    // TODO: change to debug
    percentValid = (maxBucket.Count * 100) / validCount;
    _logger.LogInformation(
        "Intro duration {Duration} appeared {Frequency} times ({Percent}%)",
        maxDuration,
        maxBucket.Count,
        percentValid);

    if (percentValid < 50 || maxBucket.Episodes[0].Equals(Guid.Empty))
    {
        return;
    }

    _logger.LogInformation("Reanalyzing {Count} episodes", totalCount - maxBucket.Count);

    // TODO: pick two episodes at random
    // Cache the fingerprint of the first episode in the max bucket to save CPU cycles.
    // Index 0 is used because it is the only entry the check above has shown to exist.
    var referenceId = maxBucket.Episodes[0];
    var lhs = episodes.Find(x => x.EpisodeId == referenceId);
    if (lhs is null)
    {
        _logger.LogError("Reanalysis failed to get episode from bucket");
        return;
    }

    ReadOnlyCollection<uint> lhsFingerprint;
    try
    {
        lhsFingerprint = FPCalc.Fingerprint(lhs);
    }
    catch (FingerprintException ex)
    {
        _logger.LogWarning("Skipping reanalysis of {Show} season {Season}: {Exception}", lhs.SeriesName, lhs.SeasonNumber, ex);
        return;
    }

    var lhsDuration = GetIntroDuration(lhs.EpisodeId);
    var (lowTargetDuration, highTargetDuration) = (
        lhsDuration - ReanalysisTolerance,
        lhsDuration + ReanalysisTolerance);

    foreach (var episode in episodes)
    {
        // Don't reanalyze episodes from the max bucket
        if (maxBucket.Episodes.Contains(episode.EpisodeId))
        {
            continue;
        }

        var oldDuration = GetIntroDuration(episode.EpisodeId);

        // TODO: remove
        // Only the last 40 characters of the path are logged; shorter paths are logged in full.
        var shortPath = episode.Path.Length > 40
            ? episode.Path.Substring(episode.Path.Length - 40)
            : episode.Path;

        // If the episode's intro duration is close enough to the targeted bucket, leave it alone.
        if (Math.Abs(lhsDuration - oldDuration) <= ReanalysisTolerance)
        {
            _logger.LogInformation(
                "Not reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
                shortPath,
                Math.Round(oldDuration, 2),
                maxDuration);

            continue;
        }

        _logger.LogDebug(
            "Reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
            shortPath,
            Math.Round(oldDuration, 2),
            maxDuration);

        // A single episode that cannot be fingerprinted must not abort the rest of the season.
        ReadOnlyCollection<uint> rhsFingerprint;
        try
        {
            rhsFingerprint = FPCalc.Fingerprint(episode);
        }
        catch (FingerprintException ex)
        {
            _logger.LogWarning("Skipping reanalysis of {Path}: {Exception}", shortPath, ex);
            continue;
        }

        // Analyze the episode again, ignoring whatever is returned for the known good episode.
        var (_, newRhs) = FingerprintEpisodes(
            lhs.EpisodeId,
            lhsFingerprint,
            episode.EpisodeId,
            rhsFingerprint);

        // Ensure that the new intro duration is within the targeted bucket and longer than what was found previously.
        var newDuration = Math.Round(newRhs.IntroEnd - newRhs.IntroStart, 2);
        if (newDuration < oldDuration || newDuration < lowTargetDuration || newDuration > highTargetDuration)
        {
            _logger.LogInformation(
                "Ignoring reanalysis for {Path} (was {Initial}, now is {New})",
                shortPath,
                oldDuration,
                newDuration);

            continue;
        }

        // TODO: change to debug
        _logger.LogInformation(
            "Reanalysis succeeded for {Path} (was {Initial}, now is {New})",
            shortPath,
            oldDuration,
            newDuration);

        Plugin.Instance!.Intros[episode.EpisodeId] = newRhs;
    }
}

/// <summary>
/// Gets the duration (in seconds, rounded to two decimal places) of the intro stored for an episode.
/// </summary>
/// <param name="id">Episode id.</param>
/// <returns>The intro duration, or 0 when no intro is stored for the episode or the stored intro is not valid.</returns>
private double GetIntroDuration(Guid id)
{
    // Episodes without a stored intro are reported as 0 rather than throwing KeyNotFoundException.
    return Plugin.Instance!.Intros.TryGetValue(id, out var intro) && intro.Valid
        ? Math.Round(intro.IntroEnd - intro.IntroStart, 2)
        : 0;
}
/// <summary>
/// Get task triggers.
/// </summary>
/// <returns>A single trigger that runs this task once per day at midnight.</returns>
public IEnumerable<TaskTriggerInfo> GetDefaultTriggers()
{
    return new[]
    {
        new TaskTriggerInfo
        {
            Type = TaskTriggerInfo.TriggerDaily,

            // A time of day is an offset from midnight and must be shorter than one day.
            // The previous value, TimeSpan.FromDays(24), was an offset of 24 days.
            // NOTE(review): midnight is assumed to be the intended time - confirm.
            TimeOfDayTicks = TimeSpan.FromHours(0).Ticks
        }
    };
}
}