Add second analysis pass

This commit is contained in:
ConfusedPolarBear 2022-05-13 01:13:13 -05:00
parent 56b88d55d4
commit 3370f1058d
4 changed files with 294 additions and 56 deletions

View File

@ -63,16 +63,18 @@ public class TestFPCalc
var logger = new Logger<FingerprinterTask>(new LoggerFactory());
var task = new FingerprinterTask(logger);
var lhs = queueEpisode("audio/big_buck_bunny_intro.mp3");
var rhs = queueEpisode("audio/big_buck_bunny_clip.mp3");
var lhsEpisode = queueEpisode("audio/big_buck_bunny_intro.mp3");
var rhsEpisode = queueEpisode("audio/big_buck_bunny_clip.mp3");
var result = task.FingerprintEpisodes(lhs, rhs);
var actual = FingerprinterTask.LastIntro;
var (lhs, rhs) = task.FingerprintEpisodes(lhsEpisode, rhsEpisode);
Assert.True(result);
Assert.True(actual.Valid);
Assert.Equal(5.12, actual.IntroStart);
Assert.Equal(22.912, actual.IntroEnd);
Assert.True(lhs.Valid);
Assert.Equal(0, lhs.IntroStart);
Assert.Equal(17.792, lhs.IntroEnd);
Assert.True(rhs.Valid);
Assert.Equal(5.12, rhs.IntroStart);
Assert.Equal(22.912, rhs.IntroEnd);
}
private QueuedEpisode queueEpisode(string path)

View File

@ -8,16 +8,36 @@ namespace ConfusedPolarBear.Plugin.IntroSkipper;
/// </summary>
public class Intro
{
/// <summary>
/// Initializes a new instance of the <see cref="Intro"/> class for a specific episode.
/// </summary>
/// <param name="episode">Id of the episode this intro belongs to.</param>
/// <param name="start">Intro start time.</param>
/// <param name="end">Intro end time.</param>
public Intro(Guid episode, double start, double end)
{
    // Populate all three properties in a single tuple assignment.
    (EpisodeId, IntroStart, IntroEnd) = (episode, start, end);
}
/// <summary>
/// Initializes a new instance of the <see cref="Intro"/> class without assigning any of its properties.
/// </summary>
public Intro()
{
}
/// <summary>
/// Gets or sets the Episode ID.
/// </summary>
public Guid EpisodeId { get; set; }
/// <summary>
/// Gets or sets a value indicating whether this introduction is valid or not.
/// Gets a value indicating whether this introduction is valid or not.
/// Invalid results must not be returned through the API.
/// </summary>
public bool Valid { get; set; }
public bool Valid => IntroEnd > 0;
/// <summary>
/// Gets or sets the introduction sequence start time.

View File

@ -0,0 +1,31 @@
#pragma warning disable CA1815
using System;
using System.Collections.ObjectModel;
namespace ConfusedPolarBear.Plugin.IntroSkipper;
/// <summary>
/// A single bucket of the season histogram: the set of episodes that share one intro duration.
/// </summary>
public struct SeasonHistogram
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SeasonHistogram"/> struct, seeded with one episode.
    /// </summary>
    /// <param name="firstEpisode">First episode seen with this duration.</param>
    public SeasonHistogram(Guid firstEpisode)
    {
        Episodes = new Collection<Guid> { firstEpisode };
    }

    /// <summary>
    /// Gets the episodes with this duration.
    /// This is only populated by the constructor; a default-initialized struct has no collection.
    /// </summary>
    public Collection<Guid> Episodes { get; }

    /// <summary>
    /// Gets the number of times an episode with an intro of this duration has been seen.
    /// Evaluates to zero when the struct was default-initialized.
    /// </summary>
    public int Count => Episodes is null ? 0 : Episodes.Count;
}

View File

@ -33,6 +33,16 @@ public class FingerprinterTask : IScheduledTask
/// </summary>
private const double SamplesToSeconds = 0.128;
/// <summary>
/// Width (in seconds) of each intro duration bucket in the reanalysis histogram.
/// </summary>
private const int ReanalysisBucketWidth = 5;
/// <summary>
/// Maximum time (in seconds) that an intro's duration can be different from a typical intro's duration before marking it for reanalysis.
/// </summary>
private const double ReanalysisTolerance = ReanalysisBucketWidth * 1.5;
private readonly ILogger<FingerprinterTask> _logger;
/// <summary>
@ -44,11 +54,6 @@ public class FingerprinterTask : IScheduledTask
_logger = logger;
}
/// <summary>
/// Gets the last detected intro sequence. Only populated when a unit test is running.
/// </summary>
public static Intro LastIntro { get; private set; } = new Intro();
/// <summary>
/// Gets the task name.
/// </summary>
@ -149,14 +154,18 @@ public class FingerprinterTask : IScheduledTask
{
_logger.LogDebug("Analyzing {LHS} and {RHS}", lhs.Path, rhs.Path);
if (FingerprintEpisodes(lhs, rhs))
{
everFoundIntro = true;
}
else
var (lhsIntro, rhsIntro) = FingerprintEpisodes(lhs, rhs);
Plugin.Instance.Intros![lhsIntro.EpisodeId] = lhsIntro;
Plugin.Instance.Intros![rhsIntro.EpisodeId] = rhsIntro;
if (!lhsIntro.Valid)
{
failures += 2;
continue;
}
everFoundIntro = true;
}
catch (FingerprintException ex)
{
@ -175,6 +184,14 @@ public class FingerprinterTask : IScheduledTask
{
break;
}
if (!everFoundIntro)
{
continue;
}
// Reanalyze this season to check for (and hopefully correct) outliers and failed episodes.
CheckSeason(season.Value);
}
return Task.CompletedTask;
@ -185,12 +202,33 @@ public class FingerprinterTask : IScheduledTask
/// </summary>
/// <param name="lhsEpisode">First episode to analyze.</param>
/// <param name="rhsEpisode">Second episode to analyze.</param>
/// <returns>true if an intro was found in both episodes, otherwise false.</returns>
public bool FingerprintEpisodes(QueuedEpisode lhsEpisode, QueuedEpisode rhsEpisode)
/// <returns>Intros for the first and second episodes.</returns>
public (Intro Lhs, Intro Rhs) FingerprintEpisodes(QueuedEpisode lhsEpisode, QueuedEpisode rhsEpisode)
{
var lhs = FPCalc.Fingerprint(lhsEpisode);
var rhs = FPCalc.Fingerprint(rhsEpisode);
var lhsFingerprint = FPCalc.Fingerprint(lhsEpisode);
var rhsFingerprint = FPCalc.Fingerprint(rhsEpisode);
return FingerprintEpisodes(
lhsEpisode.EpisodeId,
lhsFingerprint,
rhsEpisode.EpisodeId,
rhsFingerprint);
}
/// <summary>
/// Analyze two episodes to find an introduction sequence shared between them.
/// </summary>
/// <param name="lhsId">First episode id.</param>
/// <param name="lhs">First episode to analyze.</param>
/// <param name="rhsId">Second episode id.</param>
/// <param name="rhs">Second episode to analyze.</param>
/// <returns>Intros for the first and second episodes.</returns>
public (Intro Lhs, Intro Rhs) FingerprintEpisodes(
Guid lhsId,
ReadOnlyCollection<uint> lhs,
Guid rhsId,
ReadOnlyCollection<uint> rhs)
{
var lhsRanges = new List<TimeRange>();
var rhsRanges = new List<TimeRange>();
@ -219,16 +257,11 @@ public class FingerprinterTask : IScheduledTask
if (lhsRanges.Count == 0)
{
_logger.LogDebug(
"Unable to find a shared introduction sequence between {LHS} and {RHS}",
lhsEpisode.Path,
rhsEpisode.Path);
"Unable to find a shared introduction sequence {LHS} and {RHS}",
lhsId,
rhsId);
// TODO: if an episode fails but others in the season succeed, reanalyze it against two that succeeded.
StoreIntro(lhsEpisode.EpisodeId, 0, 0);
StoreIntro(rhsEpisode.EpisodeId, 0, 0);
return false;
return (new Intro(lhsId, 0, 0), new Intro(rhsId, 0, 0));
}
// After comparing both episodes at all possible shift positions, store the longest time range as the intro.
@ -249,10 +282,7 @@ public class FingerprinterTask : IScheduledTask
rhsIntro.Start = 0;
}
StoreIntro(lhsEpisode.EpisodeId, lhsIntro.Start, lhsIntro.End);
StoreIntro(rhsEpisode.EpisodeId, rhsIntro.Start, rhsIntro.End);
return true;
return (new Intro(lhsId, lhsIntro.Start, lhsIntro.End), new Intro(rhsId, rhsIntro.Start, rhsIntro.End));
}
/// <summary>
@ -366,25 +396,6 @@ public class FingerprinterTask : IScheduledTask
return (lContiguous, rContiguous);
}
/// <summary>
/// Record the intro that was detected for an episode.
/// The result is saved in the plugin's intro collection, or in <c>LastIntro</c> when no plugin instance exists.
/// </summary>
/// <param name="episode">Id of the analyzed episode.</param>
/// <param name="introStart">Intro start time.</param>
/// <param name="introEnd">Intro end time.</param>
private static void StoreIntro(Guid episode, double introStart, double introEnd)
{
    // Validity is judged by the end time alone, since an intro can legitimately begin at the very start.
    var isValid = introEnd > 0;

    var detected = new Intro
    {
        EpisodeId = episode,
        IntroStart = introStart,
        IntroEnd = introEnd,
        Valid = isValid
    };

    if (Plugin.Instance is not null)
    {
        Plugin.Instance.Intros[episode] = detected;
        return;
    }

    // No plugin instance is available, so expose the result through the static property instead.
    LastIntro = detected;
}
/// <summary>
/// Count the number of bits that are set in the provided number.
/// </summary>
@ -406,6 +417,180 @@ public class FingerprinterTask : IScheduledTask
return count;
}
/// <summary>
/// Reanalyze the most recently analyzed season.
/// Looks for and fixes intro durations that were either not found or are statistical outliers.
/// </summary>
/// <remarks>
/// The season is only reanalyzed when at least half of its episodes have a valid intro and the most common
/// intro duration bucket holds at least half of those valid intros. Every episode outside that bucket whose
/// duration differs from the reference episode by more than <c>ReanalysisTolerance</c> is fingerprinted again
/// against the reference episode; the new result replaces the stored one only if it is not shorter than the
/// old one and falls inside the tolerance window.
/// </remarks>
/// <param name="episodes">List of episodes that was just analyzed.</param>
private void CheckSeason(List<QueuedEpisode> episodes)
{
    var totalCount = episodes.Count;
    if (totalCount == 0)
    {
        // Nothing to check, and the percentage below would otherwise divide by zero.
        return;
    }

    var intros = Plugin.Instance!.Intros;

    // An episode is only stored after a successful comparison, so episodes whose fingerprinting failed
    // may have no entry at all. Treat a missing entry exactly like an episode without an intro.
    double DurationOf(Guid id) => intros.ContainsKey(id) ? GetIntroDuration(id) : 0;

    // First, assert that at least half of the episodes in this season have an intro.
    var validCount = 0;
    foreach (var episode in episodes)
    {
        if (intros.TryGetValue(episode.EpisodeId, out var intro) && intro.Valid)
        {
            validCount++;
        }
    }

    var percentValid = (validCount * 100) / totalCount;
    _logger.LogDebug("Found intros in {Valid}/{Total} ({Percent}%) of episodes", validCount, totalCount, percentValid);
    if (percentValid < 50)
    {
        return;
    }

    // Create a histogram of all episode durations
    var histogram = new Dictionary<int, SeasonHistogram>();
    foreach (var episode in episodes)
    {
        var id = episode.EpisodeId;
        var duration = DurationOf(id);

        if (duration < MinimumIntroDuration)
        {
            continue;
        }

        // Bucket the duration into equally sized groups
        var bucket = Convert.ToInt32(Math.Floor(duration / ReanalysisBucketWidth)) * ReanalysisBucketWidth;

        // TryAdd returns true when the key was successfully added (i.e. for newly created buckets).
        // Newly created buckets are initialized with the provided episode ID, so nothing else needs to be done for them.
        if (histogram.TryAdd(bucket, new SeasonHistogram(id)))
        {
            continue;
        }

        histogram[bucket].Episodes.Add(id);
    }

    // Find the bucket that was seen most often, as this is likely to be the true intro length.
    // The sentinel bucket already counts one (empty) episode, so a real bucket only wins with two or more episodes.
    var maxDuration = 0;
    var maxBucket = new SeasonHistogram(Guid.Empty);
    foreach (var entry in histogram)
    {
        if (entry.Value.Count > maxBucket.Count)
        {
            maxDuration = entry.Key;
            maxBucket = entry.Value;
        }
    }

    // Ensure that the most frequently seen bucket has a majority
    // TODO: change to debug
    percentValid = (maxBucket.Count * 100) / validCount;
    _logger.LogInformation(
        "Intro duration {Duration} appeared {Frequency} times ({Percent}%)",
        maxDuration,
        maxBucket.Count,
        percentValid);

    if (percentValid < 50 || maxBucket.Episodes[0].Equals(Guid.Empty))
    {
        return;
    }

    _logger.LogInformation("Reanalyzing {Count} episodes", totalCount - maxBucket.Count);

    // TODO: pick two episodes at random
    // Use the second episode of the max bucket as the known good reference. A winning bucket always
    // holds at least two episodes (see the sentinel above), so this index is in range.
    var referenceId = maxBucket.Episodes[1];
    var lhs = episodes.Find(x => x.EpisodeId == referenceId);
    if (lhs is null)
    {
        _logger.LogError("Reanalysis failed to get episode from bucket");
        return;
    }

    // Cache the fingerprint of the reference episode to save CPU cycles
    ReadOnlyCollection<uint> lhsFingerprint;
    try
    {
        lhsFingerprint = FPCalc.Fingerprint(lhs);
    }
    catch (FingerprintException ex)
    {
        _logger.LogWarning("Skipping reanalysis of {Show} season {Season}: {Exception}", lhs.SeriesName, lhs.SeasonNumber, ex);
        return;
    }

    var lhsDuration = DurationOf(lhs.EpisodeId);
    var (lowTargetDuration, highTargetDuration) = (
        lhsDuration - ReanalysisTolerance,
        lhsDuration + ReanalysisTolerance);

    // Set lookup so the membership test inside the loop does not rescan the bucket for every episode.
    var typicalEpisodes = new HashSet<Guid>(maxBucket.Episodes);

    foreach (var episode in episodes)
    {
        // Don't reanalyze episodes from the max bucket
        if (typicalEpisodes.Contains(episode.EpisodeId))
        {
            continue;
        }

        var oldDuration = DurationOf(episode.EpisodeId);

        // TODO: remove
        // Only the last 40 characters are logged; shorter paths are logged in full instead of throwing.
        var shortPath = episode.Path.Length > 40
            ? episode.Path.Substring(episode.Path.Length - 40)
            : episode.Path;

        // If the episode's intro duration is close enough to the targeted bucket, leave it alone.
        if (Math.Abs(lhsDuration - oldDuration) <= ReanalysisTolerance)
        {
            _logger.LogInformation(
                "Not reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
                shortPath,
                Math.Round(oldDuration, 2),
                maxDuration);

            continue;
        }

        _logger.LogDebug(
            "Reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
            shortPath,
            Math.Round(oldDuration, 2),
            maxDuration);

        // A single episode that cannot be fingerprinted must not abort the reanalysis of the others.
        ReadOnlyCollection<uint> rhsFingerprint;
        try
        {
            rhsFingerprint = FPCalc.Fingerprint(episode);
        }
        catch (FingerprintException ex)
        {
            _logger.LogWarning("Skipping reanalysis of {Path}: {Exception}", shortPath, ex);
            continue;
        }

        // Analyze the episode again, ignoring whatever is returned for the known good episode.
        var (_, newRhs) = FingerprintEpisodes(
            lhs.EpisodeId,
            lhsFingerprint,
            episode.EpisodeId,
            rhsFingerprint);

        // Ensure that the new intro duration is within the targeted bucket and longer than what was found previously.
        var newDuration = Math.Round(newRhs.IntroEnd - newRhs.IntroStart, 2);
        if (newDuration < oldDuration || newDuration < lowTargetDuration || newDuration > highTargetDuration)
        {
            _logger.LogInformation(
                "Ignoring reanalysis for {Path} (was {Initial}, now is {New})",
                shortPath,
                oldDuration,
                newDuration);

            continue;
        }

        // TODO: change to debug
        _logger.LogInformation(
            "Reanalysis succeeded for {Path} (was {Initial}, now is {New})",
            shortPath,
            oldDuration,
            newDuration);

        intros[episode.EpisodeId] = newRhs;
    }
}
/// <summary>
/// Get the duration of the intro currently stored for an episode.
/// </summary>
/// <param name="id">Episode id.</param>
/// <returns>
/// Intro duration rounded to two decimal places, or 0 when the episode has no stored intro
/// or its stored intro is not valid.
/// </returns>
private double GetIntroDuration(Guid id)
{
    // Look the entry up defensively: an episode that was never stored must not raise KeyNotFoundException.
    if (!Plugin.Instance!.Intros.TryGetValue(id, out var intro) || !intro.Valid)
    {
        return 0;
    }

    return Math.Round(intro.IntroEnd - intro.IntroStart, 2);
}
/// <summary>
/// Get task triggers.
/// </summary>