Add second analysis pass
This commit is contained in:
parent
56b88d55d4
commit
3370f1058d
@ -63,16 +63,18 @@ public class TestFPCalc
|
|||||||
var logger = new Logger<FingerprinterTask>(new LoggerFactory());
|
var logger = new Logger<FingerprinterTask>(new LoggerFactory());
|
||||||
var task = new FingerprinterTask(logger);
|
var task = new FingerprinterTask(logger);
|
||||||
|
|
||||||
var lhs = queueEpisode("audio/big_buck_bunny_intro.mp3");
|
var lhsEpisode = queueEpisode("audio/big_buck_bunny_intro.mp3");
|
||||||
var rhs = queueEpisode("audio/big_buck_bunny_clip.mp3");
|
var rhsEpisode = queueEpisode("audio/big_buck_bunny_clip.mp3");
|
||||||
|
|
||||||
var result = task.FingerprintEpisodes(lhs, rhs);
|
var (lhs, rhs) = task.FingerprintEpisodes(lhsEpisode, rhsEpisode);
|
||||||
var actual = FingerprinterTask.LastIntro;
|
|
||||||
|
|
||||||
Assert.True(result);
|
Assert.True(lhs.Valid);
|
||||||
Assert.True(actual.Valid);
|
Assert.Equal(0, lhs.IntroStart);
|
||||||
Assert.Equal(5.12, actual.IntroStart);
|
Assert.Equal(17.792, lhs.IntroEnd);
|
||||||
Assert.Equal(22.912, actual.IntroEnd);
|
|
||||||
|
Assert.True(rhs.Valid);
|
||||||
|
Assert.Equal(5.12, rhs.IntroStart);
|
||||||
|
Assert.Equal(22.912, rhs.IntroEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
private QueuedEpisode queueEpisode(string path)
|
private QueuedEpisode queueEpisode(string path)
|
||||||
|
@ -8,16 +8,36 @@ namespace ConfusedPolarBear.Plugin.IntroSkipper;
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public class Intro
|
public class Intro
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Initializes a new instance of the <see cref="Intro"/> class.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="episode">Episode.</param>
|
||||||
|
/// <param name="start">Intro start time.</param>
|
||||||
|
/// <param name="end">Intro end time.</param>
|
||||||
|
public Intro(Guid episode, double start, double end)
|
||||||
|
{
|
||||||
|
EpisodeId = episode;
|
||||||
|
IntroStart = start;
|
||||||
|
IntroEnd = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Initializes a new instance of the <see cref="Intro"/> class.
|
||||||
|
/// </summary>
|
||||||
|
public Intro()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets or sets the Episode ID.
|
/// Gets or sets the Episode ID.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public Guid EpisodeId { get; set; }
|
public Guid EpisodeId { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets or sets a value indicating whether this introduction is valid or not.
|
/// Gets a value indicating whether this introduction is valid or not.
|
||||||
/// Invalid results must not be returned through the API.
|
/// Invalid results must not be returned through the API.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public bool Valid { get; set; }
|
public bool Valid => IntroEnd > 0;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets or sets the introduction sequence start time.
|
/// Gets or sets the introduction sequence start time.
|
||||||
|
@ -0,0 +1,31 @@
|
|||||||
|
#pragma warning disable CA1815
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Collections.ObjectModel;
|
||||||
|
|
||||||
|
namespace ConfusedPolarBear.Plugin.IntroSkipper;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Histogram entry for episodes in a season.
|
||||||
|
/// </summary>
|
||||||
|
public struct SeasonHistogram
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Initializes a new instance of the <see cref="SeasonHistogram"/> struct.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="firstEpisode">First episode seen with this duration.</param>
|
||||||
|
public SeasonHistogram(Guid firstEpisode)
|
||||||
|
{
|
||||||
|
Episodes.Add(firstEpisode);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Gets episodes with this duration.
|
||||||
|
/// </summary>
|
||||||
|
public Collection<Guid> Episodes { get; } = new Collection<Guid>();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Gets the number of times an episode with an intro of this duration has been seen.
|
||||||
|
/// </summary>
|
||||||
|
public int Count => Episodes?.Count ?? 0;
|
||||||
|
}
|
@ -33,6 +33,16 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
private const double SamplesToSeconds = 0.128;
|
private const double SamplesToSeconds = 0.128;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Bucket size used in the reanalysis histogram.
|
||||||
|
/// </summary>
|
||||||
|
private const int ReanalysisBucketWidth = 5;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Maximum time (in seconds) that an intro's duration can be different from a typical intro's duration before marking it for reanalysis.
|
||||||
|
/// </summary>
|
||||||
|
private const double ReanalysisTolerance = ReanalysisBucketWidth * 1.5;
|
||||||
|
|
||||||
private readonly ILogger<FingerprinterTask> _logger;
|
private readonly ILogger<FingerprinterTask> _logger;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -44,11 +54,6 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
_logger = logger;
|
_logger = logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the last detected intro sequence. Only populated when a unit test is running.
|
|
||||||
/// </summary>
|
|
||||||
public static Intro LastIntro { get; private set; } = new Intro();
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the task name.
|
/// Gets the task name.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@ -149,14 +154,18 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
{
|
{
|
||||||
_logger.LogDebug("Analyzing {LHS} and {RHS}", lhs.Path, rhs.Path);
|
_logger.LogDebug("Analyzing {LHS} and {RHS}", lhs.Path, rhs.Path);
|
||||||
|
|
||||||
if (FingerprintEpisodes(lhs, rhs))
|
var (lhsIntro, rhsIntro) = FingerprintEpisodes(lhs, rhs);
|
||||||
{
|
|
||||||
everFoundIntro = true;
|
Plugin.Instance.Intros![lhsIntro.EpisodeId] = lhsIntro;
|
||||||
}
|
Plugin.Instance.Intros![rhsIntro.EpisodeId] = rhsIntro;
|
||||||
else
|
|
||||||
|
if (!lhsIntro.Valid)
|
||||||
{
|
{
|
||||||
failures += 2;
|
failures += 2;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
everFoundIntro = true;
|
||||||
}
|
}
|
||||||
catch (FingerprintException ex)
|
catch (FingerprintException ex)
|
||||||
{
|
{
|
||||||
@ -175,6 +184,14 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!everFoundIntro)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reanalyze this season to check for (and hopefully correct) outliers and failed episodes.
|
||||||
|
CheckSeason(season.Value);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Task.CompletedTask;
|
return Task.CompletedTask;
|
||||||
@ -185,12 +202,33 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="lhsEpisode">First episode to analyze.</param>
|
/// <param name="lhsEpisode">First episode to analyze.</param>
|
||||||
/// <param name="rhsEpisode">Second episode to analyze.</param>
|
/// <param name="rhsEpisode">Second episode to analyze.</param>
|
||||||
/// <returns>true if an intro was found in both episodes, otherwise false.</returns>
|
/// <returns>Intros for the first and second episodes.</returns>
|
||||||
public bool FingerprintEpisodes(QueuedEpisode lhsEpisode, QueuedEpisode rhsEpisode)
|
public (Intro Lhs, Intro Rhs) FingerprintEpisodes(QueuedEpisode lhsEpisode, QueuedEpisode rhsEpisode)
|
||||||
{
|
{
|
||||||
var lhs = FPCalc.Fingerprint(lhsEpisode);
|
var lhsFingerprint = FPCalc.Fingerprint(lhsEpisode);
|
||||||
var rhs = FPCalc.Fingerprint(rhsEpisode);
|
var rhsFingerprint = FPCalc.Fingerprint(rhsEpisode);
|
||||||
|
|
||||||
|
return FingerprintEpisodes(
|
||||||
|
lhsEpisode.EpisodeId,
|
||||||
|
lhsFingerprint,
|
||||||
|
rhsEpisode.EpisodeId,
|
||||||
|
rhsFingerprint);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Analyze two episodes to find an introduction sequence shared between them.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="lhsId">First episode id.</param>
|
||||||
|
/// <param name="lhs">First episode to analyze.</param>
|
||||||
|
/// <param name="rhsId">Second episode id.</param>
|
||||||
|
/// <param name="rhs">Second episode to analyze.</param>
|
||||||
|
/// <returns>Intros for the first and second episodes.</returns>
|
||||||
|
public (Intro Lhs, Intro Rhs) FingerprintEpisodes(
|
||||||
|
Guid lhsId,
|
||||||
|
ReadOnlyCollection<uint> lhs,
|
||||||
|
Guid rhsId,
|
||||||
|
ReadOnlyCollection<uint> rhs)
|
||||||
|
{
|
||||||
var lhsRanges = new List<TimeRange>();
|
var lhsRanges = new List<TimeRange>();
|
||||||
var rhsRanges = new List<TimeRange>();
|
var rhsRanges = new List<TimeRange>();
|
||||||
|
|
||||||
@ -219,16 +257,11 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
if (lhsRanges.Count == 0)
|
if (lhsRanges.Count == 0)
|
||||||
{
|
{
|
||||||
_logger.LogDebug(
|
_logger.LogDebug(
|
||||||
"Unable to find a shared introduction sequence between {LHS} and {RHS}",
|
"Unable to find a shared introduction sequence {LHS} and {RHS}",
|
||||||
lhsEpisode.Path,
|
lhsId,
|
||||||
rhsEpisode.Path);
|
rhsId);
|
||||||
|
|
||||||
// TODO: if an episode fails but others in the season succeed, reanalyze it against two that succeeded.
|
return (new Intro(lhsId, 0, 0), new Intro(rhsId, 0, 0));
|
||||||
|
|
||||||
StoreIntro(lhsEpisode.EpisodeId, 0, 0);
|
|
||||||
StoreIntro(rhsEpisode.EpisodeId, 0, 0);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// After comparing both episodes at all possible shift positions, store the longest time range as the intro.
|
// After comparing both episodes at all possible shift positions, store the longest time range as the intro.
|
||||||
@ -249,10 +282,7 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
rhsIntro.Start = 0;
|
rhsIntro.Start = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
StoreIntro(lhsEpisode.EpisodeId, lhsIntro.Start, lhsIntro.End);
|
return (new Intro(lhsId, lhsIntro.Start, lhsIntro.End), new Intro(rhsId, rhsIntro.Start, rhsIntro.End));
|
||||||
StoreIntro(rhsEpisode.EpisodeId, rhsIntro.Start, rhsIntro.End);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -366,25 +396,6 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
return (lContiguous, rContiguous);
|
return (lContiguous, rContiguous);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void StoreIntro(Guid episode, double introStart, double introEnd)
|
|
||||||
{
|
|
||||||
var intro = new Intro()
|
|
||||||
{
|
|
||||||
EpisodeId = episode,
|
|
||||||
Valid = introEnd > 0, // don't test introStart here as the intro could legitimately happen at the start.
|
|
||||||
IntroStart = introStart,
|
|
||||||
IntroEnd = introEnd
|
|
||||||
};
|
|
||||||
|
|
||||||
if (Plugin.Instance is null)
|
|
||||||
{
|
|
||||||
LastIntro = intro;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Plugin.Instance.Intros[episode] = intro;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Count the number of bits that are set in the provided number.
|
/// Count the number of bits that are set in the provided number.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@ -406,6 +417,180 @@ public class FingerprinterTask : IScheduledTask
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Reanalyze the most recently analyzed season.
|
||||||
|
/// Looks for and fixes intro durations that were either not found or are statistical outliers.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="episodes">List of episodes that was just analyzed.</param>
|
||||||
|
private void CheckSeason(List<QueuedEpisode> episodes)
|
||||||
|
{
|
||||||
|
var intros = Plugin.Instance!.Intros;
|
||||||
|
|
||||||
|
// First, assert that at least half of the episodes in this season have an intro.
|
||||||
|
var validCount = 0;
|
||||||
|
var totalCount = episodes.Count;
|
||||||
|
|
||||||
|
foreach (var episode in episodes)
|
||||||
|
{
|
||||||
|
if (intros[episode.EpisodeId].Valid)
|
||||||
|
{
|
||||||
|
validCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var percentValid = (validCount * 100) / totalCount;
|
||||||
|
_logger.LogDebug("Found intros in {Valid}/{Total} ({Percent}%) of episodes", validCount, totalCount, percentValid);
|
||||||
|
if (percentValid < 50)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a histogram of all episode durations
|
||||||
|
var histogram = new Dictionary<int, SeasonHistogram>();
|
||||||
|
foreach (var episode in episodes)
|
||||||
|
{
|
||||||
|
var id = episode.EpisodeId;
|
||||||
|
var duration = GetIntroDuration(id);
|
||||||
|
|
||||||
|
if (duration < MinimumIntroDuration)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bucket the duration into equally sized groups
|
||||||
|
var bucket = Convert.ToInt32(Math.Floor(duration / ReanalysisBucketWidth)) * ReanalysisBucketWidth;
|
||||||
|
|
||||||
|
// TryAdd returns true when the key was successfully added (i.e. for newly created buckets).
|
||||||
|
// Newly created buckets are initialized with the provided episode ID, so nothing else needs to be done for them.
|
||||||
|
if (histogram.TryAdd(bucket, new SeasonHistogram(id)))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
histogram[bucket].Episodes.Add(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the bucket that was seen most often, as this is likely to be the true intro length.
|
||||||
|
var maxDuration = 0;
|
||||||
|
var maxBucket = new SeasonHistogram(Guid.Empty);
|
||||||
|
foreach (var entry in histogram)
|
||||||
|
{
|
||||||
|
if (entry.Value.Count > maxBucket.Count)
|
||||||
|
{
|
||||||
|
maxDuration = entry.Key;
|
||||||
|
maxBucket = entry.Value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that the most frequently seen bucket has a majority
|
||||||
|
// TODO: change to debug
|
||||||
|
percentValid = (maxBucket.Count * 100) / validCount;
|
||||||
|
_logger.LogInformation(
|
||||||
|
"Intro duration {Duration} appeared {Frequency} times ({Percent}%)",
|
||||||
|
maxDuration,
|
||||||
|
maxBucket.Count,
|
||||||
|
percentValid);
|
||||||
|
|
||||||
|
if (percentValid < 50 || maxBucket.Episodes[0].Equals(Guid.Empty))
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
_logger.LogInformation("Reanalyzing {Count} episodes", totalCount - maxBucket.Count);
|
||||||
|
|
||||||
|
// TODO: pick two episodes at random
|
||||||
|
// Cache the fingerprint of the first episode in the max bucket to save CPU cycles
|
||||||
|
var lhs = episodes.Find(x => x.EpisodeId == maxBucket.Episodes[1]);
|
||||||
|
if (lhs is null)
|
||||||
|
{
|
||||||
|
_logger.LogError("Reanalysis failed to get episode from bucket");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ReadOnlyCollection<uint> lhsFingerprint;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
lhsFingerprint = FPCalc.Fingerprint(lhs);
|
||||||
|
}
|
||||||
|
catch (FingerprintException ex)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("Skipping reanalysis of {Show} season {Season}: {Exception}", lhs.SeriesName, lhs.SeasonNumber, ex);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var lhsDuration = GetIntroDuration(lhs.EpisodeId);
|
||||||
|
var (lowTargetDuration, highTargetDuration) = (
|
||||||
|
lhsDuration - ReanalysisTolerance,
|
||||||
|
lhsDuration + ReanalysisTolerance);
|
||||||
|
|
||||||
|
foreach (var episode in episodes)
|
||||||
|
{
|
||||||
|
// Don't reanalyze episodes from the max bucket
|
||||||
|
if (maxBucket.Episodes.Contains(episode.EpisodeId))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var oldDuration = GetIntroDuration(episode.EpisodeId);
|
||||||
|
|
||||||
|
// TODO: remove
|
||||||
|
var shortPath = episode.Path.Substring(episode.Path.Length - 40);
|
||||||
|
|
||||||
|
// If the episode's intro duration is close enough to the targeted bucket, leave it alone.
|
||||||
|
if (Math.Abs(lhsDuration - oldDuration) <= ReanalysisTolerance)
|
||||||
|
{
|
||||||
|
_logger.LogInformation(
|
||||||
|
"Not reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
|
||||||
|
shortPath,
|
||||||
|
Math.Round(oldDuration, 2),
|
||||||
|
maxDuration);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
_logger.LogDebug(
|
||||||
|
"Reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
|
||||||
|
shortPath,
|
||||||
|
Math.Round(oldDuration, 2),
|
||||||
|
maxDuration);
|
||||||
|
|
||||||
|
// Analyze the episode again, ignoring whatever is returned for the known good episode.
|
||||||
|
var (_, newRhs) = FingerprintEpisodes(
|
||||||
|
lhs.EpisodeId,
|
||||||
|
lhsFingerprint,
|
||||||
|
episode.EpisodeId,
|
||||||
|
FPCalc.Fingerprint(episode));
|
||||||
|
|
||||||
|
// Ensure that the new intro duration is within the targeted bucket and longer than what was found previously.
|
||||||
|
var newDuration = Math.Round(newRhs.IntroEnd - newRhs.IntroStart, 2);
|
||||||
|
if (newDuration < oldDuration || newDuration < lowTargetDuration || newDuration > highTargetDuration)
|
||||||
|
{
|
||||||
|
_logger.LogInformation(
|
||||||
|
"Ignoring reanalysis for {Path} (was {Initial}, now is {New})",
|
||||||
|
shortPath,
|
||||||
|
oldDuration,
|
||||||
|
newDuration);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: change to debug
|
||||||
|
_logger.LogInformation(
|
||||||
|
"Reanalysis succeeded for {Path} (was {Initial}, now is {New})",
|
||||||
|
shortPath,
|
||||||
|
oldDuration,
|
||||||
|
newDuration);
|
||||||
|
|
||||||
|
Plugin.Instance!.Intros[episode.EpisodeId] = newRhs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double GetIntroDuration(Guid id)
|
||||||
|
{
|
||||||
|
var episode = Plugin.Instance!.Intros[id];
|
||||||
|
return episode.Valid ? Math.Round(episode.IntroEnd - episode.IntroStart, 2) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Get task triggers.
|
/// Get task triggers.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user