Only use inverted indexes to search
This commit is contained in:
parent
ce107f2161
commit
4558b59e95
@ -25,30 +25,15 @@ public class AnalysisStatistics
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public ThreadSafeInteger IndexSearches { get; } = new ThreadSafeInteger();
|
public ThreadSafeInteger IndexSearches { get; } = new ThreadSafeInteger();
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the number of times a quick scan successfully located a pair of introductions.
|
|
||||||
/// </summary>
|
|
||||||
public ThreadSafeInteger QuickScans { get; } = new ThreadSafeInteger();
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the number of times a full scan successfully located a pair of introductions.
|
|
||||||
/// </summary>
|
|
||||||
public ThreadSafeInteger FullScans { get; } = new ThreadSafeInteger();
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the total CPU time spent waiting for audio fingerprints to be generated.
|
/// Gets the total CPU time spent waiting for audio fingerprints to be generated.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public ThreadSafeInteger FingerprintCPUTime { get; } = new ThreadSafeInteger();
|
public ThreadSafeInteger FingerprintCPUTime { get; } = new ThreadSafeInteger();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the total CPU time spent analyzing fingerprints in the initial pass.
|
/// Gets the total CPU time spent analyzing fingerprints.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public ThreadSafeInteger FirstPassCPUTime { get; } = new ThreadSafeInteger();
|
public ThreadSafeInteger AnalysisCPUTime { get; } = new ThreadSafeInteger();
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the total CPU time spent analyzing fingerprints in the second pass.
|
|
||||||
/// </summary>
|
|
||||||
public ThreadSafeInteger SecondPassCPUTime { get; } = new ThreadSafeInteger();
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the total task runtime across all threads.
|
/// Gets the total task runtime across all threads.
|
||||||
|
@ -30,16 +30,6 @@ public class AnalyzeEpisodesTask : IScheduledTask
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
private const double SamplesToSeconds = 0.128;
|
private const double SamplesToSeconds = 0.128;
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Bucket size used in the reanalysis histogram.
|
|
||||||
/// </summary>
|
|
||||||
private const int ReanalysisBucketWidth = 5;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Maximum time (in seconds) that an intro's duration can be different from a typical intro's duration before marking it for reanalysis.
|
|
||||||
/// </summary>
|
|
||||||
private const double ReanalysisTolerance = ReanalysisBucketWidth * 1.5;
|
|
||||||
|
|
||||||
private readonly ILogger<AnalyzeEpisodesTask> _logger;
|
private readonly ILogger<AnalyzeEpisodesTask> _logger;
|
||||||
|
|
||||||
private readonly ILogger<QueueManager> _queueLogger;
|
private readonly ILogger<QueueManager> _queueLogger;
|
||||||
@ -375,17 +365,6 @@ public class AnalyzeEpisodesTask : IScheduledTask
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only run the second pass if the user hasn't requested cancellation and we found an intro
|
|
||||||
if (!cancellationToken.IsCancellationRequested && everFoundIntro)
|
|
||||||
{
|
|
||||||
var start = DateTime.Now;
|
|
||||||
|
|
||||||
// Run a second pass over this season to remove outliers and fix episodes that failed in the first pass.
|
|
||||||
RunSecondPass(season.Value);
|
|
||||||
|
|
||||||
analysisStatistics.SecondPassCPUTime.AddDuration(start);
|
|
||||||
}
|
|
||||||
|
|
||||||
lock (_introsLock)
|
lock (_introsLock)
|
||||||
{
|
{
|
||||||
Plugin.Instance!.SaveTimestamps();
|
Plugin.Instance!.SaveTimestamps();
|
||||||
@ -441,7 +420,6 @@ public class AnalyzeEpisodesTask : IScheduledTask
|
|||||||
// If this isn't running as part of the first analysis pass, don't count this CPU time as first pass time.
|
// If this isn't running as part of the first analysis pass, don't count this CPU time as first pass time.
|
||||||
var start = isFirstPass ? DateTime.Now : DateTime.MinValue;
|
var start = isFirstPass ? DateTime.Now : DateTime.MinValue;
|
||||||
|
|
||||||
// ===== Method 1: Inverted indexes =====
|
|
||||||
// Creates an inverted fingerprint point index for both episodes.
|
// Creates an inverted fingerprint point index for both episodes.
|
||||||
// For every point which is a 100% match, search for an introduction at that point.
|
// For every point which is a 100% match, search for an introduction at that point.
|
||||||
var (lhsRanges, rhsRanges) = SearchInvertedIndex(lhsPoints, rhsPoints);
|
var (lhsRanges, rhsRanges) = SearchInvertedIndex(lhsPoints, rhsPoints);
|
||||||
@ -450,46 +428,17 @@ public class AnalyzeEpisodesTask : IScheduledTask
|
|||||||
{
|
{
|
||||||
_logger.LogTrace("Index search successful");
|
_logger.LogTrace("Index search successful");
|
||||||
analysisStatistics.IndexSearches.Increment();
|
analysisStatistics.IndexSearches.Increment();
|
||||||
analysisStatistics.FirstPassCPUTime.AddDuration(start);
|
analysisStatistics.AnalysisCPUTime.AddDuration(start);
|
||||||
|
|
||||||
return GetLongestTimeRange(lhsId, lhsRanges, rhsId, rhsRanges);
|
return GetLongestTimeRange(lhsId, lhsRanges, rhsId, rhsRanges);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ===== Method 2: Quick scan =====
|
|
||||||
// Tests if an intro can be found within the first 5 seconds of the episodes. ±5/0.128 = ±40 samples.
|
|
||||||
(lhsRanges, rhsRanges) = ShiftEpisodes(lhsPoints, rhsPoints, -40, 40);
|
|
||||||
|
|
||||||
if (lhsRanges.Count > 0)
|
|
||||||
{
|
|
||||||
_logger.LogTrace("Quick scan successful");
|
|
||||||
analysisStatistics.QuickScans.Increment();
|
|
||||||
analysisStatistics.FirstPassCPUTime.AddDuration(start);
|
|
||||||
|
|
||||||
return GetLongestTimeRange(lhsId, lhsRanges, rhsId, rhsRanges);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ===== Method 3: Full scan =====
|
|
||||||
// Compares all elements of the shortest fingerprint to the other fingerprint.
|
|
||||||
var limit = Math.Min(lhsPoints.Length, rhsPoints.Length);
|
|
||||||
(lhsRanges, rhsRanges) = ShiftEpisodes(lhsPoints, rhsPoints, -1 * limit, limit);
|
|
||||||
|
|
||||||
if (lhsRanges.Count > 0)
|
|
||||||
{
|
|
||||||
_logger.LogTrace("Full scan successful");
|
|
||||||
analysisStatistics.FullScans.Increment();
|
|
||||||
analysisStatistics.FirstPassCPUTime.AddDuration(start);
|
|
||||||
|
|
||||||
return GetLongestTimeRange(lhsId, lhsRanges, rhsId, rhsRanges);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No method was able to find an introduction, return nothing.
|
|
||||||
|
|
||||||
_logger.LogTrace(
|
_logger.LogTrace(
|
||||||
"Unable to find a shared introduction sequence between {LHS} and {RHS}",
|
"Unable to find a shared introduction sequence between {LHS} and {RHS}",
|
||||||
lhsId,
|
lhsId,
|
||||||
rhsId);
|
rhsId);
|
||||||
|
|
||||||
analysisStatistics.FirstPassCPUTime.AddDuration(start);
|
analysisStatistics.AnalysisCPUTime.AddDuration(start);
|
||||||
|
|
||||||
return (new Intro(lhsId), new Intro(rhsId));
|
return (new Intro(lhsId), new Intro(rhsId));
|
||||||
}
|
}
|
||||||
@ -694,184 +643,6 @@ public class AnalyzeEpisodesTask : IScheduledTask
|
|||||||
return BitOperations.PopCount(number);
|
return BitOperations.PopCount(number);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Reanalyze the most recently analyzed season.
|
|
||||||
/// Looks for and fixes intro durations that were either not found or are statistical outliers.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="episodes">List of episodes that was just analyzed.</param>
|
|
||||||
private void RunSecondPass(List<QueuedEpisode> episodes)
|
|
||||||
{
|
|
||||||
// First, assert that at least half of the episodes in this season have an intro.
|
|
||||||
var validCount = 0;
|
|
||||||
var totalCount = episodes.Count;
|
|
||||||
|
|
||||||
foreach (var episode in episodes)
|
|
||||||
{
|
|
||||||
if (Plugin.Instance!.Intros.TryGetValue(episode.EpisodeId, out var intro) && intro.Valid)
|
|
||||||
{
|
|
||||||
validCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var percentValid = (validCount * 100) / totalCount;
|
|
||||||
_logger.LogTrace("Found intros in {Valid}/{Total} ({Percent}%) of episodes", validCount, totalCount, percentValid);
|
|
||||||
if (percentValid < 50)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a histogram of all episode durations
|
|
||||||
var histogram = new Dictionary<int, SeasonHistogram>();
|
|
||||||
foreach (var episode in episodes)
|
|
||||||
{
|
|
||||||
var id = episode.EpisodeId;
|
|
||||||
var duration = GetIntroDuration(id);
|
|
||||||
|
|
||||||
if (duration < minimumIntroDuration)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bucket the duration into equally sized groups
|
|
||||||
var bucket = Convert.ToInt32(Math.Floor(duration / ReanalysisBucketWidth)) * ReanalysisBucketWidth;
|
|
||||||
|
|
||||||
// TryAdd returns true when the key was successfully added (i.e. for newly created buckets).
|
|
||||||
// Newly created buckets are initialized with the provided episode ID, so nothing else needs to be done for them.
|
|
||||||
if (histogram.TryAdd(bucket, new SeasonHistogram(id)))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
histogram[bucket].Episodes.Add(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the bucket that was seen most often, as this is likely to be the true intro length.
|
|
||||||
var maxDuration = 0;
|
|
||||||
var maxBucket = new SeasonHistogram(Guid.Empty);
|
|
||||||
foreach (var entry in histogram)
|
|
||||||
{
|
|
||||||
if (entry.Value.Count > maxBucket.Count)
|
|
||||||
{
|
|
||||||
maxDuration = entry.Key;
|
|
||||||
maxBucket = entry.Value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that the most frequently seen bucket has a majority
|
|
||||||
percentValid = (maxBucket.Count * 100) / validCount;
|
|
||||||
_logger.LogTrace(
|
|
||||||
"Intro duration {Duration} appeared {Frequency} times ({Percent}%)",
|
|
||||||
maxDuration,
|
|
||||||
maxBucket.Count,
|
|
||||||
percentValid);
|
|
||||||
|
|
||||||
if (percentValid < 50 || maxBucket.Episodes[0].Equals(Guid.Empty))
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
_logger.LogTrace("Second pass is processing {Count} episodes", totalCount - maxBucket.Count);
|
|
||||||
|
|
||||||
// Calculate a range of intro durations that are most likely to be correct.
|
|
||||||
var maxEpisode = episodes.Find(x => x.EpisodeId == maxBucket.Episodes[0]);
|
|
||||||
if (maxEpisode is null)
|
|
||||||
{
|
|
||||||
_logger.LogError("Second pass failed to get episode from bucket");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var lhsDuration = GetIntroDuration(maxEpisode.EpisodeId);
|
|
||||||
var (lowTargetDuration, highTargetDuration) = (
|
|
||||||
lhsDuration - ReanalysisTolerance,
|
|
||||||
lhsDuration + ReanalysisTolerance);
|
|
||||||
|
|
||||||
// TODO: add limit and make it customizable
|
|
||||||
var count = maxBucket.Episodes.Count - 1;
|
|
||||||
var goodFingerprints = new List<uint[]>();
|
|
||||||
foreach (var id in maxBucket.Episodes)
|
|
||||||
{
|
|
||||||
if (!_fingerprintCache.TryGetValue(id, out var fp))
|
|
||||||
{
|
|
||||||
_logger.LogTrace("Second pass: max bucket episode {Id} not in cache, skipping", id);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
goodFingerprints.Add(fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (var episode in episodes)
|
|
||||||
{
|
|
||||||
// Don't reanalyze episodes from the max bucket
|
|
||||||
if (maxBucket.Episodes.Contains(episode.EpisodeId))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var oldDuration = GetIntroDuration(episode.EpisodeId);
|
|
||||||
|
|
||||||
// If the episode's intro duration is close enough to the targeted bucket, leave it alone.
|
|
||||||
if (Math.Abs(lhsDuration - oldDuration) <= ReanalysisTolerance)
|
|
||||||
{
|
|
||||||
_logger.LogTrace(
|
|
||||||
"Not reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
|
|
||||||
episode.Path,
|
|
||||||
Math.Round(oldDuration, 2),
|
|
||||||
maxDuration);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
_logger.LogTrace(
|
|
||||||
"Reanalyzing episode {Path} (intro is {Initial}, target is {Max})",
|
|
||||||
episode.Path,
|
|
||||||
Math.Round(oldDuration, 2),
|
|
||||||
maxDuration);
|
|
||||||
|
|
||||||
// Analyze the episode again, ignoring whatever is returned for the known good episode.
|
|
||||||
foreach (var lhsFingerprint in goodFingerprints)
|
|
||||||
{
|
|
||||||
if (!_fingerprintCache.TryGetValue(episode.EpisodeId, out var fp))
|
|
||||||
{
|
|
||||||
_logger.LogTrace("Unable to get cached fingerprint for {Id}, skipping", episode.EpisodeId);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var (_, newRhs) = FingerprintEpisodes(
|
|
||||||
maxEpisode.EpisodeId,
|
|
||||||
lhsFingerprint,
|
|
||||||
episode.EpisodeId,
|
|
||||||
fp,
|
|
||||||
false);
|
|
||||||
|
|
||||||
// Ensure that the new intro duration is within the targeted bucket and longer than what was found previously.
|
|
||||||
var newDuration = Math.Round(newRhs.IntroEnd - newRhs.IntroStart, 2);
|
|
||||||
if (newDuration < oldDuration || newDuration < lowTargetDuration || newDuration > highTargetDuration)
|
|
||||||
{
|
|
||||||
_logger.LogTrace(
|
|
||||||
"Ignoring reanalysis for {Path} (was {Initial}, now is {New})",
|
|
||||||
episode.Path,
|
|
||||||
oldDuration,
|
|
||||||
newDuration);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
_logger.LogTrace(
|
|
||||||
"Reanalysis succeeded for {Path} (was {Initial}, now is {New})",
|
|
||||||
episode.Path,
|
|
||||||
oldDuration,
|
|
||||||
newDuration);
|
|
||||||
|
|
||||||
lock (_introsLock)
|
|
||||||
{
|
|
||||||
Plugin.Instance!.Intros[episode.EpisodeId] = newRhs;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private double GetIntroDuration(Guid id)
|
private double GetIntroDuration(Guid id)
|
||||||
{
|
{
|
||||||
if (!Plugin.Instance!.Intros.TryGetValue(id, out var episode))
|
if (!Plugin.Instance!.Intros.TryGetValue(id, out var episode))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user