Cache inverted indexes

This commit is contained in:
ConfusedPolarBear 2022-08-26 00:50:45 -05:00
parent e4da9342c8
commit f92eea20b3
3 changed files with 21 additions and 7 deletions

View File

@ -2,6 +2,7 @@
* which supports both chromaprint and the "-fp_format raw" flag.
*/
using System;
using System.Collections.Generic;
using Xunit;
using Microsoft.Extensions.Logging;
@ -77,7 +78,7 @@ public class TestAudioFingerprinting
{77, 5},
};
var actual = Chromaprint.CreateInvertedIndex(fpr);
var actual = Chromaprint.CreateInvertedIndex(Guid.NewGuid(), fpr);
Assert.Equal(expected, actual);
}
@ -111,6 +112,7 @@ public class TestAudioFingerprinting
{
return new QueuedEpisode()
{
EpisodeId = Guid.NewGuid(),
Path = "../../../" + path,
FingerprintDuration = 60
};

View File

@ -20,6 +20,8 @@ public static class Chromaprint
private static Dictionary<string, string> ChromaprintLogs { get; set; } = new();
private static Dictionary<Guid, Dictionary<uint, int>> InvertedIndexCache { get; set; } = new();
/// <summary>
/// Check that the installed version of ffmpeg supports chromaprint.
/// </summary>
@ -124,10 +126,16 @@ public static class Chromaprint
/// <summary>
/// Transforms a Chromaprint fingerprint into an inverted index that maps each fingerprint point to the last index at which it appeared.
/// </summary>
/// <param name="id">Episode ID.</param>
/// <param name="fingerprint">Chromaprint fingerprint.</param>
/// <returns>Inverted index.</returns>
public static Dictionary<uint, int> CreateInvertedIndex(uint[] fingerprint)
public static Dictionary<uint, int> CreateInvertedIndex(Guid id, uint[] fingerprint)
{
if (InvertedIndexCache.TryGetValue(id, out var cached))
{
return cached;
}
var invIndex = new Dictionary<uint, int>();
for (int i = 0; i < fingerprint.Length; i++)
@ -139,6 +147,8 @@ public static class Chromaprint
invIndex[point] = i;
}
InvertedIndexCache[id] = invIndex;
return invIndex;
}

View File

@ -268,8 +268,6 @@ public class AnalyzeEpisodesTask : IScheduledTask
}
}
// TODO: cache inverted indexes
// TODO: implement bucketing
// For all episodes
@ -357,7 +355,7 @@ public class AnalyzeEpisodesTask : IScheduledTask
// Creates an inverted fingerprint point index for both episodes.
// For every point which is a 100% match, search for an introduction at that point.
var (lhsRanges, rhsRanges) = SearchInvertedIndex(lhsPoints, rhsPoints);
var (lhsRanges, rhsRanges) = SearchInvertedIndex(lhsId, lhsPoints, rhsId, rhsPoints);
if (lhsRanges.Count > 0)
{
@ -417,19 +415,23 @@ public class AnalyzeEpisodesTask : IScheduledTask
/// <summary>
/// Search for a shared introduction sequence using inverted indexes.
/// </summary>
/// <param name="lhsId">Left episode ID.</param>
/// <param name="lhsPoints">Left episode fingerprint points.</param>
/// <param name="rhsId">Right episode ID.</param>
/// <param name="rhsPoints">Right episode fingerprint points.</param>
/// <returns>List of shared TimeRanges between the left and right episodes.</returns>
private (List<TimeRange> Lhs, List<TimeRange> Rhs) SearchInvertedIndex(
Guid lhsId,
uint[] lhsPoints,
Guid rhsId,
uint[] rhsPoints)
{
var lhsRanges = new List<TimeRange>();
var rhsRanges = new List<TimeRange>();
// Generate inverted indexes for the left and right episodes.
var lhsIndex = Chromaprint.CreateInvertedIndex(lhsPoints);
var rhsIndex = Chromaprint.CreateInvertedIndex(rhsPoints);
var lhsIndex = Chromaprint.CreateInvertedIndex(lhsId, lhsPoints);
var rhsIndex = Chromaprint.CreateInvertedIndex(rhsId, rhsPoints);
var indexShifts = new HashSet<int>();
// For all audio points in the left episode, check if the right episode has a point which matches exactly.