Cache inverted indexes

This commit is contained in:
ConfusedPolarBear 2022-08-26 00:50:45 -05:00
parent e4da9342c8
commit f92eea20b3
3 changed files with 21 additions and 7 deletions

View File

@ -2,6 +2,7 @@
* which supports both chromaprint and the "-fp_format raw" flag. * which supports both chromaprint and the "-fp_format raw" flag.
*/ */
using System;
using System.Collections.Generic; using System.Collections.Generic;
using Xunit; using Xunit;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
@ -77,7 +78,7 @@ public class TestAudioFingerprinting
{77, 5}, {77, 5},
}; };
var actual = Chromaprint.CreateInvertedIndex(fpr); var actual = Chromaprint.CreateInvertedIndex(Guid.NewGuid(), fpr);
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }
@ -111,6 +112,7 @@ public class TestAudioFingerprinting
{ {
return new QueuedEpisode() return new QueuedEpisode()
{ {
EpisodeId = Guid.NewGuid(),
Path = "../../../" + path, Path = "../../../" + path,
FingerprintDuration = 60 FingerprintDuration = 60
}; };

View File

@ -20,6 +20,8 @@ public static class Chromaprint
private static Dictionary<string, string> ChromaprintLogs { get; set; } = new(); private static Dictionary<string, string> ChromaprintLogs { get; set; } = new();
private static Dictionary<Guid, Dictionary<uint, int>> InvertedIndexCache { get; set; } = new();
/// <summary> /// <summary>
/// Check that the installed version of ffmpeg supports chromaprint. /// Check that the installed version of ffmpeg supports chromaprint.
/// </summary> /// </summary>
@ -124,10 +126,16 @@ public static class Chromaprint
/// <summary> /// <summary>
/// Transforms a Chromaprint into an inverted index that maps each fingerprint point to the last index at which it appeared. /// Transforms a Chromaprint into an inverted index that maps each fingerprint point to the last index at which it appeared.
/// </summary> /// </summary>
/// <param name="id">Episode ID.</param>
/// <param name="fingerprint">Chromaprint fingerprint.</param> /// <param name="fingerprint">Chromaprint fingerprint.</param>
/// <returns>Inverted index.</returns> /// <returns>Inverted index.</returns>
public static Dictionary<uint, int> CreateInvertedIndex(uint[] fingerprint) public static Dictionary<uint, int> CreateInvertedIndex(Guid id, uint[] fingerprint)
{ {
if (InvertedIndexCache.TryGetValue(id, out var cached))
{
return cached;
}
var invIndex = new Dictionary<uint, int>(); var invIndex = new Dictionary<uint, int>();
for (int i = 0; i < fingerprint.Length; i++) for (int i = 0; i < fingerprint.Length; i++)
@ -139,6 +147,8 @@ public static class Chromaprint
invIndex[point] = i; invIndex[point] = i;
} }
InvertedIndexCache[id] = invIndex;
return invIndex; return invIndex;
} }

View File

@ -268,8 +268,6 @@ public class AnalyzeEpisodesTask : IScheduledTask
} }
} }
// TODO: cache inverted indexes
// TODO: implement bucketing // TODO: implement bucketing
// For all episodes // For all episodes
@ -357,7 +355,7 @@ public class AnalyzeEpisodesTask : IScheduledTask
// Creates an inverted fingerprint point index for both episodes. // Creates an inverted fingerprint point index for both episodes.
// For every point which is a 100% match, search for an introduction at that point. // For every point which is a 100% match, search for an introduction at that point.
var (lhsRanges, rhsRanges) = SearchInvertedIndex(lhsPoints, rhsPoints); var (lhsRanges, rhsRanges) = SearchInvertedIndex(lhsId, lhsPoints, rhsId, rhsPoints);
if (lhsRanges.Count > 0) if (lhsRanges.Count > 0)
{ {
@ -417,19 +415,23 @@ public class AnalyzeEpisodesTask : IScheduledTask
/// <summary> /// <summary>
/// Search for a shared introduction sequence using inverted indexes. /// Search for a shared introduction sequence using inverted indexes.
/// </summary> /// </summary>
/// <param name="lhsId">Left episode ID.</param>
/// <param name="lhsPoints">Left episode fingerprint points.</param> /// <param name="lhsPoints">Left episode fingerprint points.</param>
/// <param name="rhsId">Right episode ID.</param>
/// <param name="rhsPoints">Right episode fingerprint points.</param> /// <param name="rhsPoints">Right episode fingerprint points.</param>
/// <returns>List of shared TimeRanges between the left and right episodes.</returns> /// <returns>List of shared TimeRanges between the left and right episodes.</returns>
private (List<TimeRange> Lhs, List<TimeRange> Rhs) SearchInvertedIndex( private (List<TimeRange> Lhs, List<TimeRange> Rhs) SearchInvertedIndex(
Guid lhsId,
uint[] lhsPoints, uint[] lhsPoints,
Guid rhsId,
uint[] rhsPoints) uint[] rhsPoints)
{ {
var lhsRanges = new List<TimeRange>(); var lhsRanges = new List<TimeRange>();
var rhsRanges = new List<TimeRange>(); var rhsRanges = new List<TimeRange>();
// Generate inverted indexes for the left and right episodes. // Generate inverted indexes for the left and right episodes.
var lhsIndex = Chromaprint.CreateInvertedIndex(lhsPoints); var lhsIndex = Chromaprint.CreateInvertedIndex(lhsId, lhsPoints);
var rhsIndex = Chromaprint.CreateInvertedIndex(rhsPoints); var rhsIndex = Chromaprint.CreateInvertedIndex(rhsId, rhsPoints);
var indexShifts = new HashSet<int>(); var indexShifts = new HashSet<int>();
// For all audio points in the left episode, check if the right episode has a point which matches exactly. // For all audio points in the left episode, check if the right episode has a point which matches exactly.