Implement silence detection

This commit is contained in:
ConfusedPolarBear 2022-08-29 23:56:13 -05:00
parent fb6cd5c1d7
commit cbd00b2675
7 changed files with 274 additions and 23 deletions

View File

@ -108,6 +108,29 @@ public class TestAudioFingerprinting
Assert.Equal(22.912, rhs.IntroEnd); Assert.Equal(22.912, rhs.IntroEnd);
} }
/// <summary>
/// Verifies that the silencedetect wrapper reports the expected silent ranges
/// for the first 60 seconds of the bundled Big Buck Bunny audio clip.
/// </summary>
[FactSkipFFmpegTests]
public void TestSilenceDetection()
{
    // Run detection first; the expectations below are the known silent ranges of the clip.
    var episode = queueEpisode("audio/big_buck_bunny_clip.mp3");
    var detected = FFmpegWrapper.DetectSilence(episode, 60);

    var wanted = new TimeRange[]
    {
        new TimeRange(44.6310, 44.8072),
        new TimeRange(53.5905, 53.8070),
        new TimeRange(53.8458, 54.2024),
        new TimeRange(54.2611, 54.5935),
        new TimeRange(54.7098, 54.9293),
        new TimeRange(54.9294, 55.2590),
    };

    Assert.Equal(wanted, detected);
}
private QueuedEpisode queueEpisode(string path) private QueuedEpisode queueEpisode(string path)
{ {
return new QueuedEpisode() return new QueuedEpisode()

View File

@ -71,4 +71,22 @@ public class TestTimeRanges
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }
/// <summary>
/// Checks TimeRange intersection detection by comparing each supplied range
/// against a fixed reference range that spans 5 to 10 seconds.
/// </summary>
[Theory]
[InlineData(1, 4, false)] // too early
[InlineData(4, 6, true)] // intersects on the left
[InlineData(7, 8, true)] // in the middle
[InlineData(9, 12, true)] // intersects on the right
[InlineData(13, 15, false)] // too late
public void TestTimeRangeIntersection(int start, int end, bool expected)
{
    // Fixed reference range every case is compared against.
    var reference = new TimeRange(5, 10);
    var candidate = new TimeRange(start, end);

    var intersects = reference.Intersects(candidate);

    Assert.Equal(expected, intersects);
}
} }

View File

@ -169,6 +169,8 @@ public class AutoSkip : IServerEntryPoint
// Send the seek command // Send the seek command
_logger.LogDebug("Sending seek command to {Session}", deviceId); _logger.LogDebug("Sending seek command to {Session}", deviceId);
var introEnd = (long)intro.IntroEnd - Plugin.Instance!.Configuration.AmountOfIntroToPlay;
_sessionManager.SendPlaystateCommand( _sessionManager.SendPlaystateCommand(
session.Id, session.Id,
session.Id, session.Id,
@ -176,7 +178,7 @@ public class AutoSkip : IServerEntryPoint
{ {
Command = PlaystateCommand.Seek, Command = PlaystateCommand.Seek,
ControllingUserId = session.UserId.ToString("N"), ControllingUserId = session.UserId.ToString("N"),
SeekPositionTicks = (long)intro.IntroEnd * TimeSpan.TicksPerSecond, SeekPositionTicks = introEnd * TimeSpan.TicksPerSecond,
}, },
CancellationToken.None); CancellationToken.None);

View File

@ -62,6 +62,17 @@ public class PluginConfiguration : BasePluginConfiguration
/// </summary> /// </summary>
public int MinimumIntroDuration { get; set; } = 15; public int MinimumIntroDuration { get; set; } = 15;
/// <summary>
/// Gets or sets the maximum amount of noise (in dB) that is considered silent.
/// Lowering this number will increase the filter's sensitivity to noise.
/// This value is handed to FFmpeg's silencedetect filter as its <c>noise</c> option.
/// Defaults to -50 dB.
/// </summary>
public int SilenceDetectionMaximumNoise { get; set; } = -50;
/// <summary>
/// Gets or sets the minimum duration of audio (in seconds) that is considered silent.
/// Defaults to half a second.
/// </summary>
public double SilenceDetectionMinimumDuration { get; set; } = 0.50;
// ===== Playback settings ===== // ===== Playback settings =====
/// <summary> /// <summary>
@ -83,5 +94,5 @@ public class PluginConfiguration : BasePluginConfiguration
/// Gets or sets the amount of intro to play (in seconds). /// Gets or sets the amount of intro to play (in seconds).
/// TODO: rename. /// TODO: rename.
/// </summary> /// </summary>
public int AmountOfIntroToPlay { get; set; } = 5; public int AmountOfIntroToPlay { get; set; } = 2;
} }

View File

@ -69,6 +69,18 @@ public class TimeRange : IComparable
return tr.Duration.CompareTo(Duration); return tr.Duration.CompareTo(Duration);
} }
/// <summary>
/// Tests if this TimeRange object intersects the provided TimeRange.
/// Ranges that merely touch at a boundary (one ends exactly where the other starts)
/// are not considered to intersect.
/// </summary>
/// <param name="tr">Second TimeRange object to test.</param>
/// <returns>true if tr intersects the current TimeRange, false otherwise.</returns>
public bool Intersects(TimeRange tr)
{
    // Two ranges overlap exactly when each one starts before the other one ends.
    // Checking only whether an endpoint of tr lies inside this range misses the cases
    // where tr completely contains this range or is identical to it.
    return Start < tr.End && tr.Start < End;
}
} }
#pragma warning restore CA1036 #pragma warning restore CA1036

View File

@ -4,6 +4,7 @@ using System.Diagnostics;
using System.Globalization; using System.Globalization;
using System.IO; using System.IO;
using System.Text; using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace ConfusedPolarBear.Plugin.IntroSkipper; namespace ConfusedPolarBear.Plugin.IntroSkipper;
@ -13,6 +14,16 @@ namespace ConfusedPolarBear.Plugin.IntroSkipper;
/// </summary> /// </summary>
public static class FFmpegWrapper public static class FFmpegWrapper
{ {
// FFmpeg logs lines similar to the following:
// [silencedetect @ 0x000000000000] silence_start: 12.34
// [silencedetect @ 0x000000000000] silence_end: 56.123 | silence_duration: 43.783
/// <summary>
/// Used with FFmpeg's silencedetect filter to extract the start and end times of silence.
/// Each match exposes two named groups: "type" (either "start" or "end") and "time"
/// (the timestamp text that follows). The "silence_duration" field is not matched
/// because only "silence_start" and "silence_end" are accepted by the pattern.
/// </summary>
/// <remarks>
/// NOTE(review): the "time" group only accepts digits and dots, so a timestamp with a
/// leading minus sign would not match. Confirm whether FFmpeg can log negative values here.
/// </remarks>
private static readonly Regex SilenceDetectionExpression = new(
"silence_(?<type>start|end): (?<time>[0-9\\.]+)");
/// <summary> /// <summary>
/// Gets or sets the logger. /// Gets or sets the logger.
/// </summary> /// </summary>
@ -31,11 +42,11 @@ public static class FFmpegWrapper
try try
{ {
// Log the output of "ffmpeg -version". // Log the output of "ffmpeg -version".
ChromaprintLogs["version"] = Encoding.UTF8.GetString(GetOutput("-version", 2000)); ChromaprintLogs["version"] = Encoding.UTF8.GetString(GetOutput("-version", string.Empty, false, 2000));
Logger?.LogDebug("ffmpeg version information: {Version}", ChromaprintLogs["version"]); Logger?.LogDebug("ffmpeg version information: {Version}", ChromaprintLogs["version"]);
// First, validate that the installed version of ffmpeg supports chromaprint at all. // First, validate that the installed version of ffmpeg supports chromaprint at all.
var muxers = Encoding.UTF8.GetString(GetOutput("-muxers", 2000)); var muxers = Encoding.UTF8.GetString(GetOutput("-muxers", string.Empty, false, 2000));
ChromaprintLogs["muxer list"] = muxers; ChromaprintLogs["muxer list"] = muxers;
Logger?.LogTrace("ffmpeg muxers: {Muxers}", muxers); Logger?.LogTrace("ffmpeg muxers: {Muxers}", muxers);
@ -47,7 +58,7 @@ public static class FFmpegWrapper
} }
// Second, validate that ffmpeg understands the "-fp_format raw" option. // Second, validate that ffmpeg understands the "-fp_format raw" option.
var muxerHelp = Encoding.UTF8.GetString(GetOutput("-h muxer=chromaprint", 2000)); var muxerHelp = Encoding.UTF8.GetString(GetOutput("-h muxer=chromaprint", string.Empty, false, 2000));
ChromaprintLogs["muxer options"] = muxerHelp; ChromaprintLogs["muxer options"] = muxerHelp;
Logger?.LogTrace("ffmpeg chromaprint help: {MuxerHelp}", muxerHelp); Logger?.LogTrace("ffmpeg chromaprint help: {MuxerHelp}", muxerHelp);
@ -90,10 +101,9 @@ public static class FFmpegWrapper
} }
Logger?.LogDebug( Logger?.LogDebug(
"Fingerprinting {Duration} seconds from \"{File}\" (length {Length}, id {Id})", "Fingerprinting {Duration} seconds from \"{File}\" (id {Id})",
episode.FingerprintDuration, episode.FingerprintDuration,
episode.Path, episode.Path,
episode.Path.Length,
episode.EpisodeId); episode.EpisodeId);
var args = string.Format( var args = string.Format(
@ -103,7 +113,7 @@ public static class FFmpegWrapper
episode.FingerprintDuration); episode.FingerprintDuration);
// Returns all fingerprint points as raw 32 bit unsigned integers (little endian). // Returns all fingerprint points as raw 32 bit unsigned integers (little endian).
var rawPoints = GetOutput(args); var rawPoints = GetOutput(args, string.Empty);
if (rawPoints.Length == 0 || rawPoints.Length % 4 != 0) if (rawPoints.Length == 0 || rawPoints.Length % 4 != 0)
{ {
Logger?.LogWarning("Chromaprint returned {Count} points for \"{Path}\"", rawPoints.Length, episode.Path); Logger?.LogWarning("Chromaprint returned {Count} points for \"{Path}\"", rawPoints.Length, episode.Path);
@ -153,26 +163,105 @@ public static class FFmpegWrapper
} }
/// <summary> /// <summary>
/// Runs ffmpeg and returns standard output. /// Detect ranges of silence in the provided episode.
/// </summary>
/// <param name="episode">Queued episode.</param>
/// <param name="limit">Maximum amount of audio (in seconds) to detect silence in.</param>
/// <returns>Array of TimeRange objects that are silent in the queued episode.</returns>
public static TimeRange[] DetectSilence(QueuedEpisode episode, int limit)
{
    Logger?.LogTrace(
        "Detecting silence in \"{File}\" (limit {Limit}, id {Id})",
        episode.Path,
        limit,
        episode.EpisodeId);

    // Noise threshold (dB) for the filter; falls back to -50 when no plugin instance is available.
    var maximumNoise = Plugin.Instance?.Configuration.SilenceDetectionMaximumNoise ?? -50;

    // TODO: select the audio track that matches the user's preferred language, falling
    // back to the first track if nothing matches

    // -vn, -sn, -dn: ignore video, subtitle, and data tracks
    // Formatted with the invariant culture so the numbers are rendered the same on every system.
    var args = string.Format(
        CultureInfo.InvariantCulture,
        "-vn -sn -dn " +
        "-i \"{0}\" -to {1} -af \"silencedetect=noise={2}dB:duration=0.1\" -f null -",
        episode.Path,
        limit,
        maximumNoise);

    // The command output is cached under "GUID-intro-silence-v1".
    // NOTE(review): the key does not include the limit or the noise threshold, so a cached
    // result produced with different settings would be reused - confirm this is intended.
    var cacheKey = episode.EpisodeId.ToString("N") + "-intro-silence-v1";

    var pendingRange = new TimeRange();
    var detectedRanges = new List<TimeRange>();

    // The silencedetect filter reports through FFmpeg's log, so standard error is captured.
    var ffmpegLog = Encoding.UTF8.GetString(GetOutput(args, cacheKey, true));

    // Every match carries a type ("start" or "end") and a timecode (a double).
    foreach (Match match in SilenceDetectionExpression.Matches(ffmpegLog))
    {
        var timestamp = Convert.ToDouble(match.Groups["time"].Value, CultureInfo.InvariantCulture);

        if (match.Groups["type"].Value == "start")
        {
            // Remember where the silence began until its matching end arrives.
            pendingRange.Start = timestamp;
            continue;
        }

        // An "end" completes the range; store a copy so later updates don't alter it.
        pendingRange.End = timestamp;
        detectedRanges.Add(new TimeRange(pendingRange));
    }

    return detectedRanges.ToArray();
}
/// <summary>
/// Runs ffmpeg and returns standard output (or error).
/// If caching is enabled, will use cacheFilename to cache the output of this command.
/// </summary> /// </summary>
/// <param name="args">Arguments to pass to ffmpeg.</param> /// <param name="args">Arguments to pass to ffmpeg.</param>
/// <param name="timeout">Timeout (in seconds) to wait for ffmpeg to exit.</param> /// <param name="cacheFilename">Filename to cache the output of this command to, or string.Empty if this command should not be cached.</param>
private static ReadOnlySpan<byte> GetOutput(string args, int timeout = 60 * 1000) /// <param name="stderr">If standard error should be returned.</param>
/// <param name="timeout">Timeout (in milliseconds) to wait for ffmpeg to exit.</param>
private static ReadOnlySpan<byte> GetOutput(
string args,
string cacheFilename,
bool stderr = false,
int timeout = 60 * 1000)
{ {
var ffmpegPath = Plugin.Instance?.FFmpegPath ?? "ffmpeg"; var ffmpegPath = Plugin.Instance?.FFmpegPath ?? "ffmpeg";
var cacheOutput =
(Plugin.Instance?.Configuration.CacheFingerprints ?? false) &&
!string.IsNullOrEmpty(cacheFilename);
// If caching is enabled, try to load the output of this command from the cached file.
if (cacheOutput)
{
// Calculate the absolute path to the cached file.
cacheFilename = Path.Join(Plugin.Instance!.FingerprintCachePath, cacheFilename);
// If the cached file exists, return whatever it holds.
if (File.Exists(cacheFilename))
{
Logger?.LogTrace("Returning contents of cache {Cache}", cacheFilename);
return File.ReadAllBytes(cacheFilename);
}
Logger?.LogTrace("Not returning contents of cache {Cache} (not found)", cacheFilename);
}
// Prepend some flags to prevent FFmpeg from logging it's banner and progress information // Prepend some flags to prevent FFmpeg from logging it's banner and progress information
// for each file that is fingerprinted. // for each file that is fingerprinted.
var info = new ProcessStartInfo(ffmpegPath, args.Insert(0, "-hide_banner -loglevel warning ")) var info = new ProcessStartInfo(ffmpegPath, args.Insert(0, "-hide_banner -loglevel info "))
{ {
WindowStyle = ProcessWindowStyle.Hidden, WindowStyle = ProcessWindowStyle.Hidden,
CreateNoWindow = true, CreateNoWindow = true,
UseShellExecute = false, UseShellExecute = false,
ErrorDialog = false, ErrorDialog = false,
// We only consume standardOutput. RedirectStandardOutput = !stderr,
RedirectStandardOutput = true, RedirectStandardError = stderr
RedirectStandardError = false
}; };
var ffmpeg = new Process var ffmpeg = new Process
@ -180,7 +269,10 @@ public static class FFmpegWrapper
StartInfo = info StartInfo = info
}; };
Logger?.LogDebug("Starting ffmpeg with the following arguments: {Arguments}", ffmpeg.StartInfo.Arguments); Logger?.LogDebug(
"Starting ffmpeg with the following arguments: {Arguments}",
ffmpeg.StartInfo.Arguments);
ffmpeg.Start(); ffmpeg.Start();
using (MemoryStream ms = new MemoryStream()) using (MemoryStream ms = new MemoryStream())
@ -190,19 +282,29 @@ public static class FFmpegWrapper
do do
{ {
bytesRead = ffmpeg.StandardOutput.BaseStream.Read(buf, 0, buf.Length); var streamReader = stderr ? ffmpeg.StandardError : ffmpeg.StandardOutput;
bytesRead = streamReader.BaseStream.Read(buf, 0, buf.Length);
ms.Write(buf, 0, bytesRead); ms.Write(buf, 0, bytesRead);
} }
while (bytesRead > 0); while (bytesRead > 0);
ffmpeg.WaitForExit(timeout); ffmpeg.WaitForExit(timeout);
return ms.ToArray().AsSpan(); var output = ms.ToArray();
// If caching is enabled, cache the output of this command.
if (cacheOutput)
{
File.WriteAllBytes(cacheFilename, output);
}
return output;
} }
} }
/// <summary> /// <summary>
/// Tries to load an episode's fingerprint from cache. If caching is not enabled, calling this function is a no-op. /// Tries to load an episode's fingerprint from cache. If caching is not enabled, calling this function is a no-op.
/// This function was created before the unified caching mechanism was introduced (in v0.1.7).
/// </summary> /// </summary>
/// <param name="episode">Episode to try to load from cache.</param> /// <param name="episode">Episode to try to load from cache.</param>
/// <param name="fingerprint">Array to store the fingerprint in.</param> /// <param name="fingerprint">Array to store the fingerprint in.</param>
@ -256,6 +358,7 @@ public static class FFmpegWrapper
/// <summary> /// <summary>
/// Cache an episode's fingerprint to disk. If caching is not enabled, calling this function is a no-op. /// Cache an episode's fingerprint to disk. If caching is not enabled, calling this function is a no-op.
/// This function was created before the unified caching mechanism was introduced (in v0.1.7).
/// </summary> /// </summary>
/// <param name="episode">Episode to store in cache.</param> /// <param name="episode">Episode to store in cache.</param>
/// <param name="fingerprint">Fingerprint of the episode to store.</param> /// <param name="fingerprint">Fingerprint of the episode to store.</param>
@ -280,6 +383,7 @@ public static class FFmpegWrapper
/// <summary> /// <summary>
/// Determines the path an episode should be cached at. /// Determines the path an episode should be cached at.
/// This function was created before the unified caching mechanism was introduced (in v0.1.7).
/// </summary> /// </summary>
/// <param name="episode">Episode.</param> /// <param name="episode">Episode.</param>
private static string GetFingerprintCachePath(QueuedEpisode episode) private static string GetFingerprintCachePath(QueuedEpisode episode)

View File

@ -243,9 +243,8 @@ public class AnalyzeEpisodesTask : IScheduledTask
// Cache of all fingerprints for this season. // Cache of all fingerprints for this season.
var fingerprintCache = new Dictionary<Guid, uint[]>(); var fingerprintCache = new Dictionary<Guid, uint[]>();
// Total episodes in this season. Counted at the start of this function as episodes // Original episode queue.
// are popped from here during analysis. var originalEpisodes = new List<QueuedEpisode>(episodes);
var episodeCount = episodes.Count;
/* Don't analyze specials or seasons with an insufficient number of episodes. /* Don't analyze specials or seasons with an insufficient number of episodes.
* A season with only 1 episode can't be analyzed as it would compare the episode to itself, * A season with only 1 episode can't be analyzed as it would compare the episode to itself,
@ -253,7 +252,7 @@ public class AnalyzeEpisodesTask : IScheduledTask
*/ */
if (episodes.Count < 2 || episodes[0].SeasonNumber == 0) if (episodes.Count < 2 || episodes[0].SeasonNumber == 0)
{ {
return episodeCount; return originalEpisodes.Count;
} }
var first = episodes[0]; var first = episodes[0];
@ -326,6 +325,11 @@ public class AnalyzeEpisodesTask : IScheduledTask
// If no intro is found at this point, the popped episode is not reinserted into the queue. // If no intro is found at this point, the popped episode is not reinserted into the queue.
} }
// Adjust all introduction end times so that they end at silence.
seasonIntros = AdjustIntroEndTimes(
new ReadOnlyCollection<QueuedEpisode>(originalEpisodes),
seasonIntros);
// Ensure only one thread at a time can update the shared intro dictionary. // Ensure only one thread at a time can update the shared intro dictionary.
lock (_introsLock) lock (_introsLock)
{ {
@ -340,7 +344,7 @@ public class AnalyzeEpisodesTask : IScheduledTask
Plugin.Instance!.SaveTimestamps(); Plugin.Instance!.SaveTimestamps();
} }
return episodeCount; return originalEpisodes.Count;
} }
/// <summary> /// <summary>
@ -581,6 +585,83 @@ public class AnalyzeEpisodesTask : IScheduledTask
return (lContiguous, rContiguous); return (lContiguous, rContiguous);
} }
/// <summary>
/// Adjusts the end timestamps of all intros so that they end at silence.
/// </summary>
/// <param name="episodes">QueuedEpisodes to adjust.</param>
/// <param name="originalIntros">Original introductions.</param>
/// <returns>
/// Dictionary of (potentially) adjusted intros, keyed by episode id. Episodes without an
/// entry in <paramref name="originalIntros"/> are omitted from the result.
/// </returns>
private Dictionary<Guid, Intro> AdjustIntroEndTimes(
    ReadOnlyCollection<QueuedEpisode> episodes,
    Dictionary<Guid, Intro> originalIntros)
{
    // The minimum duration of audio that must be silent before adjusting the intro's end.
    var minimumSilence = Plugin.Instance!.Configuration.SilenceDetectionMinimumDuration;

    Dictionary<Guid, Intro> modifiedIntros = new();

    // For all episodes
    foreach (var episode in episodes)
    {
        _logger.LogTrace(
            "Adjusting introduction end time for {Name} ({Id})",
            episode.Name,
            episode.EpisodeId);

        // If no intro was found for this episode, skip it.
        if (!originalIntros.TryGetValue(episode.EpisodeId, out var originalIntro))
        {
            _logger.LogTrace("{Name} does not have an intro", episode.Name);
            continue;
        }

        // Since we only want to adjust the end timestamp of the intro, create a new TimeRange
        // that covers the last few seconds.
        var originalIntroEnd = new TimeRange(originalIntro.IntroEnd - 10, originalIntro.IntroEnd);

        _logger.LogTrace(
            "{Name} original intro: {Start} - {End}",
            episode.Name,
            originalIntro.IntroStart,
            originalIntro.IntroEnd);

        // Detect silence in the media file up to (slightly past) the end of the intro.
        var silence = FFmpegWrapper.DetectSilence(episode, (int)originalIntro.IntroEnd + 2);

        // For all periods of silence
        foreach (var currentRange in silence)
        {
            _logger.LogTrace(
                "{Name} silence: {Start} - {End}",
                episode.Name,
                currentRange.Start,
                currentRange.End);

            // Ignore any silence that:
            // * doesn't intersect the ending of the intro, or
            // * is shorter than the configured minimum silence duration
            if (!originalIntroEnd.Intersects(currentRange) || currentRange.Duration < minimumSilence)
            {
                continue;
            }

            // Adjust the end timestamp of the intro to match the start of the silence region.
            // Only the first qualifying silence is used.
            originalIntro.IntroEnd = currentRange.Start;
            break;
        }

        _logger.LogTrace(
            "{Name} adjusted intro: {Start} - {End}",
            episode.Name,
            originalIntro.IntroStart,
            originalIntro.IntroEnd);

        // Add the (potentially) modified intro back.
        modifiedIntros[episode.EpisodeId] = originalIntro;
    }

    return modifiedIntros;
}
/// <summary> /// <summary>
/// Count the number of bits that are set in the provided number. /// Count the number of bits that are set in the provided number.
/// </summary> /// </summary>