'use strict'; // From https://github.com/johni0702/voice-activity-detection var analyserFrequency = require('analyser-frequency-average'); module.exports = function(audioContext, stream, opts) { opts = opts || {}; var defaults = { fftSize: 1024, bufferLen: 1024, smoothingTimeConstant: 0.2, minCaptureFreq: 85, // in Hz maxCaptureFreq: 255, // in Hz noiseCaptureDuration: 1000, // in ms minNoiseLevel: 0.3, // from 0 to 1 maxNoiseLevel: 0.7, // from 0 to 1 avgNoiseMultiplier: 1.2, onVoiceStart: function() { }, onVoiceStop: function() { }, onUpdate: function(val) { } }; var options = {}; for (var key in defaults) { options[key] = opts.hasOwnProperty(key) ? opts[key] : defaults[key]; } var baseLevel = 0; var voiceScale = 1; var activityCounter = 0; var activityCounterMin = 0; var activityCounterMax = 60; var activityCounterThresh = 5; var envFreqRange = []; var isNoiseCapturing = true; var prevVadState = undefined; var vadState = false; var captureTimeout = null; var source = audioContext.createMediaStreamSource(stream); var analyser = audioContext.createAnalyser(); analyser.smoothingTimeConstant = options.smoothingTimeConstant; analyser.fftSize = options.fftSize; var scriptProcessorNode = audioContext.createScriptProcessor(options.bufferLen, 1, 1); connect(); scriptProcessorNode.onaudioprocess = monitor; if (isNoiseCapturing) { //console.log('VAD: start noise capturing'); captureTimeout = setTimeout(init, options.noiseCaptureDuration); } function init() { //console.log('VAD: stop noise capturing'); isNoiseCapturing = false; envFreqRange = envFreqRange.filter(function(val) { return val; }).sort(); var averageEnvFreq = envFreqRange.length ? envFreqRange.reduce(function (p, c) { return Math.min(p, c) }, 1) : (options.minNoiseLevel || 0.1); baseLevel = averageEnvFreq * options.avgNoiseMultiplier; if (options.minNoiseLevel && baseLevel < options.minNoiseLevel) baseLevel = options.minNoiseLevel; if (options.maxNoiseLevel && baseLevel > options.maxNoiseLevel) baseLevel = options.maxNoiseLevel; voiceScale = 1 - baseLevel; //console.log('VAD: base level:', baseLevel); } function connect() { source.connect(analyser); analyser.connect(scriptProcessorNode); scriptProcessorNode.connect(audioContext.destination); } function disconnect() { scriptProcessorNode.disconnect(); } function destroy() { captureTimeout && clearTimeout(captureTimeout); disconnect(); analyser.disconnect(); source.disconnect(); source.mediaStream.getTracks().forEach(track => track.stop()); } function monitor() { var frequencies = new Uint8Array(analyser.frequencyBinCount); analyser.getByteFrequencyData(frequencies); var average = analyserFrequency(analyser, frequencies, options.minCaptureFreq, options.maxCaptureFreq); if (isNoiseCapturing) { envFreqRange.push(average); return; } if (average >= baseLevel && activityCounter < activityCounterMax) { activityCounter++; } else if (average < baseLevel && activityCounter > activityCounterMin) { activityCounter--; } vadState = activityCounter > activityCounterThresh; if (prevVadState !== vadState) { vadState ? onVoiceStart() : onVoiceStop(); prevVadState = vadState; } options.onUpdate(Math.max(0, average - baseLevel) / voiceScale); } function onVoiceStart() { options.onVoiceStart(); } function onVoiceStop() { options.onVoiceStop(); } return {connect: connect, disconnect: disconnect, destroy: destroy}; };