pyACA: Documentation 0.3.1
Source Code for Audio Content Analysis
Loading...
Searching...
No Matches
computeMelSpectrogram.py
Go to the documentation of this file.
1# -*- coding: utf-8 -*-
2
3import numpy as np
4
5from pyACA.computeSpectrogram import computeSpectrogram
6from pyACA.ToolPreprocAudio import ToolPreprocAudio
7from pyACA.ToolComputeHann import ToolComputeHann
8from pyACA.ToolFreq2Mel import ToolFreq2Mel
9from pyACA.ToolMel2Freq import ToolMel2Freq
10
11
12
def computeMelSpectrogram(x, f_s, afWindow=None, bLogarithmic=True, iBlockLength=4096, iHopLength=2048, iNumMelBands=128, fMaxInHz=None):
    """Compute a mel-band spectrogram of an audio signal.

    The signal is pre-processed, transformed into a spectrogram, and the
    spectrogram is projected onto a bank of triangular mel filters.

    Args:
        x: audio signal; passed to ToolPreprocAudio before analysis.
        f_s: sample rate of the audio data in Hz.
        afWindow: FFT window of length iBlockLength. When None, a window
            from ToolComputeHann(iBlockLength) is used.
        bLogarithmic: when True, the result is converted to a level via
            20 * log10(M + 1e-12).
        iBlockLength: block (FFT) length in samples.
        iHopLength: hop length in samples.
        iNumMelBands: number of mel filters (rows of the result).
        fMaxInHz: upper frequency limit of the filterbank; any falsy value
            (None or 0) selects f_s / 2.

    Returns:
        Tuple (M, f_c, t): the mel spectrogram (filterbank matrix times
        spectrogram), the center frequencies of the mel filters, and the
        third value returned by computeSpectrogram (presumably the block
        time stamps -- confirm against computeSpectrogram).

    Raises:
        AssertionError: if afWindow does not have iBlockLength entries.
    """
    # Fall back to the Nyquist frequency when no upper limit is given
    if not fMaxInHz:
        fMaxInHz = f_s / 2

    # Pre-process: down-mix, normalize, zero-pad
    x = ToolPreprocAudio(x)

    if afWindow is None:
        # Compute window function for FFT
        afWindow = ToolComputeHann(iBlockLength)

    assert(afWindow.shape[0] == iBlockLength), "parameter error: invalid window dimension"

    # Compute spectrogram (in the real world, we would do this block by block).
    # The validated window is forwarded here; previously None was passed, so
    # a caller-supplied window never reached the spectrogram computation.
    [X, _, t] = computeSpectrogram(x, f_s, afWindow, iBlockLength, iHopLength)

    # Compute Mel filters
    H, f_c = generateMelFb_I(iBlockLength, f_s, iNumMelBands, fMaxInHz)

    # Apply the filterbank to every spectrogram column at once
    M = np.matmul(H, X)

    if bLogarithmic:
        # Convert amplitude to level (dB); the offset avoids log10(0)
        M = 20 * np.log10(M + 1e-12)

    return M, f_c, t
53
54
def generateMelFb_I(iFftLength, f_s, iNumFilters, f_max):
    """Build a bank of triangular filters spaced evenly on the mel axis.

    Args:
        iFftLength: FFT length; the filters cover iFftLength // 2 + 1 bins
            spanning 0 .. f_s / 2.
        f_s: sample rate in Hz.
        iNumFilters: number of triangular filters to create.
        f_max: upper edge of the highest filter, clipped to f_s / 2.

    Returns:
        Tuple (H, f_c): H has shape (iNumFilters, iFftLength // 2 + 1) and
        holds one filter per row; f_c holds the filter center frequencies.
    """
    # frequency range covered by the filterbank
    fLowestInHz = 0
    fHighestInHz = min(f_max, f_s / 2)

    # frequency of every FFT bin up to Nyquist
    afBinFreq = np.linspace(0, f_s / 2, iFftLength // 2 + 1)
    H = np.zeros((iNumFilters, afBinFreq.size))

    # iNumFilters + 2 edge points, equidistant on the mel scale
    afMelEdges = np.linspace(ToolFreq2Mel(fLowestInHz),
                             ToolFreq2Mel(fHighestInHz),
                             iNumFilters + 2)
    afEdgesInHz = ToolMel2Freq(afMelEdges)

    # lower edge, center and upper edge of each triangle
    f_l = afEdgesInHz[0:iNumFilters]
    f_c = afEdgesInHz[1:iNumFilters + 1]
    f_u = afEdgesInHz[2:iNumFilters + 2]

    # peak height of each triangle: 2 / bandwidth
    afPeak = 2 / (f_u - f_l)

    # assemble each filter from its rising and falling ramp
    for iBand, (fLo, fMid, fHi, fPeak) in enumerate(zip(f_l, f_c, f_u, afPeak)):
        bOnRise = np.logical_and(afBinFreq > fLo, afBinFreq <= fMid)
        bOnFall = np.logical_and(afBinFreq > fMid, afBinFreq < fHi)

        afRise = bOnRise * fPeak * (afBinFreq - fLo) / (fMid - fLo)
        afFall = bOnFall * fPeak * (fHi - afBinFreq) / (fHi - fMid)

        H[iBand] = afRise + afFall

    return H, f_c
82
83
84
87 from pyACA.ToolReadAudio import ToolReadAudio
88
89 # read audio file
90 [f_s, x] = ToolReadAudio(cPath)
91
92 # compute feature
93 [M, f, t] = computeMelSpectrogram(x, f_s)
94
95 return M, f, t
96
97
if __name__ == "__main__":
    import argparse

    # add command line args and parse them
    # (description corrected: this script computes a mel spectrogram, not a key)
    parser = argparse.ArgumentParser(description='Compute mel spectrogram of wav file')
    parser.add_argument('--infile', metavar='path', required=False,
                        help='path to input audio file')

    # retrieve command line args
    args = parser.parse_args()
    cPath = args.infile

    # only for debugging: fall back to a bundled example when no file is given
    if __debug__:
        if not cPath:
            cPath = "../ACA-Plots/audio/sax_example.wav"

    # call the function
    computeMelSpectrogramCl(cPath)
generateMelFb_I(iFftLength, f_s, iNumFilters, f_max)