You've already forked torvalds-GuitarPedal
mirror of
https://github.com/torvalds/GuitarPedal.git
synced 2026-06-21 21:37:59 +00:00
This is all antigravity based on my requests to show the things our tuner code does. I should have done this originally, because the harmonic peaks are much more obvious when visualized this way, and it also shows why the whitening is so hard. The harmonic peaks move up a bit due to the inharmonicity of the guitar strings. I wonder if I can actually try to account for that and improve the whitening? But the really good news is that this also compares the 4x downsampled signal and my fastsincos Hann function with the "real thing". Including a zero-padded full-frequency FFT (that wouldn't be remotely reasonable to run on the poor little rp2354 - it makes this visualizer slightly slow even on my desktop workstation ;) And the math itself comes out with absolutely flying colors. There is effectively zero difference. The 4x downsampling works perfectly, and the Grandke interpolation gives effectively the exact correct result and a higher-resolution FFT doesn't add any value. If anything, being _so_ spot-on means that we probably could use a smaller FFT. But why mess with perfection? Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
417 lines
14 KiB
Python
Executable File
417 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import sys
|
|
import struct
|
|
import subprocess
|
|
import math
|
|
import numpy as np
|
|
import scipy.io.wavfile as wavfile
|
|
import scipy.signal.windows as windows
|
|
from scipy.fft import fft
|
|
import matplotlib.pyplot as plt
|
|
from matplotlib.widgets import Slider, Button
|
|
|
|
# Pedal's FFT settings from analyze.h
|
|
FFT_SHIFT = 13
|
|
FFT_SIZE = 1 << FFT_SHIFT
|
|
|
|
note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
|
|
|
|
def calculate_note_and_cents(freq):
|
|
if freq <= 0.0:
|
|
return 0, 0
|
|
note_float = 69.0 + 12.0 * math.log2(freq / 440.0)
|
|
note_idx = int(round(note_float))
|
|
cents = int(round((note_float - note_idx) * 100.0))
|
|
return note_idx, cents
|
|
|
|
def format_note(note_idx, cents):
|
|
name_idx = note_idx % 12
|
|
name = note_names[name_idx]
|
|
octave = (note_idx // 12) - 1
|
|
sign = "+" if cents >= 0 else ""
|
|
return f"{name}{octave} {sign}{cents}c"
|
|
|
|
def extract_peak(mags, i, avg_mag, max_mag, min_mag=5.0):
|
|
mag = mags[i]
|
|
if mag < min_mag:
|
|
return None
|
|
if mag < 5.0 or mag < avg_mag * 5.0:
|
|
return None
|
|
if mag < max_mag * 0.05:
|
|
return None
|
|
if mag <= mags[i-1] or mag <= mags[i+1] or mag <= 1.2 * mags[i-2] or mag <= 1.2 * mags[i+2]:
|
|
return None
|
|
|
|
y_m2 = mags[i-2]
|
|
y1 = mags[i-1]
|
|
y2 = mag
|
|
y3 = mags[i+1]
|
|
y_p2 = mags[i+2]
|
|
|
|
if y3 > y1:
|
|
p = (2.0 * y3 - y2) / (y3 + y2)
|
|
else:
|
|
p = (y2 - 2.0 * y1) / (y1 + y2)
|
|
|
|
peak_bin = i + p
|
|
peak_freq = peak_bin * (12000.0 / FFT_SIZE)
|
|
peak_mag = y2 - 0.25 * (y1 - y3) * p
|
|
|
|
return peak_freq, peak_mag, peak_bin, i, y_m2, y1, y2, y3, y_p2
|
|
|
|
def get_peaks(mags, top_n=10):
|
|
peaks = []
|
|
avg_mag = np.mean(mags)
|
|
max_mag = np.max(mags)
|
|
|
|
for i in range(2, len(mags) - 2):
|
|
res = extract_peak(mags, i, avg_mag, max_mag)
|
|
if res is not None:
|
|
peaks.append(res)
|
|
|
|
peaks.sort(key=lambda x: x[1], reverse=True)
|
|
return peaks[:top_n]
|
|
|
|
def suppress_harmonics(mags_input):
|
|
mags = np.copy(mags_input)
|
|
max_bin = len(mags)
|
|
avg_mag = np.mean(mags)
|
|
max_mag = np.max(mags)
|
|
|
|
for i in range(2, max_bin // 2):
|
|
res = extract_peak(mags, i, avg_mag, max_mag, min_mag=0.0)
|
|
if res is None:
|
|
continue
|
|
|
|
peak_mag = res[1]
|
|
peak_bin = res[2]
|
|
|
|
h = 2
|
|
while h * peak_bin < max_bin:
|
|
target_exact = h * peak_bin
|
|
low = int(math.floor(target_exact - 2.0))
|
|
high = int(math.ceil(target_exact + 2.0))
|
|
|
|
for j in range(low, high + 1):
|
|
if j >= max_bin:
|
|
break
|
|
if j < 0:
|
|
continue
|
|
|
|
d = abs(j - target_exact)
|
|
w = 0.0
|
|
if d <= 1.0:
|
|
w = 1.0 - 0.5 * d * d
|
|
elif d <= 2.0:
|
|
x = 2.0 - d
|
|
w = 0.5 * x * x
|
|
|
|
suppression = peak_mag * w
|
|
if mags[j] > suppression:
|
|
mags[j] -= suppression
|
|
else:
|
|
mags[j] = 0.0
|
|
h += 1
|
|
|
|
return mags
|
|
|
|
def run_c_tuner(wav_file, offset):
|
|
cmd = ['./test-fft', wav_file, str(offset * 4)]
|
|
try:
|
|
result = subprocess.run(cmd, capture_output=True, check=True)
|
|
data = result.stdout
|
|
|
|
header_format = '<II'
|
|
header_size = struct.calcsize(header_format)
|
|
|
|
if len(data) < header_size:
|
|
print("Error: Output too short")
|
|
return None, None
|
|
|
|
out_offset, num_mags = struct.unpack(header_format, data[:header_size])
|
|
|
|
if num_mags != FFT_SIZE // 2:
|
|
print(f"Error: Unexpected number of magnitudes: {num_mags}")
|
|
return None, None
|
|
|
|
mags_format = f'<{num_mags}f'
|
|
mags_size = struct.calcsize(mags_format)
|
|
|
|
mags = struct.unpack(mags_format, data[header_size:header_size+mags_size])
|
|
return out_offset, np.array(mags)
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
print(f"C program failed with error code {e.returncode}")
|
|
print("stderr:", e.stderr.decode('utf-8'))
|
|
return None, None
|
|
|
|
current_peaks = {'raw': [], 'supp': []}
|
|
|
|
def main():
|
|
if len(sys.argv) < 2:
|
|
print(f"Usage: {sys.argv[0]} <tuning.wav>")
|
|
sys.exit(1)
|
|
|
|
wav_file = sys.argv[1]
|
|
|
|
try:
|
|
sample_rate, audio_data = wavfile.read(wav_file)
|
|
except Exception as e:
|
|
print(f"Failed to read {wav_file}: {e}")
|
|
sys.exit(1)
|
|
|
|
if audio_data.dtype != np.int32:
|
|
print(f"Warning: Audio data is {audio_data.dtype}, expected int32")
|
|
|
|
if sample_rate != 48000:
|
|
print(f"Warning: Sample rate is {sample_rate}, expected 48000")
|
|
|
|
if len(audio_data.shape) > 1:
|
|
audio_data = audio_data[:, 0]
|
|
|
|
num_downsampled = len(audio_data) // 4
|
|
downsampled_audio = audio_data[:num_downsampled*4].reshape(-1, 4).sum(axis=1)
|
|
downsampled_audio = downsampled_audio / 2147483648.0
|
|
|
|
hann_window = windows.hann(FFT_SIZE, sym=False)
|
|
freqs = np.fft.fftfreq(FFT_SIZE, d=1.0/(sample_rate/4))
|
|
freqs = freqs[:FFT_SIZE//2]
|
|
|
|
# Pre-calculate Hann window for the non-downsampled audio (4x length)
|
|
hann_window_full = windows.hann(FFT_SIZE * 4, sym=False)
|
|
n_zp = FFT_SIZE * 4 * 16 # 16x zero-padding
|
|
freqs_zp = np.fft.fftfreq(n_zp, d=1.0/sample_rate)
|
|
freqs_zp = freqs_zp[:n_zp//2]
|
|
max_zp_idx = int(1500.0 * n_zp / sample_rate) + 100
|
|
|
|
fig, (ax_wav_full, ax_wav_zoom, ax_mag, ax_mag_supp) = plt.subplots(4, 1, figsize=(12, 12))
|
|
plt.subplots_adjust(bottom=0.15, hspace=0.4)
|
|
|
|
time_axis = np.arange(num_downsampled) / (sample_rate / 4)
|
|
step = max(1, num_downsampled // 10000)
|
|
ax_wav_full.plot(time_axis[::step], downsampled_audio[::step], color='lightgray', alpha=0.8)
|
|
ax_wav_full.set_title("Full WAV File Amplitude (Downsampled)")
|
|
ax_wav_full.set_ylabel("Amplitude")
|
|
|
|
vline_start = ax_wav_full.axvline(0, color='red', linestyle='--')
|
|
vline_end = ax_wav_full.axvline(FFT_SIZE / (sample_rate/4), color='red', linestyle='--')
|
|
|
|
line_zoom, = ax_wav_zoom.plot(np.zeros(FFT_SIZE), np.zeros(FFT_SIZE), color='blue', alpha=0.7)
|
|
ax_wav_zoom.set_title("Zoomed WAV Segment")
|
|
ax_wav_zoom.set_ylabel("Amplitude")
|
|
ax_wav_zoom.set_xlim(0, FFT_SIZE / (sample_rate/4))
|
|
ax_wav_zoom.set_ylim(-1, 1)
|
|
|
|
line_c, = ax_mag.plot(freqs, np.zeros_like(freqs), label="C test-fft", color='blue', alpha=0.7)
|
|
line_py, = ax_mag.plot(freqs, np.zeros_like(freqs), label="SciPy Reference", color='orange', alpha=0.7, linestyle='--')
|
|
line_zp, = ax_mag.plot(freqs_zp[:max_zp_idx], np.zeros(max_zp_idx), label="Zero-Padded (Full Rate)", color='green', alpha=0.5)
|
|
|
|
ax_mag.set_title("FFT Magnitudes")
|
|
ax_mag.set_ylabel("Magnitude")
|
|
ax_mag.set_xlim(0, 1500)
|
|
ax_mag.legend()
|
|
ax_mag.grid(True)
|
|
|
|
line_supp, = ax_mag_supp.plot(freqs, np.zeros_like(freqs), color='purple', alpha=0.8)
|
|
ax_mag_supp.set_title("FFT Magnitudes (After Harmonic Suppression)")
|
|
ax_mag_supp.set_ylabel("Magnitude")
|
|
ax_mag_supp.set_xlabel("Frequency (Hz)")
|
|
ax_mag_supp.set_xlim(0, 1500)
|
|
ax_mag_supp.grid(True)
|
|
|
|
error_text = fig.text(0.5, 0.95, '', ha='center', color='red', fontweight='bold')
|
|
|
|
ax_slider = plt.axes([0.15, 0.05, 0.75, 0.03])
|
|
max_offset = max(0, num_downsampled - FFT_SIZE)
|
|
slider = Slider(ax_slider, 'Offset', 0, max_offset, valinit=0, valstep=128)
|
|
|
|
peak_annotations_mag = []
|
|
peak_annotations_supp = []
|
|
|
|
def update(val):
|
|
offset = int(slider.val)
|
|
|
|
t_start = offset / (sample_rate / 4)
|
|
t_end = (offset + FFT_SIZE) / (sample_rate / 4)
|
|
vline_start.set_xdata([t_start, t_start])
|
|
vline_end.set_xdata([t_end, t_end])
|
|
|
|
segment = downsampled_audio[offset : offset + FFT_SIZE]
|
|
if len(segment) < FFT_SIZE:
|
|
return
|
|
|
|
time_zoom = np.arange(FFT_SIZE) / (sample_rate / 4)
|
|
line_zoom.set_xdata(time_zoom)
|
|
line_zoom.set_ydata(segment)
|
|
ax_wav_zoom.set_ylim(min(np.min(segment)*1.1, -0.01), max(np.max(segment)*1.1, 0.01))
|
|
|
|
c_offset, c_mags = run_c_tuner(wav_file, offset)
|
|
if c_mags is None:
|
|
return
|
|
|
|
windowed_data = segment * hann_window
|
|
py_fft = fft(windowed_data)
|
|
py_mags = np.abs(py_fft)[:FFT_SIZE//2]
|
|
|
|
# Non-downsampled, zero-padded reference
|
|
full_segment = audio_data[offset*4 : offset*4 + FFT_SIZE*4] / 2147483648.0
|
|
full_windowed = full_segment * hann_window_full
|
|
py_fft_zp = fft(full_windowed, n=n_zp)
|
|
py_mags_zp = np.abs(py_fft_zp)[:n_zp//2]
|
|
|
|
max_err = np.max(np.abs(c_mags - py_mags))
|
|
if max_err > 0.1:
|
|
error_text.set_text(f'WARNING: Max Error = {max_err:.4f}')
|
|
else:
|
|
error_text.set_text('')
|
|
|
|
line_c.set_ydata(c_mags)
|
|
line_py.set_ydata(py_mags)
|
|
line_zp.set_ydata(py_mags_zp[:max_zp_idx])
|
|
|
|
max_mag = max(np.max(c_mags), np.max(py_mags), np.max(py_mags_zp[:max_zp_idx]))
|
|
if max_mag > 0:
|
|
ax_mag.set_ylim(0, max_mag * 1.2)
|
|
|
|
for text in peak_annotations_mag:
|
|
text.remove()
|
|
peak_annotations_mag.clear()
|
|
|
|
peaks_raw = get_peaks(c_mags)
|
|
enriched_peaks_raw = []
|
|
for freq, mag, bin_idx, i, y_m2, y1, y2, y3, y_p2 in peaks_raw:
|
|
# Find the true peak in the zero-padded data
|
|
center_idx = int(freq * n_zp / sample_rate)
|
|
search_window = int(2.0 * n_zp / sample_rate) # +/- 2 Hz
|
|
w_start = max(0, center_idx - search_window)
|
|
w_end = min(len(py_mags_zp), center_idx + search_window)
|
|
|
|
zp_freq, zp_mag = freq, mag
|
|
if w_end > w_start:
|
|
local_max = w_start + np.argmax(py_mags_zp[w_start:w_end])
|
|
zp_mag = py_mags_zp[local_max]
|
|
|
|
# Standard parabolic interpolation on the dense zero-padded peak
|
|
if local_max > 0 and local_max < len(py_mags_zp) - 1:
|
|
y1_zp = py_mags_zp[local_max - 1]
|
|
y2_zp = py_mags_zp[local_max]
|
|
y3_zp = py_mags_zp[local_max + 1]
|
|
|
|
denom = y1_zp - 2 * y2_zp + y3_zp
|
|
if denom != 0:
|
|
p_zp = 0.5 * (y1_zp - y3_zp) / denom
|
|
else:
|
|
p_zp = 0.0
|
|
else:
|
|
p_zp = 0.0
|
|
|
|
zp_freq = (local_max + p_zp) * (sample_rate / n_zp)
|
|
|
|
enriched_peaks_raw.append((freq, mag, bin_idx, i, y_m2, y1, y2, y3, y_p2, zp_freq, zp_mag))
|
|
|
|
note_idx, cents = calculate_note_and_cents(freq)
|
|
note_str = format_note(note_idx, cents)
|
|
text = ax_mag.annotate(f'{freq:.2f}Hz\n{note_str}',
|
|
xy=(freq, mag),
|
|
xytext=(0, 5),
|
|
textcoords='offset points',
|
|
ha='center', va='bottom',
|
|
fontsize=8, color='darkblue',
|
|
arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))
|
|
peak_annotations_mag.append(text)
|
|
current_peaks['raw'] = enriched_peaks_raw
|
|
|
|
# Whitening / Harmonic Suppression
|
|
supp_mags = suppress_harmonics(c_mags)
|
|
line_supp.set_ydata(supp_mags)
|
|
|
|
max_supp_mag = np.max(supp_mags)
|
|
if max_supp_mag > 0:
|
|
ax_mag_supp.set_ylim(0, max_supp_mag * 1.2)
|
|
|
|
for text in peak_annotations_supp:
|
|
text.remove()
|
|
peak_annotations_supp.clear()
|
|
|
|
peaks_supp = get_peaks(supp_mags)
|
|
current_peaks['supp'] = peaks_supp
|
|
for freq, mag, bin_idx, i, y_m2, y1, y2, y3, y_p2 in peaks_supp:
|
|
note_idx, cents = calculate_note_and_cents(freq)
|
|
note_str = format_note(note_idx, cents)
|
|
text = ax_mag_supp.annotate(f'{freq:.2f}Hz\n{note_str}',
|
|
xy=(freq, mag),
|
|
xytext=(0, 5),
|
|
textcoords='offset points',
|
|
ha='center', va='bottom',
|
|
fontsize=8, color='purple',
|
|
arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))
|
|
peak_annotations_supp.append(text)
|
|
|
|
fig.canvas.draw_idle()
|
|
|
|
slider.on_changed(update)
|
|
|
|
step_size = FFT_SIZE // 16
|
|
|
|
def on_key(event):
|
|
if event.key == 'right':
|
|
new_val = min(slider.val + step_size, max_offset)
|
|
slider.set_val(new_val)
|
|
elif event.key == 'left':
|
|
new_val = max(slider.val - step_size, 0)
|
|
slider.set_val(new_val)
|
|
|
|
fig.canvas.mpl_connect('key_press_event', on_key)
|
|
|
|
def on_click(event):
|
|
if event.inaxes == ax_wav_full:
|
|
center_sample = int(event.xdata * (sample_rate / 4))
|
|
new_offset = center_sample - FFT_SIZE // 2
|
|
new_val = max(0, min(new_offset, max_offset))
|
|
slider.set_val(new_val)
|
|
|
|
fig.canvas.mpl_connect('button_press_event', on_click)
|
|
|
|
ax_btn = plt.axes([0.9, 0.05, 0.08, 0.03])
|
|
btn = Button(ax_btn, 'Copy Peaks')
|
|
|
|
def copy_peaks_cb(event):
|
|
text = "=== PEAK DATA ===\n"
|
|
text += "Raw Peaks:\n"
|
|
for freq, mag, bin_idx, i, y_m2, y1, y2, y3, y_p2, zp_freq, zp_mag in current_peaks.get('raw', []):
|
|
note_idx, cents = calculate_note_and_cents(freq)
|
|
note_str = format_note(note_idx, cents)
|
|
text += f" - Freq: {freq:7.2f} Hz | Mag: {mag:6.2f} | Note: {note_str}\n"
|
|
text += f" [Bin {i:4d}] y-2: {y_m2:9.6f}, y-1: {y1:9.6f}, y0: {y2:9.6f}, y+1: {y3:9.6f}, y+2: {y_p2:9.6f}\n"
|
|
text += f" [Zero-Pad] Freq: {zp_freq:7.2f} Hz | Mag: {zp_mag:6.2f} | Diff: {freq - zp_freq:6.2f} Hz\n"
|
|
|
|
text += "\nSuppressed Peaks:\n"
|
|
for freq, mag, bin_idx, i, y_m2, y1, y2, y3, y_p2 in current_peaks.get('supp', []):
|
|
note_idx, cents = calculate_note_and_cents(freq)
|
|
note_str = format_note(note_idx, cents)
|
|
text += f" - Freq: {freq:7.2f} Hz | Mag: {mag:6.2f} | Note: {note_str}\n"
|
|
text += f" [Bin {i:4d}] y-2: {y_m2:9.6f}, y-1: {y1:9.6f}, y0: {y2:9.6f}, y+1: {y3:9.6f}, y+2: {y_p2:9.6f}\n"
|
|
|
|
text += "=================\n"
|
|
print(text)
|
|
|
|
try:
|
|
import tkinter as tk
|
|
r = tk.Tk()
|
|
r.withdraw()
|
|
r.clipboard_clear()
|
|
r.clipboard_append(text)
|
|
r.update()
|
|
r.destroy()
|
|
print("(Also copied to clipboard!)")
|
|
except Exception:
|
|
print("(Could not copy to clipboard automatically, please copy from the terminal)")
|
|
|
|
btn.on_clicked(copy_peaks_cb)
|
|
|
|
update(0)
|
|
plt.show()
|
|
|
|
if __name__ == "__main__":
|
|
main()
|