Source code for astrolab.timing

#!/usr/bin/env python

"""
This package contains a list of functions that can be used to analyse signals for the Doppler Effect experiment.
"""

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile


[docs] def load_signal(filename, clip=None, print_log=False, color='steelblue', fig=None, ax=None): """ Load a ``.wav`` file and return both the time-series information and the sample rate. Parameters ---------- filename: str The ``.wav`` file to read: a filename pointing to the location of the file. clip: array_like, default: None A two-element list ``[a,b]`` and clips the input signal between the times ``a`` and ``b`` (in seconds). This can be useful if you want to remove the first ``a`` seconds or the last ``b`` seconds of your sound file. By default, no clipping is done. print_log: bool, default: False A boolean variable to decide whether you want to print a log of what you've done. In this case, whether you want to plot the resulting signal or not. By default, the signal is not plotted. color: str, default: "steelblue" Colour of this plot. Must be one of the matplotlib "named colors": https://matplotlib.org/stable/gallery/color/named_colors.html. fig: matplotlib figure object, default: None Figure on which to plot the result. By default, a new figure is created. ax: matplotlib axes object, default: None Axes on which to plot the result. By default, a new axis is created. Returns ------- samplerate: int The sample rate of the signal. This is defined as the number of datapoints of the input signal per second. Typically, most recordings are taken at 44,100 Hz, meaning that there are 44,100 samples in each second. times: array_like An array of the time-steps at which the data is sampled, in seconds. signal: array_like An array containing the value of the amplitude of the signal at each time-step in times. Raises ------ ValueError In case the ``clip`` array's entries are not in ascending order. Usage ----- >>> sr, times, sig = load_signal("./filename.wav", clip=[3,20], print_log=True, color='firebrick', fig=None, ax=None) """ samplerate, signal = wavfile.read(filename) # Read the data into variables if(signal.ndim > 1): # If the audio is stereo, multiple channels exist signal = np.mean(signal, axis=1) # Averaged over multiple channels to get a single array. T = len(signal)/samplerate # Total signal time T times = np.linspace(0,T,len(signal)) # Time-steps (in seconds) of the signal if(clip is not None): # If clipping is to be done, if(clip[0] > clip[1]): # check if the clip array is arranged in ascending order, raise ValueError("Clipping Error: the `clip` array should be in ascending order, i.e. the code requires clip[0]<clip[1].") mask = (times>clip[0]) & (times<clip[1]) # create a mask of times to clip times = times[mask] # clip the arrays using this mask times = times-times[0] signal = signal[mask] if(print_log): # If a log must be printed, if(fig is None or ax is None): # If no figure or axis element fig, ax = plt.subplots() # is provided, create an empty axis. ax.plot(times, signal, color=color) # plot the signal, with the specified colour ax.set_ylabel("Amplitude") ax.set_xlabel("Time (seconds)") return samplerate, times, signal
[docs] def chunk_signal(times, signal, size=2048, step=128): """ Break up a signal (given in the ``signal`` array) into individual pieces (or chunks) of size ``size``, whose left-edges are separated by a ``step`` samples. Thus, if "chunk 0" ranges from ``(0, size)``, "chunk 1" would range from ``(step, size + step)``. Notice how both chunks are ``size`` samples wide, and their left edges are separated by ``step`` samples. The splitting is thus done with significant overlap. For each of these chunks, it returns the average time at which the chunk was taken, and the signal's amplitude values within that chunk. Parameters ---------- times: array_like An array of times, usually one of the outputs of the ``load_signal`` function. signal: array_like An array of amplitudes for each of the times in ``times``, also typically one of the outputs of the ``load_signal`` function. size: int, default: 2048 The number of samples in a single chunk. step: int, default: 128 The number of samples between the left-edges of successive chunks. Returns ------- t: array_like A one-dimensional array containing the average time values for each of the chunks. chunks: array_like A two-dimensional array. The first dimension is the chunk-number. For each chunk, it returns size number of amplitude values. Usage ----- >>> average_times, chunks = chunk signal(times, sig) """ size = int(size); step = int(step) # Cast size and step into integers, if necessary n_steps = len(signal) # Number of steps in the signal n_chunks = (n_steps-size) // step # Number of chunks in the signal array = np.zeros((n_chunks, size)) # Empty 2-dimensional array to hold the chunks t = np.zeros(n_chunks) # Empty 1-dimensional array for the time-steps counter = 0 # Integer counter to count each chunk while(counter < n_chunks): # Until the total number of chunks is reached, start = counter*step # Choose the start-location of a chunk, end = start + size # and the end-location of a chunk. array[counter] = signal[start:end] # Clip the signal between ``start`` and ``end`` t[counter] = (times[start]+times[end])/2 # Compute the average time in the interval counter += 1 # Increment the counter return t, array
[docs] def power_spectrum(chunk, samplerate, lowx=1000, highx=10_000, print_log=False, color='coral', alpha=1, label=None, fig=None, ax=None): """ Accept an array of signal values, compute its power spectrum, and return the dominant frequency in the range ``[lowx, highx]``. Ideally, this range should be centered around the expected "stationary" frequency, and should be wide enough contain all the variation in the frequency during the object's motion. Parameters ---------- chunk: array_like An array of amplitude values, usually a specific chunk that was returned by the ``chunk_signal`` function, although the entire signal could be used as well. samplerate: int The original samplerate of the signal, obtained from the ``load_signal`` function. lowx: float, default: 1000 Lower limit (in Hz) of the frequency-range within which the dominant frequency is found and (if ``print_log=True``) the result is plotted. highx: float, default: 10_000 Upper limit (in Hz) of the frequency-range within which the dominant frequency is found and (if ``print_log=True``) the result is plotted. print_log: bool, default: False A boolean variable to decide whether you want to print a log of what you've done. In this case, if ``print_log=True``, the resulting power spectrum is plotted. By default, the power spectrum is not plotted. color: str, default: "coral" Set the colour of the plot that is produced if ``print_log=True``. alpha: float, default: 1 Set the transparency of the plot that is produced if ``print_log=True``. label: str or None, default: None Set the label of the plot that is produced if ``print_log=True``. By default, no label is set. fig: matplotlib figure object, default: None Figure on which to plot the result. By default, a new figure is created. ax: matplotlib axes object, default: None Axes on which to plot the result. By default, a new axis is created. Returns ------- dom_freq: float The frequency in the range ``[lowx , highx]`` that contributes the greatest deal to our signal in the given range of frequencies. Usage: ------ >>> dominant_frequency = power_spectrum(chunks[0], samplerate, lowx=2000, highx=4000) """ signal = chunk - np.mean(chunk) # Subtract the mean from the signal, so that it is oscillating about zero. This is to remove any overall "offset" to the signal. signal = signal*np.hamming(len(signal)) # The signal is multiplied by a Hamming window (a weighted cosine) to remove discontinuities at the edge, and reduce the edge effects due to the finite size of the signal. N = len(signal) powspec = np.abs(np.fft.fft(signal)) # Compute the power spectrum of the signal fftfreqs = np.fft.fftfreq(len(signal), d=1/samplerate) # Compute the range of frequencies of the power spectrum mask = (fftfreqs>lowx) & (fftfreqs<highx) # Mask of frequencies between the high and low values clipped_freqs = fftfreqs[mask] # Apply the mask to the frequencies and clipped_powspec = powspec[mask] # the power spectrum if(print_log==True): # If a log must be printed, if(fig is None or ax is None): fig, ax = plt.subplots() # create an axis if one isn't already provided ax.plot(clipped_freqs, clipped_powspec, color=color, label=label, alpha=alpha) # Plot the power spectrum ax.set_ylabel("Power spectrum") ax.set_xlabel("Frequency (Hz)") max_ps = np.max(clipped_powspec) # Find the maximum of the power spectrum dom_freq = clipped_freqs[clipped_powspec==max_ps][0] # Find the dominant frequency in the range return dom_freq