This notebook demonstrates how I have used the Neo and eFEL packages to visualize and analyze whole-cell electrophysiology data. This was one of the first scientific problems that showed me how Python could speed up the analysis required for my research project.

import efel
import numpy as np
import matplotlib.pyplot as plt
import neo
import pandas as pd
import glob
from scipy.optimize import curve_fit
from scipy import stats
%matplotlib inline

Representing experiment files in python

First, I want to create a list of the recording data files, which sit in a directory based on the experiment conducted.

recording_list = glob.glob('../data/iclamp/examples/*.abf')

Neo has many different IO classes for reading various electrophysiology data formats.
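
For context, a minimal sketch of a few of the other readers (the exact set of IO classes available depends on the installed Neo version):

# a few of the other readers Neo provides (availability depends on the Neo version)
# neo.io.Spike2IO        - CED Spike2 .smr files
# neo.io.PlexonIO        - Plexon .plx files
# neo.io.NeuroExplorerIO - NeuroExplorer .nex files
print([io.__name__ for io in neo.io.iolist])  # every IO class this installation knows about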

def read_recording(file):
    """Takes a filename and converts the recording from Axon's .abf
    file format into a Neo Block object."""
    reader = neo.io.AxonIO(filename=file)
    block = reader.read_block(cascade=True, lazy=False)
    return block

block = read_recording(recording_list[0])
len(block.segments)
25

In these experiments, each recording from a neuron is represented as a block with multiple segments. Each segment holds the analogsignal recorded at a different stimulus intensity (50 pA current steps, each 100 ms long).

block.segments[0]
Segment with 1 analogs, 1 event arrays
# Analog signals (N=1)
0: AnalogSignal in 1.0 mV with 12500 float32 values
   name: 'IN 1'
   channel index: 1
   sampling rate: 50000.0 Hz
   time: 0.0 s to 0.25 s
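
The same information can be read off the signal object directly (a quick sketch; the values in the comments come from the printout above):

sig = block.segments[0].analogsignals[0]
print(sig.sampling_rate)  # 50000.0 Hz
print(sig.t_start)        # 0.0 s
print(sig.units)          # 1.0 mV
print(len(sig))           # 12500 samples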

Visualizing intracellular recordings

In order to visualize a complete single neuron recording, I overlay each recorded analogsignal on the same temporal axis.

def plot_block(block, step=1):
    """Plots all traces/analogsignals from a single recording/block."""
    fig, ax = plt.subplots()

    for seg in block.segments[::step]:
        timecourse = seg.analogsignals[0].times - seg.analogsignals[0].t_start
        timecourse = timecourse.rescale('ms')
        ax.plot(timecourse, seg.analogsignals[0])

    ax.set_xlabel('time (ms)')
    ax.set_ylabel('mV')

plot_block(block)

png

It's hard to distinguish the individual traces when they are plotted on top of each other, so I can either plot a single trace or lay all the traces out in a grid of subplots to view them together.

def plot_trace(segment):
    """Plots a single trace (segment.analogsignals[0])."""
    fig, ax = plt.subplots()
    timecourse = segment.analogsignals[0].times - segment.analogsignals[0].t_start
    timecourse = timecourse.rescale('ms')
    ax.set_xlabel('time (ms)')
    ax.set_ylabel('mV')
    ax.plot(timecourse, segment.analogsignals[0])
    return ax

plot_trace(block.segments[12])
<matplotlib.axes._subplots.AxesSubplot at 0x7fccaa325a50>

png

def plot_recordings(block):
    """Plots each trace/analogsignal of a recording/block in its own subplot,
       labelled with the corresponding current step."""
    plt.figure(figsize=(12, 16))
    stimulus_range = range(-100, 1150, 50)

    for plot_num, seg in enumerate(block.segments):
        ax = plt.subplot(5, 5, plot_num + 1)

        timecourse = seg.analogsignals[0].times - seg.analogsignals[0].t_start
        timecourse = timecourse.rescale('ms')
        plt.plot(timecourse, seg.analogsignals[0])
        plt.title('{} pA current step'.format(stimulus_range[plot_num]))

        if plot_num % 5 == 0:
            plt.yticks(np.arange(-90, 40, 10), fontsize=10)
            ax.set_ylabel('mV')
        else:
            plt.yticks(np.arange(-90, 40, 10), [''])

        if plot_num <= 4 or plot_num >= 20:
            plt.xticks(np.arange(0, 250, 50), fontsize=10)
            ax.set_xlabel('time (ms)')
        else:
            plt.xticks(np.arange(0, 250, 50), [''])

    plt.tight_layout()

plot_recordings(block)

png

Extracting spike features

To analyze these recordings I use a library called the Electrophys Feature Extraction Library (eFEL). This package allows me to extract specific features from experimental or model-generated recordings.

In this example I will identify each spike that occurs within the stimulus period and plot a marker at the peak of each spike.
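
Before extracting anything, it can help to see what eFEL is able to compute (a quick sketch using the same camelCase eFEL API as getFeatureValues below):

# list the names of all features eFEL can extract
feature_names = efel.getFeatureNames()
print(len(feature_names))
print([name for name in feature_names if 'peak' in name])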

def build_efel_traces(block):
    """Takes a block and returns a list of trace dicts containing the information
       eFEL needs for feature extraction (time, voltage, stim_start and stim_end)."""
    efel_traces = []
    for seg in block.segments:
        time = seg.analogsignals[0].times - seg.analogsignals[0].t_start
        time = time.rescale('ms')
        voltage = seg.analogsignals[0]
        stim_start = 1
        stim_end = 100
        efel_traces.append({'T': time, 'V': voltage,
                            'stim_start': [stim_start], 'stim_end': [stim_end]})
    return efel_traces

def get_spike_features(block):
    """Takes a recording block and returns a list of dicts (one per segment)
       with the extracted spike features."""
    traces = build_efel_traces(block)
    features = efel.getFeatureValues(traces, ['Spikecount', 'peak_time', 'peak_voltage'])
    return features

features = get_spike_features(block)

The features list contains dicts with the extracted features for each segment. For example, the last segment here had two spikes.

features[24]
{'Spikecount': array([2]),
 'peak_time': array([ 4.9,  8. ]),
 'peak_voltage': array([ 31.7993145, -12.2070303])}
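
One caveat before plotting (this depends on the eFEL version, so treat it as an assumption worth checking): for traces with no spikes, peak_time and peak_voltage may come back as an empty array or as None, which is why the plotting code below guards against both.

# the -100 pA step produced no spikes, so inspect what eFEL returned for it
print(features[0]['Spikecount'])  # array([0])
print(features[0]['peak_time'])   # empty array or None, depending on the eFEL version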

Now I can create a plot_spikes() function by modifying the plot_recordings function to take in the features list.

def plot_spikes(block, features):
    """The features list should contain the peak times and peak voltages of each
       extracted spike. Plots every trace/analogsignal from a single recording/block
       and marks the peak of each counted spike."""

    stimulus_range = range(-100, 1150, 50)
    plt.figure(figsize=(12, 16))

    for plot_num, (seg, feats) in enumerate(zip(block.segments, features)):
        ax = plt.subplot(5, 5, plot_num + 1)
        timecourse = seg.analogsignals[0].times - seg.analogsignals[0].t_start
        timecourse = timecourse.rescale('ms')
        plt.plot(timecourse[:7500], seg.analogsignals[0][:7500])
        plt.title('{} pA current step'.format(stimulus_range[plot_num]))

        # peak_time is already in ms because the eFEL traces were built with T in ms
        peak_time, peak_voltage = feats['peak_time'], feats['peak_voltage']
        if peak_time is not None and peak_time.size != 0:
            ax.plot(peak_time, peak_voltage, 'o')

        if plot_num % 5 == 0:
            plt.yticks(np.arange(-90, 40, 10), fontsize=10)
            ax.set_ylabel('mV')
        else:
            plt.yticks(np.arange(-90, 40, 10), [''])

        if plot_num <= 4 or plot_num >= 20:
            plt.xticks(np.arange(0, 150, 50), fontsize=10)
            ax.set_xlabel('time (ms)')
        else:
            plt.xticks(np.arange(0, 150, 50), [''])

    plt.tight_layout()

plot_spikes(block, features)

png

Processing a list of recordings

Now that we can extract features from a single recording and visualize them to check that the extraction is doing what we expect, we can wrap these steps in a function that extracts the spike counts from a whole list of recordings.

def process_recording_list(recording_list):
    """Takes a list of recording filenames and returns a dictionary with
       filename as key and a list of spike counts (one per current step) as value."""

    spike_dict = {}
    for recording in recording_list:
        block = read_recording(recording)
        recording_features = get_spike_features(block)

        step_list = []
        for current_step in recording_features:
            step_list.append(int(current_step['Spikecount']))

        spike_dict[block.file_origin] = step_list

    return spike_dict

spike_dict = process_recording_list(recording_list)
stim_values = range(-100, 1101, 50)
df = pd.DataFrame(spike_dict, index=stim_values).T
df
-100 -50 0 50 100 150 200 250 300 350 ... 650 700 750 800 850 900 950 1000 1050 1100
2013_02_15_0000.abf 0 0 0 0 0 1 1 2 2 4 ... 6 5 5 6 6 5 5 5 4 4
2013_02_19_0043.abf 0 0 0 0 0 1 1 2 2 3 ... 4 3 3 3 3 3 3 3 3 2
2013_03_06_0000.abf 0 0 0 0 1 1 4 5 8 7 ... 3 3 3 3 2 2 2 2 2 2
2013_03_08_0005.abf 0 0 0 0 0 1 1 1 1 1 ... 2 2 2 2 2 2 2 3 2 2
2013_04_10_0000.abf 0 0 0 0 0 1 1 1 1 1 ... 2 2 3 2 3 3 3 2 2 2
2013_04_10_0003.abf 0 0 0 0 1 1 1 1 2 2 ... 2 2 2 2 2 2 2 2 2 2
2013_04_10_0006.abf 0 0 0 0 0 1 1 2 3 4 ... 8 8 7 8 7 5 6 6 7 19
2013_04_10_0014.abf 0 0 0 0 0 1 1 2 2 3 ... 4 3 3 3 2 3 2 2 2 2
2013_04_11_0000.abf 0 0 0 0 0 1 1 1 3 3 ... 3 2 2 2 2 2 2 2 2 2

9 rows × 25 columns

Fitting a sigmoid function to spike output data

We want to fit a sigmoidal function to the spike output of each cell as a function of injected current.

First, define the function (a sets the saturation level, x0 the current at half-maximal output, and k the steepness of the curve):

def sigmoid(x, a, x0, k):
    y = a / (1 + np.exp((x0 - x) / k))
    return y

def fit_sig(dataseries):
    """Fits a sigmoid function to a series of data points.
       Returns a set of 3 parameters to define the function."""

    a = dataseries.max() - 2
    x0 = 400
    k = 50
    guess = (a, x0, k)

    popt, pcov = curve_fit(sigmoid, dataseries.index, dataseries,
                           guess, maxfev=2500)
    return popt

Then, to see how well the fitted function represents the data, we plot it on top of the spike number vs. injected current points:

dataseries = df.loc['2013_03_06_0000.abf']
single_cell_fit = fit_sig(dataseries)
X = dataseries.index[:17]
y = dataseries[:17]

y_fits = sigmoid(X, *single_cell_fit)

plt.scatter(dataseries.index[:17], dataseries[:17])
plt.plot(X, y_fits)
plt.xlabel('Current injected (pA)')
plt.ylabel('Number of spikes')
plt.title('Poor fit')
<matplotlib.text.Text at 0x7fcca9bd3610>

png

This is an example of a poor fit: the points at the largest injected currents, where the spike count has already dropped off again, are weighted too heavily. We are most interested in the activating part of the curve, before the input/output relationship saturates, so we modify the fitting function accordingly:

def fit_sig(dataseries):
    """Fits a sigmoid function to a series of data points.
       Returns a set of 3 parameters to define the function."""

    a = dataseries.max() - 2
    x0 = 400
    k = 50
    guess = (a, x0, k)

    # fit only up to the current step of maximal output, plus one more step
    max_current = dataseries.idxmax() + 50
    n_points = len(dataseries.loc[:max_current])

    popt, pcov = curve_fit(sigmoid, dataseries.index[:n_points],
                           dataseries.loc[:max_current], guess, maxfev=16000)
    return popt

single_cell_fit = fit_sig(dataseries)
X = dataseries.index[:17]
y = dataseries[:17]

y_fits = sigmoid(X, *single_cell_fit)

plt.scatter(dataseries.index[:17], dataseries[:17])
plt.plot(X, y_fits)
plt.xlabel('Current injected (pA)')
plt.ylabel('Number of spikes')
plt.title('Improved fit')
<matplotlib.text.Text at 0x7fcca961ae10>

png

As a sanity check, we can plot the fits for each of the neurons in the list of recordings.

for i, row in df.iterrows():
    plt.figure()
    fits = fit_sig(row)
    y_values = sigmoid(row.index, *fits)
    plt.scatter(df.columns[:17], row[:17])
    plt.plot(row.index, y_values)
    plt.xlabel('Current injected (pA)')
    plt.ylabel('Number of spikes')
    plt.title(i)

png

png

png

png

png

png

png

png

png

Once we've determined that those fits are satisfactory, we can apply the fitting function to each recording in the dataframe and get a resulting dataframe of the fit values for each neuron.

def fit_dataframe(df):
    """Takes a dataframe of spike counts per current step as parameter.
       Fits a sigmoidal function for each neuron recording and returns
       a dataframe with fit parameters for each neuron recording. """
    result = pd.DataFrame()
    for i, row in df.iterrows():
        fit_values = fit_sig(row)
        frames = [result, pd.DataFrame(fit_values)]
        result = pd.concat(frames,axis=1)
    result.columns = df.index
    return result.T.rename(columns={0: 'a', 1: 'x0', 2: 'k'})
fits_df = fit_dataframe(df)
fits_df
a x0 k
2013_02_15_0000.abf 5.702951 304.418502 64.675765
2013_02_19_0043.abf 4.307646 291.948754 82.887837
2013_03_06_0000.abf 7.720273 207.414223 39.948745
2013_03_08_0005.abf 2.173899 304.535559 125.139246
2013_04_10_0000.abf 2.273765 288.213602 109.743930
2013_04_10_0003.abf 2.583458 231.417318 93.624331
2013_04_10_0006.abf 8.240112 351.781098 81.572626
2013_04_10_0014.abf 4.465958 310.926909 97.424697
2013_04_11_0000.abf 3.814719 270.552561 67.999466
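
As a quick check (plain pandas, nothing eFEL-specific), the spread of the fit parameters across cells can be summarised; the column means are the same values plot_data() uses below to draw the average sigmoid:

# summary statistics of the sigmoid parameters across the nine recordings
print(fits_df.describe())
print(fits_df.mean())  # these means define the sigmoid drawn in the final figure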

Final plot: putting it all together

Requires:

- the average number of spikes at each current step
- the standard error of the spike counts at each current step
- the mean fit parameters across cells

def plot_data(analysis_df, fit_df, colour='black', label=None):
    """Plots the mean spike count (with SEM error bars) at each current step,
       together with a sigmoid built from the mean fit parameters across cells."""
    a_mean = fit_df.a.mean()
    x0_mean = fit_df.x0.mean()
    k_mean = fit_df.k.mean()
    popt = (a_mean, x0_mean, k_mean)

    y_err = stats.sem(analysis_df)

    x = np.linspace(-100, 700, 100)
    y = sigmoid(x, *popt)

    plt.errorbar(analysis_df.columns[:17], analysis_df.mean()[:17], yerr=y_err[:17],
                 fmt='o', c=colour, label=label)
    plt.xlim(-150, 750)
    plt.ylim(-0.5, 6)
    plt.xlabel('Current injected (pA)')
    plt.ylabel('Number of spikes')
    plt.plot(x, y, c='blue')
    plt.legend(loc=2)
import seaborn as sns
sns.set_style('whitegrid')
sns.set_context('paper', font_scale=1.75)
plot_data(df, fits_df, label='Example')

png