In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [None]:
def do_work_numpy(a):
    """Return ``sin(a - 1) + 1`` computed with NumPy's ``sin`` on the whole input.

    ``a`` may be anything that supports ``a - 1`` and ``np.sin`` (a scalar,
    an ndarray, a pandas Series, ...); the arithmetic is applied to the
    input as a whole rather than element by element in Python.
    """
    shifted = a - 1
    return np.sin(shifted) + 1

def do_work(a):
    """Scalar variant: return ``sin(a - 1) + 1`` for one number using ``math.sin``."""
    shifted = a - 1
    return math.sin(shifted) + 1

# Wrap the scalar do_work with np.vectorize so it can be called on a whole
# array/Series; otypes fixes the output dtype to float64.
do_work_vector = np.vectorize(
    pyfunc=do_work,
    otypes=[np.float64],
)

def do_work_row(row):
    """Return ``sin(row['a'] - 1) + 1`` for a single row.

    ``row`` only needs to support ``row['a']`` lookup; the other entries
    of the row are ignored.
    """
    value = row['a']
    return math.sin(value - 1) + 1

def do_work_python(a):
    """Compute ``sin(a - 1) + 1`` element by element with an explicit Python loop.

    Parameters
    ----------
    a : array-like
        Numeric input (ndarray, pandas Series, list, ...). It is converted
        to an ndarray once, up front.

    Returns
    -------
    numpy.ndarray
        A float64 array with the same shape as ``np.asarray(a)``.

    Notes
    -----
    Indexing a pandas Series with ``a[i]`` is a *label* lookup, so looping
    over ``range(a.size)`` directly on a Series only works when its index
    happens to be 0..n-1. Working on the underlying array makes the loop
    positional regardless of the index.
    """
    values = np.asarray(a)
    result = np.empty(values.shape)
    # Flat 1-D views let a single index walk inputs of any dimensionality.
    # `result` is freshly allocated (contiguous), so ravel() returns a view
    # and writes through `flat_out` land in `result`.
    flat_in = values.ravel()
    flat_out = result.ravel()
    for i in range(flat_in.size):
        flat_out[i] = math.sin(flat_in[i] - 1) + 1
    return result

import numexpr
def do_work_numexpr(a):
    """Evaluate ``sin(a - 1) + 1`` by passing the expression string to numexpr."""
    # The operand is bound explicitly instead of letting numexpr look up
    # the name ``a`` in the calling frame; the evaluated expression is the same.
    return numexpr.evaluate('sin(a - 1) + 1', local_dict={'a': a})

def do_work_pandas_eval(a):
    """Evaluate ``sin(a - 1) + 1`` through ``pd.eval`` using the numexpr engine."""
    # ``a`` is supplied via local_dict rather than being resolved from this
    # function's frame; the expression and engine are unchanged.
    return pd.eval('sin(a - 1) + 1', engine='numexpr', local_dict={'a': a})

In [None]:
# Benchmark input: three float64 columns of uniform random values in [0, 1).
n = 10 ** 7  # ten million rows per column
# A seeded generator makes the input identical on every Restart & Run All,
# so timings from different sessions are measured on the same data.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'a': rng.random(n),
    'b': rng.random(n),
    'c': rng.random(n),
})

In [None]:
# Whole-column NumPy version, timed on the float64 column 'a'.
%timeit do_work_numpy(df['a'])

In [None]:
# Series.apply with the scalar do_work (math.sin) as the applied function.
%timeit df['a'].apply(do_work)

In [None]:
# The np.vectorize wrapper around the scalar do_work.
%timeit do_work_vector(df['a'])

In [None]:
# Row-wise DataFrame.apply (axis=1): do_work_row is called with each row of df.
%timeit df.apply(do_work_row, axis=1)

In [None]:
# Explicit Python for-loop over the elements of column 'a'.
%timeit do_work_python(df['a'])

In [None]:
# Expression string evaluated directly by numexpr.
%timeit do_work_numexpr(df['a'])

In [None]:
# Same expression string evaluated through pd.eval with engine='numexpr'.
%timeit do_work_pandas_eval(df['a'])

## Single-Precision Equivalent

I was wondering whether it makes any difference to give these tools single-precision floats instead of double-precision ones. On my machine/library combination, the more efficient approaches run approximately 1.5&ndash;2 times faster.

In [None]:
# Single-precision copy of the benchmark frame: the same three columns,
# with each column's values cast to float32.
df_single = pd.DataFrame({
    col: df[col].to_numpy().astype(np.float32)
    for col in ('a', 'b', 'c')
})

In [None]:
# Whole-column NumPy version, timed on the float32 column.
%timeit do_work_numpy(df_single['a'])

In [None]:
# Series.apply with the scalar do_work, on the float32 column.
%timeit df_single['a'].apply(do_work)

In [None]:
# numexpr evaluation on the float32 column.
%timeit do_work_numexpr(df_single['a'])

In [None]:
# pd.eval with engine='numexpr' on the float32 column.
%timeit do_work_pandas_eval(df_single['a'])