import numpy as np
import sympy as sy
from scipy import optimize
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid.axislines import SubplotZero
In today's seminar we will take a look at data fitting as well as some useful scipy and sympy functions. You can find a lot of information on the topic on the following pages:
# Start an interactive sympy session (prints a banner when quiet=False).
sy.init_session(quiet = False)
# NOTE(review): init_session injects `x` and `latex` only into an IPython
# namespace; define/qualify them explicitly so this also runs as a script.
x = sy.symbols("x")
a = sy.Integral(x, (x, 1, 2))  # the (unevaluated) integral of x from 1 to 2
l = sy.latex(a)                # LaTeX source for the expression
print(l)
Recap: what is a function?
def guess_a_number(a):
    """Return a message saying whether *a* is a large number (i.e. > 5)."""
    if a > 5:
        return "that is a very large number"
    return "this is not a large number"
# Demonstrate both branches (print() call syntax; the statement form is Python 2 only).
print(guess_a_number(10))
print(guess_a_number(1))
Example: We can use functions to spawn a set of parameters. The following function prepares the plot environment.
def init_plot(Xmax = 3, Ymax = 3):
    """Prepare a 'textbook' style coordinate system and return its axes.

    Draws bold x/y axes through the origin with arrow heads at their
    positive ends, hides the surrounding frame and all ticks.

    Parameters
    ----------
    Xmax, Ymax : float, default 3
        Upper limit (and arrow-head position) of the x- and y-axis.

    Returns
    -------
    The SubplotZero axes, ready for plotting.

    Note: the `%matplotlib inline` magic that used to sit here is IPython
    syntax, not Python — it must be executed once at notebook level.
    """
    fig = plt.figure(1)
    ax = SubplotZero(fig, 111)
    fig.add_subplot(ax)
    # bold lines through the origin act as the visible x- and y-axes
    ax.axhline(linewidth=1.7, color="black")
    ax.axvline(linewidth=1.7, color="black")
    plt.xticks([])
    plt.yticks([])
    # bug fix: limits were hard-coded to 3, silently ignoring Xmax/Ymax
    ax.set_xlim([-0.5, Xmax])
    ax.set_ylim([-0.5, Ymax])
    # show only the zero-crossing spines, hide the outer frame
    for direction in ["xzero", "yzero"]:
        ax.axis[direction].set_visible(True)
    for direction in ["left", "right", "bottom", "top"]:
        ax.axis[direction].set_visible(False)
    # arrow heads at the positive ends of both axes
    plt.arrow(Xmax, 0.0035, 0.1, 0, width=0.01, color="k", clip_on=False,
              head_width=0.12, head_length=0.06)
    plt.arrow(0.0035, Ymax, 0, 0.1, width=0.01, color="k", clip_on=False,
              head_width=0.06, head_length=0.12)
    return ax
axes = init_plot()
# the segment y = x on [1, 2], drawn once as a line and once as markers
axes.plot([1, 2], [1, 2], "-")
axes.plot([1, 2], [1, 2], "o")
# shade the area between the segment and the x-axis
axes.fill_between([1, 2], [1, 2], [0, 0], alpha=0.4)
# faint dashed guides connecting the end points to the axes
axes.plot([0, 1, 1, 2, 2, 0], [1, 1, 0, 0, 2, 2], "--k", alpha=0.3)
plt.show()
# Symbolic derivative of x**2; use an explicit symbol so this does not
# depend on the notebook-only namespace from init_session.
x_sym = sy.symbols("x")
a = sy.diff(x_sym**2, x_sym)
l = sy.latex(a)  # qualify latex() — it is a sympy function, not a builtin
print(l)
# Parabola y = x^2 together with its tangent at the point (1, 1).
xs = np.linspace(-0.5, 3, 30)
ys = xs ** 2.
ti = np.array([-0.3, 0.3])  # parameter range of the short tangent segment

axes = init_plot()
axes.plot(xs, ys, "-")
# tangent segment around x = 1 (arctan-based construction)
axes.plot(ti + 1, np.arctan(ti) + (ti + 1), "-")
axes.plot([1], [1], "o")
# dashed guides from (1, 1) to the axes
axes.plot([0, 1, 1], [1, 1, 0], "--k", alpha=0.3)
plt.show()
In the simplest general case we can numerically fit a set of data points ($x_i, y_i$) by comparing them to a set of functions $f$ spawned from a set of parameters $\beta$.
Following, we will try to understand the formulation above and try to create a simple fitting algorithm. First, we create a data set for x and y using the following example function $f(x) = b x^2$.
def X2(x, b):
    """Quadratic model: return f(x) = b * x**2 (works on scalars and arrays)."""
    return b * x ** 2
x = np.linspace(-2.,+2,20) ### spawns a set of 20 evenly spaced x-coordinates on [-2, 2]
We can directly use the function from above to create a set of graphs with a range of parameters $\beta$ from zero to five.
# Draw the family of parabolas f(x) = b x^2 for b = 0 .. 4.
plt.figure(figsize=(10, 5))
for b_val in range(5):
    plt.plot(x, X2(x, b_val), "--", label="b=" + str(b_val))
plt.legend()
plt.show()
However, we would like to create an array with size (m,n) with the following shape:
$Y = f(x_{m,n} \beta_n ) = f \begin{pmatrix} x_{1,1},\beta_1 & x_{1,2},\beta_2 & \cdots & x_{1,n},\beta_n \\ x_{2,1},\beta_1 & x_{2,2},\beta_2 & \cdots & x_{2,n},\beta_n \\ \vdots & \vdots & \ddots & \vdots \\ x_{m,1},\beta_1 & x_{m,2},\beta_2 & \cdots & x_{m,n},\beta_n \end{pmatrix}$
# Build Y with one row per candidate parameter b = 0 .. 9, each row the
# model evaluated on the shared x grid (comprehension replaces append loop).
b = list(range(10))
Y = np.asarray([X2(x, i) for i in b])
print(b)  # print() call — the Python 2 statement form no longer parses
Let's choose one $\beta == 2$, compute $y = f(x,\beta=2)$ and then try to find the corresponding line in Y.
# Reference data for beta = 2; plot it on top of all candidate curves.
y = X2(x, 2)
print(Y.T.shape, x.shape)  ### .T transposes the array
plt.figure(figsize=(10, 5))
plt.plot(x, Y.T)        # every candidate curve (one column per beta)
plt.plot(x, y, "ok")    # the beta = 2 target data as black circles
plt.show()
We now compute the residual with:
$Res = Y-y = f \begin{pmatrix} x_{1,1},\beta_1 & x_{1,2},\beta_2 & \cdots & x_{1,n},\beta_n \\ x_{2,1},\beta_1 & x_{2,2},\beta_2 & \cdots & x_{2,n},\beta_n \\ \vdots & \vdots & \ddots & \vdots \\ x_{m,1},\beta_1 & x_{m,2},\beta_2 & \cdots & x_{m,n},\beta_n\end{pmatrix}-\begin{pmatrix} y_{1} \\y_{2} \\ \vdots \\ y_{m} \end{pmatrix}$
Residual = np.abs(Y-y)  ### element-wise |Y - y|: per-beta, per-point distance to the target curve
Plot the residual
# Visualise the residuals: one column of points per candidate beta; mark
# the beta index whose residuals have the smallest spread.
plt.figure(figsize=(10, 5))
plt.plot(Residual, ".k")
spread = Residual.std(1)
best = np.argmin(spread)
plt.plot(best, spread[best], "o", label="Minimum")
plt.xlabel(r"$\beta$ index")
plt.ylabel("residual amplitude")
plt.legend()
plt.show()
To find the best fit we will now sum up the amplitudes for the individual indices.
# Squared sum of residual amplitudes: one scalar score per candidate beta.
Mresidual = Residual.sum(1)**2
print(Mresidual)  # print() call — Python 2 statement form no longer parses
Next, we will search for the minimum index of the summed up residual distribution.
# Index of the smallest summed residual == index of the best-fitting beta.
index = np.argmin(Mresidual)
print(index)
# The bare expression only displayed in a notebook; print it explicitly.
print(b[index])  # <--- b parameter for the correct fitting function
We can summarise the previous steps for finding a sufficient fitting parameter as follows:
def simple_fitting(x, y, start=0., end=10., steps=100):
    """Brute-force fit of the parameter b in f(x) = b * x**2.

    Evaluates X2 on a grid of `steps` candidate values of b between
    `start` and `end` and returns the candidate with the smallest
    sum of squared residuals against the data y.

    Parameters
    ----------
    x, y : array_like
        Data coordinates; y is compared against X2(x, b).
    start, end : float
        Lower/upper bound of the candidate-parameter window.
    steps : int
        Number of candidate parameters spawned in the window.

    Returns
    -------
    float
        The best-fitting candidate value of b.
    """
    b = np.linspace(start, end, steps)
    print("test Bs:" + str(b))
    # meshgrid evaluates the model for every (x, b) pair at once, so the
    # old append loop (and the dead `Y = []` / asarray) is unnecessary.
    X, B = np.meshgrid(x, b)
    Y = X2(X, B)
    Residual = abs(Y - y)                ### compute residual
    Mresidual = (Residual**2).sum(1)     ### summed squared residual per candidate
    index = np.argmin(Mresidual)         ### minimum of residual distribution
    return b[index]                      ### fitting parameter
# Recover a known integer parameter (b = 3) with the brute-force fitter.
x = np.linspace(-2, 2)
y = X2(x, 3)
b = simple_fitting(x, y, start=0., end=9., steps=10)
print(b)  # print() call — Python 2 statement form no longer parses
plt.figure(figsize=(10, 5))
plt.plot(x, y, "o", label="data")
plt.plot(x, X2(x, b), label=r"fit with $\beta$=" + str(b))
plt.legend()
plt.show()
Let's try our algorithm on a randomly distorted distribution.
# Distort the clean data with uniform noise in [-5, 5) and refit.
# (The no-op `x = x` line was removed.)
yN = y + (np.random.rand(y.shape[0]) - 0.5) * 10.
bn = simple_fitting(x, yN)
print(bn)  # print() call — Python 2 statement form no longer parses
plt.figure(figsize=(10, 5))
plt.plot(x, yN, "o", label="data points")
plt.plot(x, X2(x, bn), "-", label="fit with b=" + str(bn))
plt.legend(loc="upper center")
plt.show()
This works well for finding single integer parameters in a set of integer numbers. But what about float numbers? We can use a larger, more finely spaced parameter set, as follows:
# A non-integer parameter needs a finer candidate grid to be resolved.
y = X2(x, 3.333333)
bsmall = simple_fitting(x, y, start=0., end=9., steps=10)     ### coarse grid: only 10 candidate betas
blarge = simple_fitting(x, y, start=0., end=9., steps=10000)  ### fine grid: 10000 candidate betas
# bug fix: previously printed the stale `b` from an earlier cell
print(bsmall, blarge)
plt.figure(figsize=(10, 5))
plt.plot(x, y, "o", label="data")
plt.plot(x, X2(x, bsmall), label=r"fit using 10 iteration")
plt.plot(x, X2(x, blarge), label=r"fit using 10000 iteration")
plt.legend()
plt.show()
For our approach we require a sufficient iteration window, and the accuracy depends on the spacing of the $\beta$ distribution (presented in the last example). Also, we haven't tested our fitting approach on multiple parameters. For multiple fitting parameters, our algorithm would not be able to handle multiple local minima. Therefore, we would need to enhance our algorithm by performing multiple fitting iterations with converging parameter windows, and to improve the guess ($\beta$) of each solution with an estimation ($\delta$) and evaluate it with a cost function $J$.
which could be solved with:
Let's not do this now .... Instead, let's use the curve_fit function provided by scipy's optimize module
The following steps describe the procedure for finding a set of fitting parameters using scipy's least-squares based `curve_fit`.
def XX2(x, a, b, c):
    """Quadratic model: return f(x) = a*x**2 + b*x + c."""
    return a * x**2. + b * x + c

x = np.linspace(-2., 2., 100)
# synthetic data: quadratic plus uniform noise in [-5, 5)
y = XX2(x, a = 2., b = 3., c= 1.)+(np.random.rand(100)-0.5)*10.
# (the bare `curve_fit` expression was removed — it only displayed the
# function object in a notebook and is a NameError in a script)
# fitP holds the best-fit (a, b, c); error is the 3x3 covariance matrix
fitP, error = optimize.curve_fit(XX2, x, y, p0 = [2., 3., 1.])
print(fitP)
print(fitP.shape, error.shape)
plt.figure(figsize=(10, 5))
plt.plot(x, y, ".")
plt.plot(x, XX2(x, fitP[0], fitP[1], fitP[2]))
#### plotting the error
# bug fix: `error` is the covariance matrix, so its first row is NOT the
# per-parameter uncertainty; the 1-sigma errors are sqrt of its diagonal.
perr = np.sqrt(np.diag(error))
plt.plot(x, XX2(x, fitP[0] + perr[0], fitP[1] + perr[1], fitP[2] + perr[2]), "-b", alpha=0.5)
plt.plot(x, XX2(x, fitP[0] - perr[0], fitP[1] - perr[1], fitP[2] - perr[2]), "-b", alpha=0.5)
plt.show()
We will continue here next week ....