How to properly install beta distribution in python?

Question

How to properly install beta distribution in python?

I am trying to get the right way to fit the beta distribution. This is not a real world problem, I just check the effects of several different methods, and at the same time something puzzles me.

Here is the python code I'm working on, in which I tested 3 different approaches: 1>: fitting using moments (average sample value and variance). 2>: whenever possible, minimizing negative logarithmic likelihood (using scipy.optimize.fmin ()). 3>: just call scipy.stats.beta.fit ()

from scipy.optimize import fmin
from scipy.stats import beta
from scipy.special import gamma as gammaf
import matplotlib.pyplot as plt
import numpy


def betaNLL(param,*args):
    '''Negative log likelihood function for beta
    <param>: list for parameters to be fitted.
    <args>: 1-element array containing the sample data.

    Return <nll>: negative log-likelihood to be minimized.
    '''

    a,b=param
    data=args[0]
    pdf=beta.pdf(data,a,b,loc=0,scale=1)
    lg=numpy.log(pdf)
    #-----Replace -inf with 0s------
    lg=numpy.where(lg==-numpy.inf,0,lg)
    nll=-1*numpy.sum(lg)
    return nll

#-------------------Sample data-------------------
data=beta.rvs(5,2,loc=0,scale=1,size=500)

#----------------Normalize to [0,1]----------------
#data=(data-numpy.min(data))/(numpy.max(data)-numpy.min(data))

#----------------Fit using moments----------------
mean=numpy.mean(data)
var=numpy.var(data,ddof=1)
alpha1=mean**2*(1-mean)/var-mean
beta1=alpha1*(1-mean)/mean

#------------------Fit using mle------------------
result=fmin(betaNLL,[1,1],args=(data,))
alpha2,beta2=result

#----------------Fit using beta.fit----------------
alpha3,beta3,xx,yy=beta.fit(data)

print '\n# alpha,beta from moments:',alpha1,beta1
print '# alpha,beta from mle:',alpha2,beta2
print '# alpha,beta from beta.fit:',alpha3,beta3

#-----------------------Plot-----------------------
plt.hist(data,bins=30,normed=True)
fitted=lambda x,a,b:gammaf(a+b)/gammaf(a)/gammaf(b)*x**(a-1)*(1-x)**(b-1) #pdf of beta

xx=numpy.linspace(0,max(data),len(data))
plt.plot(xx,fitted(xx,alpha1,beta1),'g')
plt.plot(xx,fitted(xx,alpha2,beta2),'b')
plt.plot(xx,fitted(xx,alpha3,beta3),'r')

plt.show()

The problem I have is the normalization process ( z=(x-a)/(b-a)), where aand bare the min and max of the sample, respectively.

, , , .

, , .

( ) .

scipy.stats.beta.fit() ( ) , , .

MLE ( ) .

, , . , x=0 x=1 -. , , [0,1]? , ?

+4

python curve-fitting beta-distribution

Jason 27 . '14 21:37

2

jdj081 · Answer 1 · 2014-08-29T16:41:17+0000

docstring beta.fit , , beta.fit, kwargs floc fscale.

beta.fit, floc fscale kwargs. , ints floats, , . ( . , .)

>>> from scipy.stats import beta
>>> import numpy
>>> def betaNLL(param,*args):
    '''Negative log likelihood function for beta
    <param>: list for parameters to be fitted.
    <args>: 1-element array containing the sample data.

    Return <nll>: negative log-likelihood to be minimized.
    '''

    a,b=param
    data=args[0]
    pdf=beta.pdf(data,a,b,loc=0,scale=1)
    lg=numpy.log(pdf)
    #-----Replace -inf with 0s------
    lg=numpy.where(lg==-numpy.inf,0,lg)
    nll=-1*numpy.sum(lg)
    return nll

>>> data=beta.rvs(5,2,loc=0,scale=1,size=500)
>>> beta.fit(data)
(5.696963536654355, 2.0005252702837009, -0.060443307228404922, 1.0580278414086459)
>>> beta.fit(data,floc=0,fscale=1)
(5.0952451826831462, 1.9546341057106007, 0, 1)
>>> beta.fit(data,floc=0.,fscale=1.)
(5.0952451826831462, 1.9546341057106007, 0.0, 1.0)

, , ( ) . , , . , 0 1, , 0 1. , , beta.fit , , 0 1, beta.fit loc=-0.06 scale=1.058.

user545424 · Answer 2 · 2014-04-28T00:01:04+0000

, beta.pdf() 0 inf 0 1. :

>>> from scipy.stats import beta
>>> beta.pdf(1,1.05,0.95)
/usr/lib64/python2.6/site-packages/scipy/stats/distributions.py:1165: RuntimeWarning: divide by zero encountered in power
  Px = (1.0-x)**(b-1.0) * x**(a-1.0)
inf
>>> beta.pdf(0,1.05,0.95)
0.0

, 0 1 . "" , pdf 0, , inf. , :

def betaNLL(param,*args):
    """
    Negative log likelihood function for beta
    <param>: list for parameters to be fitted.
    <args>: 1-element array containing the sample data.

    Return <nll>: negative log-likelihood to be minimized.
    """

    a, b = param
    data = args[0]
    pdf = beta.pdf(data,a,b,loc=0,scale=1)
    lg = np.log(pdf)
    mask = np.isfinite(lg)
    nll = -lg[mask].sum()
    return nll

, , .

How to properly install beta distribution in python?

More articles: