# step1: read your data and do some basic check (visualization is important)
# step2: prepare a 2D -parameter mesh grid, which can be done by meshgrid function in numpy
# step3: Given the 2D parameter grid, we can calculate the probability of each datum at the every grid coordinate, i.e. , a 2D probability for each data point. It is usually convenient to do it in log scale.
# step4: sum the log probability of all data points, then we have the joint 2D log probability for
the whole data set.
# step5: find the maximum joint log probability and subtract it. We do this to avoid numerical outflow problem since unnormalized joint probability is usually a very small value.
# step6: exp(log prob) and plot a contour
The example of CA3_problem4
from numpy import *
import math
import matplotlib
import matplotlib.pyplot as plt
import asciitable
import scipy
import math
import matplotlib
import matplotlib.pyplot as plt
import asciitable
import scipy
# read the data
data = asciitable.read('fv05_vf05_data.txt')
detection = data['comp'] #detection or not 1/0
feh = data['metallicity'] #metallicity
# check data
n = detection.size #the total number of data point
print feh.min(),feh.max() # metallicity range in the data
#choose the proper range of alpha, f>0 --> alpha>0
alpha_grid= arange(0.01,0.1,0.001)
beta_grid = arange(.0,3.0,0.01)
# generate a 2D meshgrid for alpha and beta
(alpha,beta) = meshgrid(alpha_grid,beta_grid)
logpdf = zeros((beta_grid.size,alpha_grid.size))
print feh.min(),feh.max() # metallicity range in the data
#choose the proper range of alpha, f>0 --> alpha>0
alpha_grid= arange(0.01,0.1,0.001)
beta_grid = arange(.0,3.0,0.01)
# generate a 2D meshgrid for alpha and beta
(alpha,beta) = meshgrid(alpha_grid,beta_grid)
logpdf = zeros((beta_grid.size,alpha_grid.size))
# loop through each data point
for i in range(n):
f = alpha*10**(beta*feh[i])
f[(f >1)] = None
#set f to none if f>1, here we don't need to check f>0
# because we already set alpha>0
# sum the log(prob) of the all data
logpdf += log(detection[i]*f+(1-detection[i])*(1.-f))
# set the log(prob) at forbidden (alpha,beta) value to -999
whereAreNaNs = isnan(logpdf)
logpdf[whereAreNaNs] = -999
# find the max logpdf
print logpdf.max()
for i in range(n):
f = alpha*10**(beta*feh[i])
f[(f >1)] = None
#set f to none if f>1, here we don't need to check f>0
# because we already set alpha>0
# sum the log(prob) of the all data
logpdf += log(detection[i]*f+(1-detection[i])*(1.-f))
# set the log(prob) at forbidden (alpha,beta) value to -999
whereAreNaNs = isnan(logpdf)
logpdf[whereAreNaNs] = -999
# find the max logpdf
print logpdf.max()
# find the peak location
ind = where(logpdf == logpdf.max())
print 'the peak is at alpha=%.4f,beta=%.4f' %(alpha_grid[ind[1]],beta_grid[ind[0]])
logpdf = logpdf-logpdf.max()
# pdf is the normal probability function
pdf = exp(logpdf)
# 2D plot
plt.figure()CS = plt.contour(alpha,beta,pdf)
No comments:
Post a Comment