# step1: read your data and do some basic check (visualization is important)

# step2: prepare a 2D -parameter mesh grid, which can be done by meshgrid function in numpy

# step3: Given the 2D parameter grid, we can calculate the probability of each datum at the every grid coordinate, i.e. , a 2D probability for each data point. It is usually convenient to do it in log scale.

# step4: sum the log probability of all data points, then we have the joint 2D log probability for

the whole data set.

# step5: find the maximum joint log probability and subtract it. We do this to avoid numerical outflow problem since unnormalized joint probability is usually a very small value.

# step6: exp(log prob) and plot a contour

The example of CA3_problem4

from numpy import *

import math

import matplotlib

import matplotlib.pyplot as plt

import asciitable

import scipy

import math

import matplotlib

import matplotlib.pyplot as plt

import asciitable

import scipy

# read the data

data = asciitable.read('fv05_vf05_data.txt')

detection = data['comp'] #detection or not 1/0

feh = data['metallicity'] #metallicity

# check data

n = detection.size #the total number of data point

print feh.min(),feh.max() # metallicity range in the data

#choose the proper range of alpha, f>0 --> alpha>0

alpha_grid= arange(0.01,0.1,0.001)

beta_grid = arange(.0,3.0,0.01)

# generate a 2D meshgrid for alpha and beta

(alpha,beta) = meshgrid(alpha_grid,beta_grid)

logpdf = zeros((beta_grid.size,alpha_grid.size))

print feh.min(),feh.max() # metallicity range in the data

#choose the proper range of alpha, f>0 --> alpha>0

alpha_grid= arange(0.01,0.1,0.001)

beta_grid = arange(.0,3.0,0.01)

# generate a 2D meshgrid for alpha and beta

(alpha,beta) = meshgrid(alpha_grid,beta_grid)

logpdf = zeros((beta_grid.size,alpha_grid.size))

# loop through each data point

for i in range(n):

f = alpha*10**(beta*feh[i])

f[(f >1)] = None

#set f to none if f>1, here we don't need to check f>0

# because we already set alpha>0

# sum the log(prob) of the all data

logpdf += log(detection[i]*f+(1-detection[i])*(1.-f))

# set the log(prob) at forbidden (alpha,beta) value to -999

whereAreNaNs = isnan(logpdf)

logpdf[whereAreNaNs] = -999

# find the max logpdf

print logpdf.max()

for i in range(n):

f = alpha*10**(beta*feh[i])

f[(f >1)] = None

#set f to none if f>1, here we don't need to check f>0

# because we already set alpha>0

# sum the log(prob) of the all data

logpdf += log(detection[i]*f+(1-detection[i])*(1.-f))

# set the log(prob) at forbidden (alpha,beta) value to -999

whereAreNaNs = isnan(logpdf)

logpdf[whereAreNaNs] = -999

# find the max logpdf

print logpdf.max()

# find the peak location

ind = where(logpdf == logpdf.max())

print 'the peak is at alpha=%.4f,beta=%.4f' %(alpha_grid[ind[1]],beta_grid[ind[0]])

logpdf = logpdf-logpdf.max()

# pdf is the normal probability function

pdf = exp(logpdf)

# 2D plot

plt.figure()CS = plt.contour(alpha,beta,pdf)

## No comments:

## Post a Comment