Purpose
To understand MLE and code a function so that you can estimate the parameters for MLE. The basic idea is to look at any ARMA process , estimate the parameters , their standard errors, t statistics etc by maximizing the MLE function.

Typically one uses MLE function, tries to look at mean, sigma of the process. If there are let’s say q parameters and you fit a MLE function with q parameters, then one can maximize the MLE function.

This MLE function can then be subsequently used to find out various stuff about the parameters.

First lets simulate some data

> set.seed(1977)
> N <- 10000
> x <- cbind(1, runif(N))
> beta.true <- c(2, 3)
> error.var <- 3
> indep <- as.matrix(x)
> dep <- indep %*% beta.true + sqrt(error.var) * rnorm(N)
> fit <- lm(dep ~ indep[, 2])
> fit.sum <- summary(fit)
> coef(fit.sum)[, 1]
(Intercept)  indep[, 2]
   2.035283    2.957402

Now lets write the MLE function and the gradient function. The input to one pair is parameter as a vector. The input to another pairs is parameter as a list.

> myMLE <- function(params, n, dep, indep) {
+     beta.test <- as.matrix(params[1:2])
+     sigma.sq <- as.matrix(params[3])
+     if (sigma.sq <= 0)
+         return(NA)
+     e <- dep - indep %*% beta.test
+     loglike <- (-n/2) * log(2 * pi) - (n/2) * log(sigma.sq) -
+         (1/(2 * sigma.sq)) * t(e) %*% (e)
+     return(-loglike)
+ }
> myparams <- list(intercept = 1, slope = 1, sigma.sq = 1)
> myMLE.p <- function(myparams, n, dep, indep) {
+     myparams <- as.list(myparams)
+     beta.test <- as.matrix(c(myparams$intercept, myparams$slope))
+     sigma.sq <- as.matrix(myparams$sigma.sq)
+     if (sigma.sq <= 0)
+         return(NA)
+     e <- dep - indep %*% beta.test
+     loglike <- (-n/2) * log(2 * pi) - (n/2) * log(sigma.sq) -
+         (1/(2 * sigma.sq)) * t(e) %*% (e)
+     return(-loglike)
+ }
> ols.gradient <- function(params, n, dep, indep) {
+     beta.test <- as.matrix(params[1:2])
+     sigma.sq <- params[3]
+     e <- dep - indep %*% beta.test
+     g <- numeric(3)
+     g[1:2] <- (t(indep) %*% e)/sigma.sq
+     g[3] <- (-n/(2 * sigma.sq)) + (t(e) %*% e)/(2 * sigma.sq *
+         sigma.sq)
+     return(-g)
+ }
> fit <- optim(c(1, 1, 1), myMLE, n = N, dep = dep, indep = indep)
> cat("The fit parameters are ", fit$par)
The fit parameters are  2.035939 2.957095 3.051341
> fit <- optim(myparams, myMLE.p, n = N, dep = dep, indep = indep)
> cat("The fit parameters are ", fit$par)
The fit parameters are  2.035939 2.957095 3.051341
> fit <- optim(c(1, 1, 1), method = "L-BFGS-B", myMLE, gr = ols.gradient,
+     lower = c(-Inf, -Inf, 1e-06), upper = rep(Inf, 3), n = N,
+     dep = dep, indep = indep)
> cat("The fit parameters are ", fit$par)
The fit parameters are  2.035283 2.957401 3.051711
> fit <- optim(c(1, 1, 1), method = "L-BFGS-B", myMLE, gr = ols.gradient,
+     lower = c(-Inf, -Inf, 1e-06), upper = rep(Inf, 3), hessian = TRUE,
+     n = N, dep = dep, indep = indep)
> inverted <- solve(fit$hessian)
> results <- cbind(fit$par, sqrt(diag(inverted)), fit$par/sqrt(diag(inverted)))
> colnames(results) <- c("Coefficient", "Std. Err.", "t")
> rownames(results) <- c("Sigma", "Intercept", "X")
> cat("MLE results --\n")
MLE results --
> print(results)
          Coefficient  Std. Err.        t
Sigma        2.035283 0.03509248 57.99769
Intercept    2.957401 0.06077070 48.66492
X            3.051711 0.04315770 70.71068

Results using other optimization routines are

> fit <- optim(c(1, 1, 1), method = "Nelder-Mead", myMLE, gr = ols.gradient,
+     n = N, dep = dep, indep = indep)
> cat("The fit using Nelder Mead is ", fit$par, "\n")
The fit using Nelder Mead is  2.035939 2.957095 3.051341
> fit <- optim(c(1, 1, 1), method = "L-BFGS-B", myMLE, gr = ols.gradient,
+     lower = c(-Inf, -Inf, 1e-06), upper = rep(Inf, 3), hessian = TRUE,
+     n = N, dep = dep, indep = indep)
> cat("The fit using L-BFGS-B is ", fit$par)
The fit using L-BFGS-B is  2.035283 2.957401 3.051711
> fit <- optim(c(1, 1, 1), method = "SANN", myMLE, gr = ols.gradient,
+     n = N, dep = dep, indep = indep)
> cat("The fit using SANN is ", fit$par)
The fit using SANN is  1 1 1
> fit <- optim(c(1, 1, 1), method = "CG", myMLE, gr = ols.gradient,
+     n = N, dep = dep, indep = indep)
> cat("The fit using CG is ", fit$par)
The fit using CG is  2.035283 2.957401 3.051710

The key is obviously to decide on the prior distribution and then you can do all the jazz to work on the parameters.