Spurious Regression - Visualization -II

Simulate the regression statistics between 2 random walks

> set.seed(1977)
> N <- 100
> coefs <- numeric(0)
> tstat <- numeric(0)
> ssd <- numeric(0)
> for (i in 1:1000) {
+     y <- cumsum(rnorm(N))
+     x <- cumsum(rnorm(N))
+     fit.sum <- summary(lm(y ~ x))
+     coefs <- c(coefs, coef(fit.sum)[2, 1])
+     tstat <- c(tstat, coef(fit.sum)[2, 3])
+ }
> length(which(abs(tstat) > 1.96))/1000
[1] 0.76

77 percent of the times you would tend to reject the null hypothesis.

Frequency Histogram of t value

> sample.mean <- mean(coefs)
> sample.sd <- sd(coefs)
> hist((coefs - sample.mean)/sample.sd, breaks = seq(-6, 6, 0.1),
+     main = "Beta", xlab = "", ylab = "")

Montecarlo Sigma is actually higher than the reported standard error

> N <- seq(30, 100, 10)
> k <- 1
> coefs <- numeric(0)
> tse <- numeric(0)
> mcse <- numeric(0)
> tstat <- numeric(0)
> tstat.sd <- numeric(0)
> probs <- numeric(0)
> for (k in seq_along(N)) {
+     print(k)
+     coefs.temp <- numeric(0)
+     tse.temp <- numeric(0)
+     tstat.temp <- numeric(0)
+     for (i in 1:100) {
+         y <- cumsum(rnorm(N[k]))
+         x <- cumsum(rnorm(N[k]))
+         fit.sum <- summary(lm(y ~ x))
+         coefs.temp <- c(coefs.temp, coef(fit.sum)[2, 1])
+         tse.temp <- c(tse.temp, coef(fit.sum)[2, 2])
+         tstat.temp <- c(tstat.temp, coef(fit.sum)[2, 3])
+     }
+     sample.mean <- mean(coefs.temp)
+     sample.sd <- sd(coefs.temp)
+     coefs <- c(coefs, sample.mean)
+     mcse <- c(mcse, sample.sd)
+     tse <- c(tse, mean(tse.temp))
+     tstat <- c(tstat, mean(tstat.temp))
+     tstat.sd <- c(tstat.sd, sd(tstat.temp))
+     probs <- c(probs, length(which(abs(tstat.temp) > 2))/100)
+ }

> plot(N, (coefs), type = "l", ylim = c(-2, 2), ylab = "beta")
> points(N, (coefs) + 2 * (mcse), type = "l", lty = "dashed", col = "blue")
> points(N, (coefs) - 2 * (mcse), type = "l", lty = "dashed", col = "blue")
> points(N, (coefs) + 2 * (tse), type = "l", lty = "dashed", col = "red")
> points(N, (coefs) - 2 * (tse), type = "l", lty = "dashed", col = "red")
> legend("topleft", legend = c("estimated se", "montecarlose"),
+     fill = c("red", "blue"))

One can clearly see that montecarlo se remains high even as n increases And t stat error severely underestimates the mean

> plot(N, (tstat), type = "l", ylim = c(-20, 20), ylab = "tstat")
> points(N, (tstat) + 2 * (tstat.sd), type = "l", lty = "dashed",
+     col = "blue")
> points(N, (tstat) - 2 * (tstat.sd), type = "l", lty = "dashed",
+     col = "blue")
> legend("topleft", legend = c("bands", "average tstat"), fill = c("blue",
+     "black"))

t stat shows no sign of converging

> plot(N, probs, type = "l", , ylab = "prob of Rejecting H0", col = "blue")

Probability of rejecting null increases even though it is spurious reg