Spurious Regression - Visualization -II
Simulate the regression statistics between two independent random walks
# Monte Carlo experiment: regress one simulated random walk on a second,
# independently generated random walk, and record the slope estimate and its
# t-statistic for every replication.
set.seed(1977)
N <- 100           # length of each simulated random walk
n.sims <- 1000     # number of Monte Carlo replications
ssd <- numeric(0)  # never filled in this block; kept in case later code reads it

# Preallocate the result vectors instead of growing them with c() on every
# iteration (which copies the whole vector each time).
coefs <- numeric(n.sims)  # slope estimates
tstat <- numeric(n.sims)  # t-statistics of the slope
for (i in seq_len(n.sims)) {
  # y is drawn before x so the random-number stream is consumed in the same
  # order as before and the seeded results are unchanged.
  y <- cumsum(rnorm(N))
  x <- cumsum(rnorm(N))
  fit.sum <- summary(lm(y ~ x))
  fit.coef <- coef(fit.sum)  # coefficient table; row 2 is the slope on x
  coefs[i] <- fit.coef[2, "Estimate"]
  tstat[i] <- fit.coef[2, "t value"]
}

# Share of replications in which |t| exceeds 1.96, i.e. in which the null of
# a zero slope would be rejected.
sum(abs(tstat) > 1.96, na.rm = TRUE) / n.sims
# [1] 0.76
Roughly 76 percent of the time you would reject the null hypothesis of no relationship, even though the two series are independent.
Frequency histogram of the standardized beta estimates
# Histogram of the simulated slope estimates after centring them on their
# sample mean and scaling by their sample standard deviation.
sample.mean <- mean(coefs)
sample.sd <- sd(coefs)
coefs.std <- (coefs - sample.mean) / sample.sd

# hist() stops with an error when any value lies outside the range of
# `breaks`, so widen the default [-6, 6] range only when the data need it.
# When every value is within +/-6 the breaks are exactly seq(-6, 6, 0.1).
hist.limit <- max(6, ceiling(max(abs(coefs.std))))
hist(coefs.std,
     breaks = seq(-hist.limit, hist.limit, 0.1),
     main = "Beta", xlab = "", ylab = "")
The Monte Carlo standard deviation of the beta estimates is actually higher than the standard error reported by the regression
# Repeat the random-walk regression for several sample sizes and summarise,
# for each size, the slope estimates, their reported standard errors and
# their t-statistics across replications.
N <- seq(30, 100, 10)  # sample sizes to compare
n.reps <- 100          # replications per sample size

# One summary value per sample size, preallocated rather than grown with c().
coefs <- numeric(length(N))     # mean slope estimate
tse <- numeric(length(N))       # mean standard error reported by lm
mcse <- numeric(length(N))      # standard deviation of the slope estimates
tstat <- numeric(length(N))     # mean t-statistic
tstat.sd <- numeric(length(N))  # standard deviation of the t-statistics
probs <- numeric(length(N))     # share of replications with |t| > 2

for (k in seq_along(N)) {
  print(k)  # progress indicator
  coefs.temp <- numeric(n.reps)
  tse.temp <- numeric(n.reps)
  tstat.temp <- numeric(n.reps)
  for (i in seq_len(n.reps)) {
    # y is drawn before x so the random-number stream is consumed in the
    # same order as before.
    y <- cumsum(rnorm(N[k]))
    x <- cumsum(rnorm(N[k]))
    fit.sum <- summary(lm(y ~ x))
    fit.coef <- coef(fit.sum)  # coefficient table; row 2 is the slope on x
    coefs.temp[i] <- fit.coef[2, "Estimate"]
    tse.temp[i] <- fit.coef[2, "Std. Error"]
    tstat.temp[i] <- fit.coef[2, "t value"]
  }
  sample.mean <- mean(coefs.temp)
  sample.sd <- sd(coefs.temp)
  coefs[k] <- sample.mean
  mcse[k] <- sample.sd
  tse[k] <- mean(tse.temp)
  tstat[k] <- mean(tstat.temp)
  tstat.sd[k] <- sd(tstat.temp)
  # Fraction of replications whose |t| exceeds 2 (close to the 1.96 cutoff).
  probs[k] <- sum(abs(tstat.temp) > 2, na.rm = TRUE) / n.reps
}
# Mean slope estimate against sample size, with two sets of +/-2 bands:
# blue uses the standard deviation of the simulated estimates (mcse),
# red uses the average standard error reported by the regression (tse).
plot(N, coefs, type = "l", ylim = c(-2, 2), ylab = "beta")

# Bands from the spread of the simulated estimates.
lines(N, coefs + 2 * mcse, lty = "dashed", col = "blue")
lines(N, coefs - 2 * mcse, lty = "dashed", col = "blue")

# Bands from the regression's own reported standard error.
lines(N, coefs + 2 * tse, lty = "dashed", col = "red")
lines(N, coefs - 2 * tse, lty = "dashed", col = "red")

legend(
  "topleft",
  legend = c("estimated se", "montecarlose"),
  fill = c("red", "blue")
)
One can clearly see that the Monte Carlo standard error remains high even as N increases, and that the standard error reported by the regression severely underestimates it.
# Average t-statistic against sample size (solid black line), with dashed
# blue bands at +/-2 standard deviations of the simulated t-statistics.
plot(N, tstat, type = "l", ylim = c(-20, 20), ylab = "tstat")
lines(N, tstat + 2 * tstat.sd, lty = "dashed", col = "blue")
lines(N, tstat - 2 * tstat.sd, lty = "dashed", col = "blue")
legend(
  "topleft",
  legend = c("bands", "average tstat"),
  fill = c("blue", "black")
)
The t-statistic shows no sign of converging as N increases.
# Share of replications that reject the null, plotted against sample size.
# The stray empty argument after `type` has been removed; it only worked
# through positional matching onto an unused plot parameter.
plot(N, probs, type = "l", ylab = "prob of Rejecting H0", col = "blue")
The probability of rejecting the null hypothesis increases with N even though the regression is spurious.