Stationarity Tests
Purpose
To check the pairs for residuals stationarity using adf tests and unit root tests
Let me prepare the data first
> library(fArma) > library(fSeries) > library(TSA) > library(pspline) > library(fUnitRoots) > library(RSQLite) > library(lmtest) > library(tseries) > program.date <- "feb18" > ticker.data <- "tickers.csv" > date.start <- "2009-02-02" > date.end <- "2010-02-18" > INITIAL.CAPITAL <- 1e+06 > UNITS <- 1e+05 > all.pairs.file <- paste("all_pairs_", program.date, "_v1.csv", + sep = "") > shortlisted.pairs.file <- paste("shortlisted_pairs_", program.date, + "_v1.csv", sep = "") > arma.file <- paste("arma_", program.date, "_revised.csv", sep = "") > final.pairs.file <- paste("finalpairs_", program.date, ".csv", + sep = "") > discuss.pairs.file <- paste("disusspairs_", program.date, ".csv", + sep = "") > eacf.result.file <- paste("eacf_resultfile_", program.date, ".csv", + sep = "") > db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db" > drv <- dbDriver("SQLite") > con <- dbConnect(drv, dbname = db) > query <- > security.db <- dbGetQuery(con, query) > security.db <- security.db[, 3:4] > security.db1 <- unstack(security.db, form = security.db$price ~ + security.db$ticker) > dbDisconnect(con) [1] TRUE > con <- dbConnect(drv, dbname = db) > query <- > security.db <- dbGetQuery(con, query) > security.db <- security.db[, 3:4] > security.db1.high <- unstack(security.db, form = security.db$high_price ~ + security.db$ticker) > query <- > security.db <- dbGetQuery(con, query) > head(security.db) ticker_id trade_date ticker low_price*1.0 1 1 2009-02-02 ABAN 462.20 2 1 2009-02-03 ABAN 461.15 3 1 2009-02-04 ABAN 424.50 4 1 2009-02-05 ABAN 412.60 5 1 2009-02-06 ABAN 417.20 6 1 2009-02-09 ABAN 424.10 > security.db <- security.db[, 3:4] > security.db1.low <- unstack(security.db, form = security.db$low_price ~ + security.db$ticker) > stkreturns <- returns(ts(security.db1), "simple") > stkreturns <- stkreturns[-1, ] > n <- dim(stkreturns)[2] > pair.combinations <- matrix(data = NA, nrow = n * (n + 1)/2, + ncol = 2) > rowcount <- 1 > for (i in 1:n) { + for (j in 1:i) { + pair.combinations[rowcount, 1] <- i + pair.combinations[rowcount, 2] <- j + rowcount <- rowcount + 1 + } + } > pair.combinations <- data.frame(pair.combinations) > colnames(pair.combinations) <- c("i", "j") > tickers$mktcap.cr <- round(tickers$mktcap, 0) > lookupi <- tickers[, c("tickers", "ticker_id", "mktcap.cr", "sector", + "sector_id")] > lookupj <- tickers[, c("tickers", "ticker_id", "mktcap.cr", "sector", + "sector_id")] > colnames(lookupi) <- c("tickeri", "i", "mktcap.cr.i", "sector.i", + "sector.i.id") > colnames(lookupj) <- c("tickerj", "j", "mktcap.cr.j", "sector.j", + "sector.j.id") > d1 <- merge(pair.combinations, lookupj, by.x = "j", by.y = "j", + all.x = T) > d2 <- merge(d1, lookupi, by.x = "i", by.y = "i", all.x = T) > d3 <- d2[d2$i != d2$j, c("i", "j", "tickeri", "tickerj", "sector.i", + "mktcap.cr.i", "sector.j", "mktcap.cr.j", "sector.i.id", + "sector.j.id")] > samesector <- d3[d3$sector.i.id == d3$sector.j.id, ] > x <- pmax(samesector$mktcap.cr.i, samesector$mktcap.cr.j)/pmin(samesector$mktcap.cr.i, + samesector$mktcap.cr.j) > x <- round(x, 0) > samesector <- samesector[which(x <= 8), ] > sector.tests <- samesector > sector.tests$p.a.b.1 <- 0 > sector.tests$p.a.b.2 <- 0 > sector.tests$p.b.a.1 <- 0 > sector.tests$p.b.a.2 <- 0 > npairs <- dim(samesector)[1] > pair = 1 > for (pair in 1:npairs) { + a <- samesector[pair, "tickeri"] + b <- samesector[pair, "tickerj"] + y1 <- log(security.db1[, a]) + x1 <- log(security.db1[, b]) + fit <- lm(y1 ~ x1) + if (summary(fit)$coefficients[1, 4] < 0.05) { + error <- residuals(fit) + } + else { + fit <- lm(y1 ~ x1 + 0) + error <- residuals(fit) + } + n <- length(error) + res <- unitrootTest(error, lag = 1, type = "c") + p.a.b.1 <- attr(res, "test")$p.value[1] + res <- adfTest(error, lag = 1, type = "c") + p.a.b.2 <- attr(res, "test")$p.value[1] + y1 <- log(security.db1[, b]) + x1 <- log(security.db1[, a]) + fit <- lm(y1 ~ x1) + if (summary(fit)$coefficients[1, 4] < 0.05) { + error <- residuals(fit) + } + else { + fit <- lm(y1 ~ x1 + 0) + error <- residuals(fit) + } + n <- length(error) + res <- unitrootTest(error, lag = 1, type = "c") + p.b.a.1 <- attr(res, "test")$p.value[1] + res <- adfTest(error, lag = 1, type = "c") + p.b.a.2 <- attr(res, "test")$p.value[1] + sector.tests[pair, "p.a.b.1"] <- p.a.b.1 + sector.tests[pair, "p.a.b.2"] <- p.a.b.2 + sector.tests[pair, "p.b.a.1"] <- p.b.a.1 + sector.tests[pair, "p.b.a.2"] <- p.b.a.2 + } |
> library(ggplot2) > temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1, + p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1, + p.b.a.2 = sector.tests$p.b.a.2) > temp <- temp[temp[, 2] < 0.05 & temp[, 3] < 0.05, ] > dim(temp) [1] 127 5 > p <- ggplot(temp, aes(x = p.a.b.1, y = p.a.b.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) > p <- ggplot(temp, aes(x = p.b.a.1, y = p.b.a.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) > temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1, + p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1, + p.b.a.2 = sector.tests$p.b.a.2) > temp <- temp[temp[, 4] < 0.05 & temp[, 5] < 0.05, ] > dim(temp) [1] 121 5 > p <- ggplot(temp, aes(x = p.a.b.1, y = p.a.b.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) > p <- ggplot(temp, aes(x = p.b.a.1, y = p.b.a.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) |
Till now I have used only first lag and found that there are about 120 pairs which pass the stationarity tests
Now I want to use more lags and check the same.
> sector.tests <- samesector > sector.tests$p.a.b.1 <- 0 > sector.tests$p.a.b.2 <- 0 > sector.tests$p.a.b.3 <- 0 > sector.tests$p.b.a.1 <- 0 > sector.tests$p.b.a.2 <- 0 > sector.tests$p.b.a.3 <- 0 > npairs <- dim(samesector)[1] > pair = 1 > for (pair in 1:npairs) { + a <- samesector[pair, "tickeri"] + b <- samesector[pair, "tickerj"] + y1 <- log(security.db1[, a]) + x1 <- log(security.db1[, b]) + fit <- lm(y1 ~ x1) + if (summary(fit)$coefficients[1, 4] < 0.05) { + error <- residuals(fit) + } + else { + fit <- lm(y1 ~ x1 + 0) + error <- residuals(fit) + } + n <- length(error) + res <- unitrootTest(error, lag = 5, type = "c") + p.a.b.1 <- attr(res, "test")$p.value[1] + res <- adfTest(error, lag = 5, type = "c") + p.a.b.2 <- attr(res, "test")$p.value[1] + res <- adf.test(error) + p.a.b.3 <- res$p.value + y1 <- log(security.db1[, b]) + x1 <- log(security.db1[, a]) + fit <- lm(y1 ~ x1) + if (summary(fit)$coefficients[1, 4] < 0.05) { + error <- residuals(fit) + } + else { + fit <- lm(y1 ~ x1 + 0) + error <- residuals(fit) + } + n <- length(error) + res <- unitrootTest(error, lag = 5, type = "c") + p.b.a.1 <- attr(res, "test")$p.value[1] + res <- adfTest(error, lag = 5, type = "c") + p.b.a.2 <- attr(res, "test")$p.value[1] + res <- adf.test(error) + p.b.a.3 <- res$p.value + sector.tests[pair, "p.a.b.1"] <- p.a.b.1 + sector.tests[pair, "p.a.b.2"] <- p.a.b.2 + sector.tests[pair, "p.a.b.3"] <- p.a.b.3 + sector.tests[pair, "p.b.a.1"] <- p.b.a.1 + sector.tests[pair, "p.b.a.3"] <- p.b.a.3 + } > temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1, + p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1, + p.b.a.2 = sector.tests$p.b.a.2) > temp <- temp[temp[, 2] < 0.05 & temp[, 3] < 0.05, ] > dim(temp) [1] 88 5 > temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1, + p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1, + p.b.a.2 = sector.tests$p.b.a.2) > temp <- temp[temp[, 4] < 0.05 & temp[, 5] < 0.05, ] > length(which(sector.tests$p.b.a.3 < 0.05 & sector.tests$p.a.b.3 < + 0.05)) [1] 42 > length(which(sector.tests$p.b.a.3 < 0.05 | sector.tests$p.a.b.3 < + 0.05)) [1] 59 |
If you take more lags in to consideration, there are only about 90 pairs which pass the stationarity tests. Does it surprise me ?
I was under the misconception that if I take more lags, then there would be more pairs for my disposal..
But I got a completely different result.
If I do adf test I get only 59 pairs
So, should I be following adf test..Let me see what pairs I get if I follow adf test only
> sector.tests[which(sector.tests$p.b.a.3 < 0.05 | sector.tests$p.a.b.3 < + 0.05), c(3, 4)] tickeri tickerj 11 AMBUJACEM ACC 439 GESHIP CONCOR 511 GRASIM AMBUJACEM 531 GSPL BPCL 918 HINDPETRO GSPL 1044 IBREALEST DLF 1129 IDBI ANDHRABANK 1173 IDBI BANKINDIA 1272 IDFC HDFC 1428 INDIACEM ACC 1474 INDIAINFO IDFC 1480 INDIAINFO IFCI 1539 INFOSYSTCH HCLTECH 2063 KOTAKBANK ICICIBANK 2072 KOTAKBANK AXISBANK 2152 LITL HCC 2446 MLL GESHIP 2607 MPHASIS HCLTECH 2748 MTNL IDEA 2827 NAGARCONST LITL 2950 NATIONALUM HINDALCO 3149 ONGC CAIRN 3844 RANBAXY CIPLA 3887 RANBAXY LUPIN 3908 RANBAXY BIOCON 4030 RECLTD PFC 4105 RELCAPITAL INDIAINFO 4370 RELINFRA POWERGRID 4584 ROLTA MPHASIS 4617 ROLTA POLARIS 4623 ROLTA HCLTECH 4727 RPOWER POWERGRID 4959 SCI GESHIP 5040 SCI CONCOR 5175 SIEMENS CESC 5221 SIEMENS ABB 5609 SUZLON RPOWER 5621 SUZLON POWERGRID 5682 SYNDIBANK BANKINDIA 5701 SYNDIBANK IOB 5719 SYNDIBANK PNB 5723 SYNDIBANK ANDHRABANK 5727 SYNDIBANK IDBI 6295 TATASTEEL JSWSTEEL 6562 TECHM ROLTA 6954 TULIP TTML 7122 ULTRACEMCO INDIACEM 7237 UNIONBANK IDBI 7260 UNIONBANK SYNDIBANK 7484 UNITECH DLF 7554 VIJAYABANK IDBI 7559 VIJAYABANK SYNDIBANK 8195 DISHTV ZEEL 8307 DRREDDY BIOCON 8337 DRREDDY RANBAXY 8940 PATNI HCLTECH 8991 PATNI MPHASIS 9015 PATNI ROLTA 9199 HDFCBANK1 HDFC1 |
Let me check the other stats for these
INDIACEM ACC TULIP TTML ULTRACEMCO INDIACEM DISHTV ZEEL DRREDDY RANBAXY
The above are the pairs which passed ADF test but failed stricter tests like that from urca.