Purpose
To check the pairs for residuals stationarity using adf tests and unit root tests

Let me prepare the data first

> library(fArma)
> library(fSeries)
> library(TSA)
> library(pspline)
> library(fUnitRoots)
> library(RSQLite)
> library(lmtest)
> library(tseries)
> program.date <- "feb18"
> ticker.data <- "tickers.csv"
> date.start <- "2009-02-02"
> date.end <- "2010-02-18"
> INITIAL.CAPITAL <- 1e+06
> UNITS <- 1e+05
> all.pairs.file <- paste("all_pairs_", program.date, "_v1.csv",
+     sep = "")
> shortlisted.pairs.file <- paste("shortlisted_pairs_", program.date,
+     "_v1.csv", sep = "")
> arma.file <- paste("arma_", program.date, "_revised.csv", sep = "")
> final.pairs.file <- paste("finalpairs_", program.date, ".csv",
+     sep = "")
> discuss.pairs.file <- paste("disusspairs_", program.date, ".csv",
+     sep = "")
> eacf.result.file <- paste("eacf_resultfile_", program.date, ".csv",
+     sep = "")
> db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db"
> drv <- dbDriver("SQLite")
> con <- dbConnect(drv, dbname = db)
> query <-
> security.db <- dbGetQuery(con, query)
> security.db <- security.db[, 3:4]
> security.db1 <- unstack(security.db, form = security.db$price ~
+     security.db$ticker)
> dbDisconnect(con)
[1] TRUE
> con <- dbConnect(drv, dbname = db)
> query <-
> security.db <- dbGetQuery(con, query)
> security.db <- security.db[, 3:4]
> security.db1.high <- unstack(security.db, form = security.db$high_price ~
+     security.db$ticker)
> query <-
> security.db <- dbGetQuery(con, query)
> head(security.db)
  ticker_id trade_date ticker low_price*1.0
1         1 2009-02-02   ABAN        462.20
2         1 2009-02-03   ABAN        461.15
3         1 2009-02-04   ABAN        424.50
4         1 2009-02-05   ABAN        412.60
5         1 2009-02-06   ABAN        417.20
6         1 2009-02-09   ABAN        424.10
> security.db <- security.db[, 3:4]
> security.db1.low <- unstack(security.db, form = security.db$low_price ~
+     security.db$ticker)
> stkreturns <- returns(ts(security.db1), "simple")
> stkreturns <- stkreturns[-1, ]
> n <- dim(stkreturns)[2]
> pair.combinations <- matrix(data = NA, nrow = n * (n + 1)/2,
+     ncol = 2)
> rowcount <- 1
> for (i in 1:n) {
+     for (j in 1:i) {
+         pair.combinations[rowcount, 1] <- i
+         pair.combinations[rowcount, 2] <- j
+         rowcount <- rowcount + 1
+     }
+ }
> pair.combinations <- data.frame(pair.combinations)
> colnames(pair.combinations) <- c("i", "j")
> tickers$mktcap.cr <- round(tickers$mktcap, 0)
> lookupi <- tickers[, c("tickers", "ticker_id", "mktcap.cr", "sector",
+     "sector_id")]
> lookupj <- tickers[, c("tickers", "ticker_id", "mktcap.cr", "sector",
+     "sector_id")]
> colnames(lookupi) <- c("tickeri", "i", "mktcap.cr.i", "sector.i",
+     "sector.i.id")
> colnames(lookupj) <- c("tickerj", "j", "mktcap.cr.j", "sector.j",
+     "sector.j.id")
> d1 <- merge(pair.combinations, lookupj, by.x = "j", by.y = "j",
+     all.x = T)
> d2 <- merge(d1, lookupi, by.x = "i", by.y = "i", all.x = T)
> d3 <- d2[d2$i != d2$j, c("i", "j", "tickeri", "tickerj", "sector.i",
+     "mktcap.cr.i", "sector.j", "mktcap.cr.j", "sector.i.id",
+     "sector.j.id")]
> samesector <- d3[d3$sector.i.id == d3$sector.j.id, ]
> x <- pmax(samesector$mktcap.cr.i, samesector$mktcap.cr.j)/pmin(samesector$mktcap.cr.i,
+     samesector$mktcap.cr.j)
> x <- round(x, 0)
> samesector <- samesector[which(x <= 8), ]
> sector.tests <- samesector
> sector.tests$p.a.b.1 <- 0
> sector.tests$p.a.b.2 <- 0
> sector.tests$p.b.a.1 <- 0
> sector.tests$p.b.a.2 <- 0
> npairs <- dim(samesector)[1]
> pair = 1
> for (pair in 1:npairs) {
+     a <- samesector[pair, "tickeri"]
+     b <- samesector[pair, "tickerj"]
+     y1 <- log(security.db1[, a])
+     x1 <- log(security.db1[, b])
+     fit <- lm(y1 ~ x1)
+     if (summary(fit)$coefficients[1, 4] < 0.05) {
+         error <- residuals(fit)
+     }
+     else {
+         fit <- lm(y1 ~ x1 + 0)
+         error <- residuals(fit)
+     }
+     n <- length(error)
+     res <- unitrootTest(error, lag = 1, type = "c")
+     p.a.b.1 <- attr(res, "test")$p.value[1]
+     res <- adfTest(error, lag = 1, type = "c")
+     p.a.b.2 <- attr(res, "test")$p.value[1]
+     y1 <- log(security.db1[, b])
+     x1 <- log(security.db1[, a])
+     fit <- lm(y1 ~ x1)
+     if (summary(fit)$coefficients[1, 4] < 0.05) {
+         error <- residuals(fit)
+     }
+     else {
+         fit <- lm(y1 ~ x1 + 0)
+         error <- residuals(fit)
+     }
+     n <- length(error)
+     res <- unitrootTest(error, lag = 1, type = "c")
+     p.b.a.1 <- attr(res, "test")$p.value[1]
+     res <- adfTest(error, lag = 1, type = "c")
+     p.b.a.2 <- attr(res, "test")$p.value[1]
+     sector.tests[pair, "p.a.b.1"] <- p.a.b.1
+     sector.tests[pair, "p.a.b.2"] <- p.a.b.2
+     sector.tests[pair, "p.b.a.1"] <- p.b.a.1
+     sector.tests[pair, "p.b.a.2"] <- p.b.a.2
+ }
> library(ggplot2)
> temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1,
+     p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1,
+     p.b.a.2 = sector.tests$p.b.a.2)
> temp <- temp[temp[, 2] < 0.05 & temp[, 3] < 0.05, ]
> dim(temp)
[1] 127   5
> p <- ggplot(temp, aes(x = p.a.b.1, y = p.a.b.2))
> q <- p + geom_point()
> q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna",
+     lwd = 1.2)
> print(q)
> p <- ggplot(temp, aes(x = p.b.a.1, y = p.b.a.2))
> q <- p + geom_point()
> q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna",
+     lwd = 1.2)
> print(q)
> temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1,
+     p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1,
+     p.b.a.2 = sector.tests$p.b.a.2)
> temp <- temp[temp[, 4] < 0.05 & temp[, 5] < 0.05, ]
> dim(temp)
[1] 121   5
> p <- ggplot(temp, aes(x = p.a.b.1, y = p.a.b.2))
> q <- p + geom_point()
> q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna",
+     lwd = 1.2)
> print(q)
> p <- ggplot(temp, aes(x = p.b.a.1, y = p.b.a.2))
> q <- p + geom_point()
> q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna",
+     lwd = 1.2)
> print(q)

StationarityTests-002.jpg

Till now I have used only first lag and found that there are about 120 pairs which pass the stationarity tests

Now I want to use more lags and check the same.

> sector.tests <- samesector
> sector.tests$p.a.b.1 <- 0
> sector.tests$p.a.b.2 <- 0
> sector.tests$p.a.b.3 <- 0
> sector.tests$p.b.a.1 <- 0
> sector.tests$p.b.a.2 <- 0
> sector.tests$p.b.a.3 <- 0
> npairs <- dim(samesector)[1]
> pair = 1
> for (pair in 1:npairs) {
+     a <- samesector[pair, "tickeri"]
+     b <- samesector[pair, "tickerj"]
+     y1 <- log(security.db1[, a])
+     x1 <- log(security.db1[, b])
+     fit <- lm(y1 ~ x1)
+     if (summary(fit)$coefficients[1, 4] < 0.05) {
+         error <- residuals(fit)
+     }
+     else {
+         fit <- lm(y1 ~ x1 + 0)
+         error <- residuals(fit)
+     }
+     n <- length(error)
+     res <- unitrootTest(error, lag = 5, type = "c")
+     p.a.b.1 <- attr(res, "test")$p.value[1]
+     res <- adfTest(error, lag = 5, type = "c")
+     p.a.b.2 <- attr(res, "test")$p.value[1]
+     res <- adf.test(error)
+     p.a.b.3 <- res$p.value
+     y1 <- log(security.db1[, b])
+     x1 <- log(security.db1[, a])
+     fit <- lm(y1 ~ x1)
+     if (summary(fit)$coefficients[1, 4] < 0.05) {
+         error <- residuals(fit)
+     }
+     else {
+         fit <- lm(y1 ~ x1 + 0)
+         error <- residuals(fit)
+     }
+     n <- length(error)
+     res <- unitrootTest(error, lag = 5, type = "c")
+     p.b.a.1 <- attr(res, "test")$p.value[1]
+     res <- adfTest(error, lag = 5, type = "c")
+     p.b.a.2 <- attr(res, "test")$p.value[1]
+     res <- adf.test(error)
+     p.b.a.3 <- res$p.value
+     sector.tests[pair, "p.a.b.1"] <- p.a.b.1
+     sector.tests[pair, "p.a.b.2"] <- p.a.b.2
+     sector.tests[pair, "p.a.b.3"] <- p.a.b.3
+     sector.tests[pair, "p.b.a.1"] <- p.b.a.1
+     sector.tests[pair, "p.b.a.3"] <- p.b.a.3
+ }
> temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1,
+     p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1,
+     p.b.a.2 = sector.tests$p.b.a.2)
> temp <- temp[temp[, 2] < 0.05 & temp[, 3] < 0.05, ]
> dim(temp)
[1] 88  5
> temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1,
+     p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1,
+     p.b.a.2 = sector.tests$p.b.a.2)
> temp <- temp[temp[, 4] < 0.05 & temp[, 5] < 0.05, ]
> length(which(sector.tests$p.b.a.3 < 0.05 & sector.tests$p.a.b.3 <
+     0.05))
[1] 42
> length(which(sector.tests$p.b.a.3 < 0.05 | sector.tests$p.a.b.3 <
+     0.05))
[1] 59

If you take more lags in to consideration, there are only about 90 pairs which pass the stationarity tests. Does it surprise me ?

I was under the misconception that if I take more lags, then there would be more pairs for my disposal..

But I got a completely different result.

If I do adf test I get only 59 pairs

So, should I be following adf test..Let me see what pairs I get if I follow adf test only

> sector.tests[which(sector.tests$p.b.a.3 < 0.05 | sector.tests$p.a.b.3 <
+     0.05), c(3, 4)]
        tickeri    tickerj
11    AMBUJACEM        ACC
439      GESHIP     CONCOR
511      GRASIM  AMBUJACEM
531        GSPL       BPCL
918   HINDPETRO       GSPL
1044  IBREALEST        DLF
1129       IDBI ANDHRABANK
1173       IDBI  BANKINDIA
1272       IDFC       HDFC
1428   INDIACEM        ACC
1474  INDIAINFO       IDFC
1480  INDIAINFO       IFCI
1539 INFOSYSTCH    HCLTECH
2063  KOTAKBANK  ICICIBANK
2072  KOTAKBANK   AXISBANK
2152       LITL        HCC
2446        MLL     GESHIP
2607    MPHASIS    HCLTECH
2748       MTNL       IDEA
2827 NAGARCONST       LITL
2950 NATIONALUM   HINDALCO
3149       ONGC      CAIRN
3844    RANBAXY      CIPLA
3887    RANBAXY      LUPIN
3908    RANBAXY     BIOCON
4030     RECLTD        PFC
4105 RELCAPITAL  INDIAINFO
4370   RELINFRA  POWERGRID
4584      ROLTA    MPHASIS
4617      ROLTA    POLARIS
4623      ROLTA    HCLTECH
4727     RPOWER  POWERGRID
4959        SCI     GESHIP
5040        SCI     CONCOR
5175    SIEMENS       CESC
5221    SIEMENS        ABB
5609     SUZLON     RPOWER
5621     SUZLON  POWERGRID
5682  SYNDIBANK  BANKINDIA
5701  SYNDIBANK        IOB
5719  SYNDIBANK        PNB
5723  SYNDIBANK ANDHRABANK
5727  SYNDIBANK       IDBI
6295  TATASTEEL   JSWSTEEL
6562      TECHM      ROLTA
6954      TULIP       TTML
7122 ULTRACEMCO   INDIACEM
7237  UNIONBANK       IDBI
7260  UNIONBANK  SYNDIBANK
7484    UNITECH        DLF
7554 VIJAYABANK       IDBI
7559 VIJAYABANK  SYNDIBANK
8195     DISHTV       ZEEL
8307    DRREDDY     BIOCON
8337    DRREDDY    RANBAXY
8940      PATNI    HCLTECH
8991      PATNI    MPHASIS
9015      PATNI      ROLTA
9199  HDFCBANK1      HDFC1

Let me check the other stats for these

INDIACEM ACC TULIP TTML ULTRACEMCO INDIACEM DISHTV ZEEL DRREDDY RANBAXY

The above are the pairs which passed ADF test but failed stricter tests like that from urca.