Quantile Regression

Purpose To understand the application of quantile regression in pairs and generalized pairs.

> library(quantreg)
> library(ggplot2)
> set.seed(1977)
> x <- rnorm(100)
> y <- as.data.frame(quantile(x, seq(0, 1, 0.25)))
> temp <- data.frame(y = y[, 1], x = 0:4)
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + geom_point() + xlab("Quantiles") + ylab("") + opts(title = "Quantile Function")
> print(q)

Median quantile fit

> data(engel)
> fit1 <- rq(foodexp ~ income, tau = 0.5, data = engel)
> summary(fit1)
Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)

tau: [1] 0.5

Coefficients:
            coefficients lower bd  upper bd
(Intercept)  81.48225     53.25915 114.01156
income        0.56018      0.48702   0.60199

> data(engel)
> fit1 <- rq(foodexp ~ income, tau = 0.5, data = engel)
> summary(fit1)
Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)

tau: [1] 0.5

Coefficients:
            coefficients lower bd  upper bd
(Intercept)  81.48225     53.25915 114.01156
income        0.56018      0.48702   0.60199
> resids <- data.frame(dates = 1:235, y = resid(fit1))
> p <- ggplot(resids, aes(x = dates, y = y))
> q <- p + geom_line(colour = "sienna", lwd = 1.4)
> print(q)

> data(engel)
> head(engel)
    income  foodexp
1 420.1577 255.8394
2 541.4117 310.9587
3 901.1575 485.6800
4 639.0802 402.9974
5 750.8756 495.5608
6 945.7989 633.7978
> fit1 <- rq(foodexp ~ income, tau = 0.5, data = engel)
> summary(fit1, se = "boot")
Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)

tau: [1] 0.5

Coefficients:
            Value    Std. Error t value  Pr(>|t|)
(Intercept) 81.48225 27.58345    2.95403  0.00346
income       0.56018  0.03559   15.73917  0.00000
> summary(fit1, se = "ker")
Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)

tau: [1] 0.5

Coefficients:
            Value    Std. Error t value  Pr(>|t|)
(Intercept) 81.48225 30.21532    2.69672  0.00751
income       0.56018  0.03732   15.01139  0.00000

A very simple application of quantreg on engle data

> data(engel)
> attach(engel)
        The following object(s) are masked from engel ( position 6 ) :

         foodexp income
> plot(income, foodexp, cex = 0.25, type = "n", xlab = "Household Income",
+     ylab = "Food Expenditure")
> points(income, foodexp, cex = 0.5, col = "blue")
> abline(rq(foodexp ~ income, tau = 0.5), col = "blue")
> abline(lm(foodexp ~ income), lty = 2, col = "red")
> taus <- c(0.05, 0.1, 0.25, 0.75, 0.9, 0.95)
> for (i in 1:length(taus)) {
+     abline(rq(foodexp ~ income, tau = taus[i]), col = "gray")
+ }
> detach(engel)

Confidence bands for the estimate across quantiles

> attach(engel)
        The following object(s) are masked from engel ( position 6 ) :

         foodexp income
> taus <- seq(0.05, 0.95, 0.1)
> i <- 1
> intercept <- data.frame()
> slope <- data.frame()
> for (i in 1:length(taus)) {
+     fit <- rq(foodexp ~ income, tau = taus[i])
+     fit.sum <- summary(fit)
+     temp1 <- coef(fit.sum)[1, ]
+     temp2 <- coef(fit.sum)[2, ]
+     intercept <- rbind(intercept, temp1)
+     slope <- rbind(slope, temp2)
+ }
> colnames(intercept) <- c("intercept", "i.low", "i.high")
> colnames(slope) <- c("slope", "s.low", "s.high")
> intercept$tau <- taus
> slope$tau <- taus
> p <- ggplot(intercept, aes(x = taus, ymin = i.low, ymax = i.high))
> q <- p + geom_ribbon(fill = "black") + ylim(0, 300)
> print(q)
> p <- ggplot(slope, aes(x = taus, ymin = s.low, ymax = s.high))
> q <- p + geom_ribbon()
> print(q)
> detach(engel)

For Specific Quantiles only

> attach(engel)
        The following object(s) are masked from engel ( position 6 ) :

         foodexp income
> taus <- c(0.05, 0.25, 0.5, 0.75, 0.95)
> i <- 1
> intercept <- data.frame()
> slope <- data.frame()
> for (i in 1:length(taus)) {
+     fit <- rq(foodexp ~ income, tau = taus[i])
+     fit.sum <- summary(fit)
+     temp1 <- coef(fit.sum)[1, ]
+     temp2 <- coef(fit.sum)[2, ]
+     intercept <- rbind(intercept, temp1)
+     slope <- rbind(slope, temp2)
+ }
> colnames(intercept) <- c("intercept", "i.low", "i.high")
> colnames(slope) <- c("slope", "s.low", "s.high")
> intercept$tau <- taus
> slope$tau <- taus
> intercept
  intercept    i.low    i.high  tau
1 124.88004 98.30212 130.51695 0.05
2  95.48354 73.78608 120.09847 0.25
3  81.48225 53.25915 114.01156 0.50
4  62.39659 32.74488 107.31362 0.75
5  64.10396 46.26495  83.57896 0.95
> slope
      slope     s.low    s.high  tau
1 0.3433611 0.3433270 0.3897500 0.05
2 0.4741032 0.4203298 0.4943288 0.25
3 0.5601806 0.4870223 0.6019890 0.50
4 0.6440141 0.5801552 0.6904127 0.75
5 0.7090685 0.6739000 0.7344405 0.95
> detach(engel)

For all the Quantiles

> attach(engel)
        The following object(s) are masked from engel ( position 6 ) :

         foodexp income
> fit <- rq(foodexp ~ income, tau = -1)
> fit.sum <- summary(fit)
> results <- data.frame(tau = fit$sol[1, ], intercept = fit$sol[4,
+     ], slope = fit$sol[5, ])
> p <- ggplot(results, aes(x = tau, y = intercept))
> q <- p + geom_line(lwd = 1.2)
> print(q)
> q <- p + geom_line(aes(y = slope), lwd = 1.2)
> print(q)
> detach(engel)

Anova to check the various models

> fit1 <- rq(foodexp ~ income, tau = 0.25)
> fit2 <- rq(foodexp ~ income, tau = 0.5)
> fit3 <- rq(foodexp ~ income, tau = 0.75)
> anova(fit1, fit2, fit3)
Quantile Regression Analysis of Deviance Table

Model: foodexp ~ income
Joint Test of Equality of Slopes: tau in {  0.25 0.5 0.75  }

   Df Resid Df F value    Pr(>F)
1   2      703  15.557 2.449e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The whole purpose of this exercise is to investigate the relationship between let’s say ZEE and SUNTV across all quantiles. Also to check whether the log transformations also change the estimates of the cointegrating equation.

Let me first populate the database

> library(RSQLite)
> date.start <- "2009-02-02"
> date.end <- "2010-02-18"
> db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db"
> drv <- dbDriver("SQLite")
> con <- dbConnect(drv, dbname = db)
> query <- paste("DELETED")
> security.db <- dbGetQuery(con, query)
> query <- " select * from security_master"
> master <- dbGetQuery(con, query)
> dataset <- merge(security.db, master, by.x = "ticker", by.y = "ticker",
+     all.x = T)
> dataset <- dataset[, -5]
> colnames(dataset) <- c("ticker", "ticker.id", "trade.date", "price",
+     "market.cap", "sector", "sector.id")
> colnames(master) <- c("ticker.id", "ticker", "market.cap", "sector.name",
+     "sector.id")
> master$mcap.rank <- factor(round(rank(master$market.cap)/38) +
+     1)
> tickers <- unique(master$ticker)
> security.db <- security.db[, 3:4]
> security.db1 <- unstack(security.db, form = security.db$price ~
+     security.db$ticker)
> y.t <- "ZEE"
> x.t <- "SUNTV"
> dates <- as.Date(unique(dataset$trade.date))
> prices <- security.db1[, c("ZEEL", "SUNTV")]

Run a quantile regression between ZEEL and SUNTV

> fit <- rq(prices[, 2] ~ prices[, 1], tau = -1)
> fit.sum <- summary(fit)
> results <- data.frame(tau = fit$sol[1, ], intercept = fit$sol[4,
+     ], slope = fit$sol[5, ])
> p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function")
> q <- p + geom_line(lwd = 1.2)
> temp <- coef(lm(prices[, 2] ~ prices[, 1]))[1]
> q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2)
> print(q)
> p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function")
> q <- p + geom_line(aes(y = slope), lwd = 1.2)
> temp <- coef(lm(prices[, 2] ~ prices[, 1]))[2]
> q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2)
> print(q)

Transform it to log prices and run the same

> fit <- rq(log(prices[, 2]) ~ log(prices[, 1]), tau = -1)
> fit.sum <- summary(fit)
> results <- data.frame(tau = fit$sol[1, ], intercept = fit$sol[4,
+     ], slope = fit$sol[5, ])
> p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function")
> q <- p + geom_line(lwd = 1.2)
> temp <- coef(lm(log(prices[, 2]) ~ log(prices[, 1])))[1]
> q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2)
> print(q)
> p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function")
> q <- p + geom_line(aes(y = slope), lwd = 1.2)
> temp <- coef(lm(log(prices[, 2]) ~ log(prices[, 1])))[2]
> q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2)
> print(q)

Can one trace back the intercept levels ?

> fit1 <- lm(prices[, 2] ~ prices[, 1])
> fit2 <- lm(log(prices[, 2]) ~ log(prices[, 1]))
> coef(fit1)[2]
prices[, 1]
   1.162249
> exp(coef(fit2)[2]) - 1
log(prices[, 1])
        1.235314
> fit1 <- rq(prices[, 2] ~ prices[, 1], tau = 0.5)
> fit2 <- rq(log(prices[, 2]) ~ log(prices[, 1]), tau = 0.5)
> coef(fit1)[2]
prices[, 1]
   1.141397
> exp(coef(fit2)[2]) - 1
log(prices[, 1])
        1.202772

Clearly the fits are close What if I delete the highest obs from y and x and then run a median reg.? Will the estimates remain same ?

> fit1 <- lm(prices[, 2] ~ prices[, 1])
> fit2 <- lm(prices[1:241, 2] ~ prices[1:241, 1])
> coef(fit1)
(Intercept) prices[, 1]
  44.571232    1.162249
> coef(fit2)
     (Intercept) prices[1:241, 1]
       51.698425         1.112381
> (coef(fit2)[2] - coef(fit1)[2]) * 100/coef(fit1)[2]
prices[1:241, 1]
       -4.290653
> fit1 <- rq(prices[, 2] ~ prices[, 1], tau = 0.5)
> fit2 <- rq(prices[1:241, 2] ~ prices[1:241, 1], tau = 0.5)
> coef(fit1)
(Intercept) prices[, 1]
  48.255190    1.141397
> coef(fit2)
     (Intercept) prices[1:241, 1]
       52.118702         1.108058
> (coef(fit2)[2] - coef(fit1)[2]) * 100/coef(fit1)[2]
prices[1:241, 1]
       -2.920852

If I remove a fornight of pricepoints, the hedge ratio change is only 3 percent via median regression while it is 4.5 percent using conditional mean equation

Need to figure out to tabulate skew shift and scale shift