Quantile Regression
Purpose To understand the application of quantile regression in pairs and generalized pairs.
> library(quantreg) > library(ggplot2) > set.seed(1977) > x <- rnorm(100) > y <- as.data.frame(quantile(x, seq(0, 1, 0.25))) > temp <- data.frame(y = y[, 1], x = 0:4) > p <- ggplot(temp, aes(x = x, y = y)) > q <- p + geom_point() + xlab("Quantiles") + ylab("") + opts(title = "Quantile Function") > print(q) |
Median quantile fit
> data(engel) > fit1 <- rq(foodexp ~ income, tau = 0.5, data = engel) > summary(fit1) Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel) tau: [1] 0.5 Coefficients: coefficients lower bd upper bd (Intercept) 81.48225 53.25915 114.01156 income 0.56018 0.48702 0.60199 |
> data(engel) > fit1 <- rq(foodexp ~ income, tau = 0.5, data = engel) > summary(fit1) Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel) tau: [1] 0.5 Coefficients: coefficients lower bd upper bd (Intercept) 81.48225 53.25915 114.01156 income 0.56018 0.48702 0.60199 > resids <- data.frame(dates = 1:235, y = resid(fit1)) > p <- ggplot(resids, aes(x = dates, y = y)) > q <- p + geom_line(colour = "sienna", lwd = 1.4) > print(q) |
> data(engel) > head(engel) income foodexp 1 420.1577 255.8394 2 541.4117 310.9587 3 901.1575 485.6800 4 639.0802 402.9974 5 750.8756 495.5608 6 945.7989 633.7978 > fit1 <- rq(foodexp ~ income, tau = 0.5, data = engel) > summary(fit1, se = "boot") Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel) tau: [1] 0.5 Coefficients: Value Std. Error t value Pr(>|t|) (Intercept) 81.48225 27.58345 2.95403 0.00346 income 0.56018 0.03559 15.73917 0.00000 > summary(fit1, se = "ker") Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel) tau: [1] 0.5 Coefficients: Value Std. Error t value Pr(>|t|) (Intercept) 81.48225 30.21532 2.69672 0.00751 income 0.56018 0.03732 15.01139 0.00000 |
A very simple application of quantreg on engle data
> data(engel) > attach(engel) The following object(s) are masked from engel ( position 6 ) : foodexp income > plot(income, foodexp, cex = 0.25, type = "n", xlab = "Household Income", + ylab = "Food Expenditure") > points(income, foodexp, cex = 0.5, col = "blue") > abline(rq(foodexp ~ income, tau = 0.5), col = "blue") > abline(lm(foodexp ~ income), lty = 2, col = "red") > taus <- c(0.05, 0.1, 0.25, 0.75, 0.9, 0.95) > for (i in 1:length(taus)) { + abline(rq(foodexp ~ income, tau = taus[i]), col = "gray") + } > detach(engel) |
Confidence bands for the estimate across quantiles
> attach(engel) The following object(s) are masked from engel ( position 6 ) : foodexp income > taus <- seq(0.05, 0.95, 0.1) > i <- 1 > intercept <- data.frame() > slope <- data.frame() > for (i in 1:length(taus)) { + fit <- rq(foodexp ~ income, tau = taus[i]) + fit.sum <- summary(fit) + temp1 <- coef(fit.sum)[1, ] + temp2 <- coef(fit.sum)[2, ] + intercept <- rbind(intercept, temp1) + slope <- rbind(slope, temp2) + } > colnames(intercept) <- c("intercept", "i.low", "i.high") > colnames(slope) <- c("slope", "s.low", "s.high") > intercept$tau <- taus > slope$tau <- taus > p <- ggplot(intercept, aes(x = taus, ymin = i.low, ymax = i.high)) > q <- p + geom_ribbon(fill = "black") + ylim(0, 300) > print(q) > p <- ggplot(slope, aes(x = taus, ymin = s.low, ymax = s.high)) > q <- p + geom_ribbon() > print(q) > detach(engel) |
For Specific Quantiles only
> attach(engel) The following object(s) are masked from engel ( position 6 ) : foodexp income > taus <- c(0.05, 0.25, 0.5, 0.75, 0.95) > i <- 1 > intercept <- data.frame() > slope <- data.frame() > for (i in 1:length(taus)) { + fit <- rq(foodexp ~ income, tau = taus[i]) + fit.sum <- summary(fit) + temp1 <- coef(fit.sum)[1, ] + temp2 <- coef(fit.sum)[2, ] + intercept <- rbind(intercept, temp1) + slope <- rbind(slope, temp2) + } > colnames(intercept) <- c("intercept", "i.low", "i.high") > colnames(slope) <- c("slope", "s.low", "s.high") > intercept$tau <- taus > slope$tau <- taus > intercept intercept i.low i.high tau 1 124.88004 98.30212 130.51695 0.05 2 95.48354 73.78608 120.09847 0.25 3 81.48225 53.25915 114.01156 0.50 4 62.39659 32.74488 107.31362 0.75 5 64.10396 46.26495 83.57896 0.95 > slope slope s.low s.high tau 1 0.3433611 0.3433270 0.3897500 0.05 2 0.4741032 0.4203298 0.4943288 0.25 3 0.5601806 0.4870223 0.6019890 0.50 4 0.6440141 0.5801552 0.6904127 0.75 5 0.7090685 0.6739000 0.7344405 0.95 > detach(engel) |
For all the Quantiles
> attach(engel) The following object(s) are masked from engel ( position 6 ) : foodexp income > fit <- rq(foodexp ~ income, tau = -1) > fit.sum <- summary(fit) > results <- data.frame(tau = fit$sol[1, ], intercept = fit$sol[4, + ], slope = fit$sol[5, ]) > p <- ggplot(results, aes(x = tau, y = intercept)) > q <- p + geom_line(lwd = 1.2) > print(q) > q <- p + geom_line(aes(y = slope), lwd = 1.2) > print(q) > detach(engel) |
Anova to check the various models
> fit1 <- rq(foodexp ~ income, tau = 0.25) > fit2 <- rq(foodexp ~ income, tau = 0.5) > fit3 <- rq(foodexp ~ income, tau = 0.75) > anova(fit1, fit2, fit3) Quantile Regression Analysis of Deviance Table Model: foodexp ~ income Joint Test of Equality of Slopes: tau in { 0.25 0.5 0.75 } Df Resid Df F value Pr(>F) 1 2 703 15.557 2.449e-07 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 |
The whole purpose of this exercise is to investigate the relationship between let’s say ZEE and SUNTV across all quantiles. Also to check whether the log transformations also change the estimates of the cointegrating equation.
Let me first populate the database
> library(RSQLite) > date.start <- "2009-02-02" > date.end <- "2010-02-18" > db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db" > drv <- dbDriver("SQLite") > con <- dbConnect(drv, dbname = db) > query <- paste("DELETED") > security.db <- dbGetQuery(con, query) > query <- " select * from security_master" > master <- dbGetQuery(con, query) > dataset <- merge(security.db, master, by.x = "ticker", by.y = "ticker", + all.x = T) > dataset <- dataset[, -5] > colnames(dataset) <- c("ticker", "ticker.id", "trade.date", "price", + "market.cap", "sector", "sector.id") > colnames(master) <- c("ticker.id", "ticker", "market.cap", "sector.name", + "sector.id") > master$mcap.rank <- factor(round(rank(master$market.cap)/38) + + 1) > tickers <- unique(master$ticker) > security.db <- security.db[, 3:4] > security.db1 <- unstack(security.db, form = security.db$price ~ + security.db$ticker) > y.t <- "ZEE" > x.t <- "SUNTV" > dates <- as.Date(unique(dataset$trade.date)) > prices <- security.db1[, c("ZEEL", "SUNTV")] |
Run a quantile regression between ZEEL and SUNTV
> fit <- rq(prices[, 2] ~ prices[, 1], tau = -1) > fit.sum <- summary(fit) > results <- data.frame(tau = fit$sol[1, ], intercept = fit$sol[4, + ], slope = fit$sol[5, ]) > p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function") > q <- p + geom_line(lwd = 1.2) > temp <- coef(lm(prices[, 2] ~ prices[, 1]))[1] > q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2) > print(q) > p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function") > q <- p + geom_line(aes(y = slope), lwd = 1.2) > temp <- coef(lm(prices[, 2] ~ prices[, 1]))[2] > q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2) > print(q) |
- Transform it to log prices and run the same
> fit <- rq(log(prices[, 2]) ~ log(prices[, 1]), tau = -1) > fit.sum <- summary(fit) > results <- data.frame(tau = fit$sol[1, ], intercept = fit$sol[4, + ], slope = fit$sol[5, ]) > p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function") > q <- p + geom_line(lwd = 1.2) > temp <- coef(lm(log(prices[, 2]) ~ log(prices[, 1])))[1] > q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2) > print(q) > p <- ggplot(results, aes(x = tau, y = intercept)) + opts(title = "Quantile Function") > q <- p + geom_line(aes(y = slope), lwd = 1.2) > temp <- coef(lm(log(prices[, 2]) ~ log(prices[, 1])))[2] > q <- q + geom_hline(yintercept = temp, colour = "red", lwd = 2) > print(q) |
Can one trace back the intercept levels ?
> fit1 <- lm(prices[, 2] ~ prices[, 1]) > fit2 <- lm(log(prices[, 2]) ~ log(prices[, 1])) > coef(fit1)[2] prices[, 1] 1.162249 > exp(coef(fit2)[2]) - 1 log(prices[, 1]) 1.235314 > fit1 <- rq(prices[, 2] ~ prices[, 1], tau = 0.5) > fit2 <- rq(log(prices[, 2]) ~ log(prices[, 1]), tau = 0.5) > coef(fit1)[2] prices[, 1] 1.141397 > exp(coef(fit2)[2]) - 1 log(prices[, 1]) 1.202772 |
Clearly the fits are close What if I delete the highest obs from y and x and then run a median reg.? Will the estimates remain same ?
> fit1 <- lm(prices[, 2] ~ prices[, 1]) > fit2 <- lm(prices[1:241, 2] ~ prices[1:241, 1]) > coef(fit1) (Intercept) prices[, 1] 44.571232 1.162249 > coef(fit2) (Intercept) prices[1:241, 1] 51.698425 1.112381 > (coef(fit2)[2] - coef(fit1)[2]) * 100/coef(fit1)[2] prices[1:241, 1] -4.290653 > fit1 <- rq(prices[, 2] ~ prices[, 1], tau = 0.5) > fit2 <- rq(prices[1:241, 2] ~ prices[1:241, 1], tau = 0.5) > coef(fit1) (Intercept) prices[, 1] 48.255190 1.141397 > coef(fit2) (Intercept) prices[1:241, 1] 52.118702 1.108058 > (coef(fit2)[2] - coef(fit1)[2]) * 100/coef(fit1)[2] prices[1:241, 1] -2.920852 |
If I remove a fornight of pricepoints, the hedge ratio change is only 3 percent via median regression while it is 4.5 percent using conditional mean equation
Need to figure out to tabulate skew shift and scale shift