Purpose: In my earlier post I used geoms to draw about 52 plots. In this post, I aim to draw 48 more graphs using stats, bringing the total to 100 plots over the weekend.

This post will remind me of the weekend in February 2010 when I attempted the feat of producing 100 plots.

Let me first prepare the dataset

> library(RSQLite)
> library(ggplot2)
> # Date window for the analysis; not referenced in the code shown here
> # (presumably used by the price query whose text was removed -- confirm).
> date.start <- "2009-02-02"
> date.end <- "2010-02-18"
> db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db"
> drv <- dbDriver("SQLite")
> con <- dbConnect(drv, dbname = db)
> # The text of the price query has been removed on purpose.
> query <- paste(" Remove deliberately")
> security.db <- dbGetQuery(con, query)
> query <- " select * from security_master"
> master <- dbGetQuery(con, query)
> # Both queries are done, so release the database connection.
> invisible(dbDisconnect(con))
> # Left join on ticker: every row of security.db is kept.
> dataset <- merge(security.db, master, by.x = "ticker", by.y = "ticker",
+     all.x = TRUE)
> # Drop the fifth column of the merged result (presumably a duplicated id
> # column produced by the merge -- confirm against the real schema).
> dataset <- dataset[, -5]
> colnames(dataset) <- c("ticker", "ticker.id", "trade.date", "price",
+     "market.cap", "sector", "sector.id")
> colnames(master) <- c("ticker.id", "ticker", "market.cap", "sector.name",
+     "sector.id")
> # Bucket securities by market-cap rank; the divisor 38 sets the bucket
> # width (its origin is not shown here).
> master$mcap.rank <- factor(round(rank(master$market.cap)/38) +
+     1)
> tickers <- unique(master$ticker)
> # Keep columns 3 and 4 only, then spread prices into one column per ticker.
> security.db <- security.db[, 3:4]
> security.db1 <- unstack(security.db, form = security.db$price ~
+     security.db$ticker)
> y.t <- "UNIONBANK"
> x.t <- "PNB"
> dates <- as.Date(unique(dataset$trade.date))


stat_hist

> library(ggplot2)
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin()
> print(q)

stat_ggplot2-002.jpg

> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(binwidth = 0.25)
> print(q)

stat_ggplot2-003.jpg

> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(breaks = seq(1, 50, 0.1))
> print(q)

stat_ggplot2-004.jpg

> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(aes(y = ..density..), binwidth = 0.25)
> print(q)

stat_ggplot2-005.jpg

> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(aes(y = ..density..), binwidth = 0.25, geom = "line",
+     position = "identity")
> print(q)

stat_ggplot2-006.jpg

> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(aes(y = ..density..), binwidth = 0.25, geom = "line",
+     position = "identity")
> q <- q + stat_density(colour = "blue", fill = NA, lwd = 2)
> print(q)

stat_ggplot2-007.jpg

> head(master)
  ticker.id     ticker market.cap        sector.name sector.id mcap.rank
1         1       ABAN       4174 Oil Field Services        17         2
2         2        ABB      15503          Utilities        25         3
3         3        ACC      13125             Cement         4         3
4         4 ADLABSFILM       1106              Media        14         1
5         5  AMBUJACEM      12924             Cement         4         3
6         6 ANDHRABANK       5340            PSUBank         3         2
> p <- ggplot(master, aes(x = sector.name))
> q <- p + stat_bin() + coord_flip()
> print(q)

stat_ggplot2-008.jpg



stat_bin2d

> library(mnormt)
> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> print(q)

stat_ggplot2-009.jpg

But here is a frequency count of the same data:

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_bin2d()
> print(q)

stat_ggplot2-010.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_bin2d(bins = 30)
> print(q)

stat_ggplot2-011.jpg

> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_bin2d(binwidth = c(0.1, 0.1))
> print(q)

stat_ggplot2-012.jpg

> temp <- as.data.frame(rmnorm(n = 1000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> x1 <- seq(min(temp$x), max(temp$x), 0.1)
> y1 <- seq(min(temp$y), max(temp$y), 0.1)
> q <- p + stat_bin2d(breaks = list(x = x1, y = y1))
> print(q)

stat_ggplot2-013.jpg



stat_binhex

> library(mnormt)
> library(hexbin)
> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_binhex()
> print(q)

stat_ggplot2-014.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_binhex(bins = 30)
> print(q)

stat_ggplot2-015.jpg

> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_binhex(binwidth = c(0.1, 0.1))
> print(q)

stat_ggplot2-016.jpg

> temp <- as.data.frame(rmnorm(n = 1000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> x1 <- seq(min(temp$x), max(temp$x), 0.1)
> y1 <- seq(min(temp$y), max(temp$y), 0.1)
> q <- p + stat_binhex(breaks = list(x = x1, y = y1))
> print(q)

stat_ggplot2-017.jpg



stat_function

> x <- data.frame(x = rnorm(100))
> p <- ggplot(x, aes(x = x)) + geom_density()
> # Overlay the theoretical standard normal density (dnorm). pnorm is the
> # cumulative distribution function and is not comparable with a density
> # estimate.
> q <- p + stat_function(fun = dnorm, colour = "blue", lwd = 1.3)
> print(q)

stat_ggplot2-018.jpg



stat_sum

stat_summary

> z <- rnorm(1000)
> x <- c(rep("a", 300), rep("b", 200), rep("c", 450), rep("d",
+     50))
> y <- c(rep("e", 100), rep("f", 20), rep("g", 700), rep("h", 180))
> temp <- data.frame(x = x, y = y, z = z)
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1))
> print(q)

stat_ggplot2-020.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1)) + scale_size(to = c(3, 10))
> print(q)

stat_ggplot2-021.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1)) + scale_size(to = c(3, 20))
> print(q)

stat_ggplot2-022.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1)) + scale_area(to = c(3, 10))
> print(q)

stat_ggplot2-023.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = x))
> print(q)

stat_ggplot2-024.jpg

> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = y))
> print(q)

stat_ggplot2-025.jpg

> # Pass the data frame by name so that x and y are looked up in temp
> # instead of relying on same-named global vectors.
> q <- qplot(x, y, data = temp)
> print(q)

stat_ggplot2-026.jpg

> # Pass the data frame by name so that x and y are looked up in temp
> # instead of relying on same-named global vectors.
> q <- qplot(x, y, data = temp, stat = "sum", group = 1)
> print(q)

stat_ggplot2-027.jpg



The count is 75 so far: I have drawn about 75 graphs over the weekend. I would like to aim for 100, so that I will remember this as the 100-graph weekend.

stat_summary

> z <- c(rnorm(300, 20, 3), rnorm(200, 60, 5), rnorm(500, 13, 8))
> x <- c(rep("a", 300), rep("b", 200), rep("c", 450), rep("d",
+     50))
> y <- c(rep("e", 100), rep("f", 20), rep("g", 700), rep("h", 180))
> temp <- data.frame(x1 = factor(x), x2 = factor(y), z = z)
> p <- ggplot(temp, aes(x = x1, y = z))
> q <- p + stat_summary(fun.data = "mean_cl_boot", colour = "red")
> print(q)

stat_ggplot2-028.jpg

> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "mean")
> print(q)

stat_ggplot2-029.jpg

> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "median")
> print(q)

stat_ggplot2-030.jpg

> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "median") +
+     ylim(0, 80)
> print(q)

stat_ggplot2-031.jpg

> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "median")
> q <- q + coord_cartesian(ylim = c(0, 80))
> print(q)

stat_ggplot2-032.jpg

> # Helper that builds a stat_summary() layer drawn in red at size 3.
> #   fun:  summary function applied to y (passed on as fun.y), e.g. mean.
> #   geom: geom used to draw the summary; defaults to "point".
> #   ...:  further arguments forwarded unchanged to stat_summary().
> stat_sum_single <- function(fun, geom = "point", ...) {
+     stat_summary(fun.y = fun, colour = "red", geom = geom, size = 3,
+         ...)
+ }
> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(mean)
> print(q)

stat_ggplot2-033.jpg

> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(mean, geom = "line")
> print(q)

stat_ggplot2-034.jpg

> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(median)
> print(q)

stat_ggplot2-035.jpg

> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(sd)
> print(q)

stat_ggplot2-036.jpg

Well, 16 more graphs are left to reach my target of 100 graphs for the weekend. It does seem to be within reach.



Annotation with ggplot2

> data(economics)
> head(economics)
        date   pce    pop psavert uempmed unemploy
1 1967-06-30 507.8 198712     9.8     4.5     2944
2 1967-07-31 510.9 198911     9.8     4.7     2945
3 1967-08-31 516.7 199113     9.0     4.6     2958
4 1967-09-30 513.3 199311     9.8     4.9     3143
5 1967-10-31 518.5 199498     9.7     4.7     3066
6 1967-11-30 526.2 199657     9.4     4.8     3018
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+     xlab = "", ylab = "Number unemp(1000s)")
> print(unemp)

stat_ggplot2-037.jpg

> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+     xlab = "", ylab = "Number unemp(1000s)")
> unemp <- unemp + geom_vline(aes(xintercept = start), data = presidential)
> print(unemp)

stat_ggplot2-038.jpg

Well, 14 more graphs are left to reach my target of 100 graphs for the weekend.

> yrng <- range(economics$unemploy)
> xrng <- range(economics$date)
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+     xlab = "", ylab = "Number unemp(1000s)")
> unemp <- unemp + geom_rect(aes(NULL, NULL, xmin = start, xmax = end,
+     fill = party), ymin = yrng[1], ymax = yrng[2], data = presidential)
> print(unemp)

stat_ggplot2-039.jpg

> # Vertical and horizontal extent of the series; yrng gives the rectangles
> # their full height.
> yrng <- range(economics$unemploy)
> xrng <- range(economics$date)
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+     xlab = "", ylab = "Number unemp(1000s)")
> # Shade each presidential term, coloured by party, with translucent fills.
> unemp <- unemp + geom_rect(aes(NULL, NULL, xmin = start, xmax = end,
+     fill = party), ymin = yrng[1], ymax = yrng[2], data = presidential) +
+     scale_fill_manual(values = alpha(c("blue", "red"), 0.2))
> # No assignment to `last_plot`: that would create a variable shadowing the
> # name of ggplot2's last_plot() function. ggplot2 tracks the most recent
> # plot itself.
> print(unemp)

stat_ggplot2-040.jpg

> q <- last_plot() + geom_text(aes(x = start, y = yrng[1], label = name),
+     data = presidential, size = 3, hjust = 0, vjust = 0)
> print(q)

stat_ggplot2-041.jpg



I realize at this point that I have not actually used geom_text properly in any of the examples.

Let me see whether I can do another 11 graphs using geom_text.

geom_text

> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> q <- q + geom_text(aes(label = "M"))
> print(q)

stat_ggplot2-042.jpg



opts

> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> q <- q + opts(title = "VINAY")
> print(q)

stat_ggplot2-043.jpg

> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> q <- q + opts(title = "VINAY")
> print(q)

stat_ggplot2-044.jpg

> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> # Name the data argument: otherwise x and y resolve to unrelated global
> # vectors left over from earlier examples, not to the columns of temp.
> q <- qplot(x, y, data = temp, geom = "jitter")
> q <- q + opts(title = "VINAY")
> print(q)

stat_ggplot2-045.jpg

> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> # Name the data argument: otherwise x and y resolve to unrelated global
> # vectors left over from earlier examples, not to the columns of temp.
> q <- qplot(x, y, data = temp, geom = "jitter")
> q <- q + opts(title = "VINAY")
> q <- q + theme_bw()
> print(q)

stat_ggplot2-046.jpg



6 more plots will take me to 100 plots for the weekend. I have reached the stage where I am willing to plot just about anything. OK, let me go with it.

I just need 6 more plots… You know what, I will take a break. I will take my own sweet time and complete these 6 plots before the clock strikes 12. I will…

geom_errorbar

> # Error bars extend one standard error (se) above and below each bar.
> limits <- aes(ymax = y + se, ymin = y - se)
> temp <- data.frame(y = rnorm(4, 23, 2), se = runif(4), grp = factor(c(1,
+     2, 1, 2)), x = factor(c("a", "b", "a", "b")))
> p <- ggplot(temp, aes(x = x, y = y, fill = grp))
> # Share one dodge position so the bars and their error bars line up.
> dodge <- position_dodge(width = 0.9)
> q <- p + geom_bar(position = dodge, stat = "identity")
> # Actually draw the error bars: `limits` was previously defined but unused.
> q <- q + geom_errorbar(limits, position = dodge, width = 0.25)
> print(q)

stat_ggplot2-047.jpg



stat_contour

> volcano3d <- melt(volcano)
> names(volcano3d) <- c("x", "y", "z")
> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour()
> print(q)

stat_ggplot2-048.jpg

> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(bins = 2)
> print(q)

stat_ggplot2-049.jpg

> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(bins = 10)
> print(q)

stat_ggplot2-050.jpg

> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(binwidth = 2)
> print(q)

stat_ggplot2-051.jpg

> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(size = 2, linetype = 4)
> print(q)

stat_ggplot2-052.jpg



WOW!!! It's 11 pm on Sunday night and I am done. I have reached my target of 100 graphs over the weekend.