Purpose: In my earlier post I used geoms to draw about 52 plots. In this post, I aim to plot 48 graphs using stats, thus making it 100 plots over the weekend.
This post will remind me of the weekend in February 2010 when I attempted the feat of producing 100 plots.
Let me first prepare the dataset:
> library(RSQLite)
> library(ggplot2)
> date.start <- "2009-02-02"
> date.end <- "2010-02-18"
> db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db"
> drv <- dbDriver("SQLite")
> con <- dbConnect(drv, dbname = db)
> # NOTE(review): the price query text appears to have been redacted from the
> # post; the placeholder string below is not runnable SQL.
> query <- paste(" Remove deliberately")
> security.db <- dbGetQuery(con, query)
> query <- " select * from security_master"
> master <- dbGetQuery(con, query)
> # Both result sets are in memory now, so release the SQLite connection.
> invisible(dbDisconnect(con))
> # Left join on ticker: keep every row of security.db even without a match
> # in master.
> dataset <- merge(security.db, master, by.x = "ticker", by.y = "ticker",
+ all.x = TRUE)
> # Drop the fifth merged column, then give the remaining seven columns
> # readable names.
> dataset <- dataset[, -5]
> colnames(dataset) <- c("ticker", "ticker.id", "trade.date", "price",
+ "market.cap", "sector", "sector.id")
> colnames(master) <- c("ticker.id", "ticker", "market.cap", "sector.name",
+ "sector.id")
> # Bucket securities by market-cap rank into a factor.
> # NOTE(review): the divisor 38 is a hard-coded bucket size - confirm.
> master$mcap.rank <- factor(round(rank(master$market.cap)/38) +
+ 1)
> tickers <- unique(master$ticker)
> # Keep columns 3 and 4 only, then spread prices into one column per ticker.
> security.db <- security.db[, 3:4]
> security.db1 <- unstack(security.db, form = security.db$price ~
+ security.db$ticker)
> y.t <- "UNIONBANK"
> x.t <- "PNB"
> dates <- as.Date(unique(dataset$trade.date))
stat_bin (histograms)
> library(ggplot2)
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin()
> print(q) |
data:image/s3,"s3://crabby-images/dec28/dec280ad44d2b45b1c3e81f71f4353c09deb4fcf" alt="stat_ggplot2-002.jpg"
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(binwidth = 0.25)
> print(q) |
data:image/s3,"s3://crabby-images/9259e/9259efac0024a54e21dbefd979f6edbd3e1f6778" alt="stat_ggplot2-003.jpg"
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(breaks = seq(1, 50, 0.1))
> print(q) |
data:image/s3,"s3://crabby-images/cdcf2/cdcf2b3b090c05cf6b0056f9db95475c2e0845c3" alt="stat_ggplot2-004.jpg"
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(aes(y = ..density..), binwidth = 0.25)
> print(q) |
data:image/s3,"s3://crabby-images/a015b/a015be40ba144c15825ca7f297de0837f0b3ff4f" alt="stat_ggplot2-005.jpg"
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(aes(y = ..density..), binwidth = 0.25, geom = "line",
+ position = "identity")
> print(q) |
data:image/s3,"s3://crabby-images/c1a8d/c1a8ddb095779c82f3f9e729bbca0788e39ed98c" alt="stat_ggplot2-006.jpg"
> p <- ggplot(master, aes(x = market.cap/5000))
> q <- p + stat_bin(aes(y = ..density..), binwidth = 0.25, geom = "line",
+ position = "identity")
> q <- q + stat_density(colour = "blue", fill = NA, lwd = 2)
> print(q) |
data:image/s3,"s3://crabby-images/4f912/4f91206a35977740db0e980e2bf2f786a7a6e8a4" alt="stat_ggplot2-007.jpg"
> head(master)
ticker.id ticker market.cap sector.name sector.id mcap.rank
1 1 ABAN 4174 Oil Field Services 17 2
2 2 ABB 15503 Utilities 25 3
3 3 ACC 13125 Cement 4 3
4 4 ADLABSFILM 1106 Media 14 1
5 5 AMBUJACEM 12924 Cement 4 3
6 6 ANDHRABANK 5340 PSUBank 3 2
> p <- ggplot(master, aes(x = sector.name))
> q <- p + stat_bin() + coord_flip()
> print(q) |
data:image/s3,"s3://crabby-images/d8cb0/d8cb06a54825bd2f2b02c8303cf4d1f4bf1983ac" alt="stat_ggplot2-008.jpg"
stat_bin2d
> library(mnormt)
> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> print(q) |
data:image/s3,"s3://crabby-images/67762/677621d9b6a66b3de0baef838f3035476f7bceba" alt="stat_ggplot2-009.jpg"
But here is a binned frequency count of the same data:
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_bin2d()
> print(q) |
data:image/s3,"s3://crabby-images/b225e/b225e575b05706474139a67699025e2adc5fa670" alt="stat_ggplot2-010.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_bin2d(bins = 30)
> print(q) |
data:image/s3,"s3://crabby-images/f809a/f809a5b3f597c82c1d1c1a98a599bdf17bb97b22" alt="stat_ggplot2-011.jpg"
> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_bin2d(binwidth = c(0.1, 0.1))
> print(q) |
data:image/s3,"s3://crabby-images/021f6/021f6c61f220657c69571a24bcf14dc03e5b9af1" alt="stat_ggplot2-012.jpg"
> temp <- as.data.frame(rmnorm(n = 1000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> x1 <- seq(min(temp$x), max(temp$x), 0.1)
> y1 <- seq(min(temp$y), max(temp$y), 0.1)
> q <- p + stat_bin2d(breaks = list(x = x1, y = y1))
> print(q) |
data:image/s3,"s3://crabby-images/49ce2/49ce25cab952c1ecd9cecb733eb730f61641a191" alt="stat_ggplot2-013.jpg"
stat_binhex
> library(mnormt)
> library(hexbin)
> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_binhex()
> print(q) |
data:image/s3,"s3://crabby-images/2e403/2e403f0b7875286f36b93e16cffc859a44a2eee7" alt="stat_ggplot2-014.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_binhex(bins = 30)
> print(q) |
data:image/s3,"s3://crabby-images/20070/20070be2dd4048907c4b41b054cdcb03363963a3" alt="stat_ggplot2-015.jpg"
> temp <- as.data.frame(rmnorm(n = 10000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_binhex(binwidth = c(0.1, 0.1))
> print(q) |
data:image/s3,"s3://crabby-images/a818d/a818d3fb9da42a6f0af21f3a3fb7c7c30c365aaf" alt="stat_ggplot2-016.jpg"
> temp <- as.data.frame(rmnorm(n = 1000, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> p <- ggplot(temp, aes(x = x, y = y))
> x1 <- seq(min(temp$x), max(temp$x), 0.1)
> y1 <- seq(min(temp$y), max(temp$y), 0.1)
> q <- p + stat_binhex(breaks = list(x = x1, y = y1))
> print(q) |
data:image/s3,"s3://crabby-images/63261/63261888bc3839c88e2b4803caa1b869e0606337" alt="stat_ggplot2-017.jpg"
stat_function
> x <- data.frame(x = rnorm(100))
> p <- ggplot(x, aes(x = x)) + geom_density()
> q <- p + stat_function(fun = pnorm, colour = "blue", lwd = 1.3)
> print(q) |
data:image/s3,"s3://crabby-images/3f54a/3f54a00b678d07f0b60f697d3d402c909cd8b20c" alt="stat_ggplot2-018.jpg"
stat_sum
stat_summary
> z <- rnorm(1000)
> x <- c(rep("a", 300), rep("b", 200), rep("c", 450), rep("d",
+ 50))
> y <- c(rep("e", 100), rep("f", 20), rep("g", 700), rep("h", 180))
> temp <- data.frame(x = x, y = y, z = z)
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1))
> print(q) |
data:image/s3,"s3://crabby-images/55b4d/55b4db8a1781e47295d536001d845ab3a5101f21" alt="stat_ggplot2-020.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1)) + scale_size(to = c(3, 10))
> print(q) |
data:image/s3,"s3://crabby-images/5d77b/5d77ba48463240a7cdfd240721d5540a9aa967c8" alt="stat_ggplot2-021.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1)) + scale_size(to = c(3, 20))
> print(q) |
data:image/s3,"s3://crabby-images/97316/97316f77f88dd396271297883b1718b76bcedd8c" alt="stat_ggplot2-022.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = 1)) + scale_area(to = c(3, 10))
> print(q) |
data:image/s3,"s3://crabby-images/bad5d/bad5d36cfa2959e40d7fa857132dc516d1c30ca1" alt="stat_ggplot2-023.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = x))
> print(q) |
data:image/s3,"s3://crabby-images/a8799/a879973c2a814eb1e71741770a7ace1bfa6a639c" alt="stat_ggplot2-024.jpg"
> p <- ggplot(temp, aes(x = x, y = y))
> q <- p + stat_sum(aes(group = y))
> print(q) |
data:image/s3,"s3://crabby-images/02b37/02b379f80817ec16ac5b0de25591a1cbbf317005" alt="stat_ggplot2-025.jpg"
> # Pass the data frame by name: qplot()'s third positional argument is not
> # `data`, so a bare `temp` leaves x and y to be looked up outside the frame.
> q <- qplot(x, y, data = temp)
> print(q)
data:image/s3,"s3://crabby-images/829e8/829e8c2dc5819f8504f30ba88dfbc845f29858c6" alt="stat_ggplot2-026.jpg"
> # Pass the data frame by name: qplot()'s third positional argument is not
> # `data`, so a bare `temp` leaves x and y to be looked up outside the frame.
> q <- qplot(x, y, data = temp, stat = "sum", group = 1)
> print(q)
data:image/s3,"s3://crabby-images/10ad0/10ad0a67425a487cb2983955568a1b49fa34f1cb" alt="stat_ggplot2-027.jpg"
The count is 75 so far: I have drawn about 75 graphs over the weekend. I would like to aim for 100, so that I will remember this weekend as the 100-graph weekend.
stat_summary
> z <- c(rnorm(300, 20, 3), rnorm(200, 60, 5), rnorm(500, 13, 8))
> x <- c(rep("a", 300), rep("b", 200), rep("c", 450), rep("d",
+ 50))
> y <- c(rep("e", 100), rep("f", 20), rep("g", 700), rep("h", 180))
> temp <- data.frame(x1 = factor(x), x2 = factor(y), z = z)
> p <- ggplot(temp, aes(x = x1, y = z))
> q <- p + stat_summary(fun.data = "mean_cl_boot", colour = "red")
> print(q) |
data:image/s3,"s3://crabby-images/862b8/862b8456245251b4247127a3517f055cd7a283c1" alt="stat_ggplot2-028.jpg"
> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "mean")
> print(q) |
data:image/s3,"s3://crabby-images/f4ffb/f4ffbef22fc0b17274a251c4c9041884923332e8" alt="stat_ggplot2-029.jpg"
> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "median")
> print(q) |
data:image/s3,"s3://crabby-images/59ee2/59ee24724a74fd2fe426350f3b169de222865cd6" alt="stat_ggplot2-030.jpg"
> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "median") +
+ ylim(0, 80)
> print(q) |
data:image/s3,"s3://crabby-images/e99f3/e99f39155ba3f7b2581000db608c2966c127f160" alt="stat_ggplot2-031.jpg"
> q <- qplot(x1, z, data = temp, stat = "summary", fun.y = "median")
> q <- q + coord_cartesian(ylim = c(0, 80))
> print(q) |
data:image/s3,"s3://crabby-images/e4174/e417472b514a9a4ca6df5ddebbd48b55d59d4096" alt="stat_ggplot2-032.jpg"
> # Wrap stat_summary() with a fixed red colour and size 3.
> #   fun  - summary function, handed to stat_summary() as fun.y
> #   geom - geom used to draw the summary layer; defaults to "point"
> #   ...  - any further arguments, forwarded unchanged to stat_summary()
> stat_sum_single <- function(fun, geom = "point", ...) {
+     stat_summary(
+         fun.y = fun,
+         geom = geom,
+         colour = "red",
+         size = 3,
+         ...
+     )
+ }
> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(mean)
> print(q) |
data:image/s3,"s3://crabby-images/dd365/dd3650891b49a35c8436374363f3670cb7e3dff4" alt="stat_ggplot2-033.jpg"
> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(mean, geom = "line")
> print(q) |
data:image/s3,"s3://crabby-images/a2bae/a2bae29b0d68edb59a2a00b7854d97e2389f3e11" alt="stat_ggplot2-034.jpg"
> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(median)
> print(q) |
data:image/s3,"s3://crabby-images/0958b/0958b44771503026c277688b9d4bbcfe211d1986" alt="stat_ggplot2-035.jpg"
> q <- qplot(x1, z, data = temp)
> q <- q + stat_sum_single(sd)
> print(q) |
data:image/s3,"s3://crabby-images/58732/58732cd9ae42b2e81c1831a35f9551c837528182" alt="stat_ggplot2-036.jpg"
Well, 16 more graphs are left to reach my target of 100 graphs for the weekend. It does seem to be within reach.
Annotation with ggplot2
> data(economics)
> head(economics)
date pce pop psavert uempmed unemploy
1 1967-06-30 507.8 198712 9.8 4.5 2944
2 1967-07-31 510.9 198911 9.8 4.7 2945
3 1967-08-31 516.7 199113 9.0 4.6 2958
4 1967-09-30 513.3 199311 9.8 4.9 3143
5 1967-10-31 518.5 199498 9.7 4.7 3066
6 1967-11-30 526.2 199657 9.4 4.8 3018
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+ xlab = "", ylab = "Number unemp(1000s)")
> print(unemp) |
data:image/s3,"s3://crabby-images/47b6f/47b6f1b6500b45e097582c3cb2b13a39ef9312ed" alt="stat_ggplot2-037.jpg"
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+ xlab = "", ylab = "Number unemp(1000s)")
> unemp <- unemp + geom_vline(aes(xintercept = start), data = presidential)
> print(unemp) |
data:image/s3,"s3://crabby-images/59bcd/59bcdbff33bea6e5cbb906db4bdc21b6f93d5ce6" alt="stat_ggplot2-038.jpg"
Well, 14 more graphs are left to reach my target of 100 graphs for the weekend.
> yrng <- range(economics$unemploy)
> xrng <- range(economics$date)
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+ xlab = "", ylab = "Number unemp(1000s)")
> unemp <- unemp + geom_rect(aes(NULL, NULL, xmin = start, xmax = end,
+ fill = party), ymin = yrng[1], ymax = yrng[2], data = presidential)
> print(unemp) |
data:image/s3,"s3://crabby-images/35f0a/35f0a080bca81515199b0f7421973f9c2db17f21" alt="stat_ggplot2-039.jpg"
> # The shaded rectangles span the full observed range of unemployment.
> yrng <- range(economics$unemploy)
> xrng <- range(economics$date)
> unemp <- qplot(date, unemploy, data = economics, geom = "line",
+ xlab = "", ylab = "Number unemp(1000s)")
> # One rectangle per row of `presidential`, filled by party and drawn with
> # alpha 0.2 so the line stays visible.
> unemp <- unemp + geom_rect(aes(NULL, NULL, xmin = start, xmax = end,
+ fill = party), ymin = yrng[1], ymax = yrng[2], data = presidential) +
+ scale_fill_manual(values = alpha(c("blue", "red"), 0.2))
> # Deliberately no `last_plot <- unemp`: that would mask ggplot2's
> # last_plot() function with a plot object, and ggplot2 already tracks the
> # most recent plot itself.
> print(unemp)
data:image/s3,"s3://crabby-images/52a90/52a90fa2e81005aad8d9247d775f8111902968b6" alt="stat_ggplot2-040.jpg"
> q <- last_plot() + geom_text(aes(x = start, y = yrng[1], label = name),
+ data = presidential, size = 3, hjust = 0, vjust = 0)
> print(q) |
data:image/s3,"s3://crabby-images/49011/49011b922c0da3def90853b4fe8650864e52b08c" alt="stat_ggplot2-041.jpg"
I realize at this point that I have not really used geom_text properly in the examples so far.
Let me see whether I can do another 11 graphs using geom_text.
geom_text
> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> q <- q + geom_text(aes(label = "M"))
> print(q) |
data:image/s3,"s3://crabby-images/864d6/864d6243a4af2af93c797803ed8168c60e85cb9e" alt="stat_ggplot2-042.jpg"
opts
> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> q <- q + opts(title = "VINAY")
> print(q) |
data:image/s3,"s3://crabby-images/3a228/3a2280b1164fddef08d2b0aea02a0516c28545ba" alt="stat_ggplot2-043.jpg"
> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> q <- ggplot(temp, aes(x = x, y = y)) + geom_jitter()
> q <- q + opts(title = "VINAY")
> print(q) |
data:image/s3,"s3://crabby-images/4abad/4abad3e3e428f7aab183467081157dc103eae35b" alt="stat_ggplot2-044.jpg"
> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> # `data = temp` is required: qplot()'s third positional argument is not
> # `data`, so a bare `temp` would make x and y resolve to the global vectors
> # defined earlier instead of these simulated columns.
> q <- qplot(x, y, data = temp, geom = "jitter")
> q <- q + opts(title = "VINAY")
> print(q)
data:image/s3,"s3://crabby-images/aa5cc/aa5cc378cc715a2428c2011510d1b2349d873f57" alt="stat_ggplot2-045.jpg"
> temp <- as.data.frame(rmnorm(n = 100, mean = c(0, 0), varcov = diag(2)))
> colnames(temp) <- c("x", "y")
> # `data = temp` is required: qplot()'s third positional argument is not
> # `data`, so a bare `temp` would make x and y resolve to the global vectors
> # defined earlier instead of these simulated columns.
> q <- qplot(x, y, data = temp, geom = "jitter")
> q <- q + opts(title = "VINAY")
> # Switch to the black-and-white theme.
> q <- q + theme_bw()
> print(q)
data:image/s3,"s3://crabby-images/42955/429550ea609739ed4348b438e92a42fa6ed27214" alt="stat_ggplot2-046.jpg"
Six more plots will take me to 100 plots for the weekend. I have reached the stage where I am willing to plot just about anything. OK, let me go with it.
I just need 6 more plots… You know what, I will take a break. I will take my own sweet time and complete these 6 plots before the clock strikes 12… I will…
geom_errorbar
> # Error-bar bounds: y plus/minus se.
> # NOTE(review): `limits` is defined but never added to the plot below - no
> # geom_errorbar layer is drawn despite the section heading. Confirm intent.
> limits <- aes(ymax = y + se, ymin = y - se)
> temp <- data.frame(y = rnorm(4, 23, 2), se = runif(4), grp = factor(c(1,
+ 2, 1, 2)), x = factor(c("a", "b", "a", "b")))
> p <- ggplot(temp, aes(x = x, y = y, fill = grp))
> # Bars use the y values as given (stat = "identity"), dodged by grp.
> q <- p + geom_bar(position = "dodge", stat = "identity")
> print(q) |
data:image/s3,"s3://crabby-images/341b3/341b3c6e51a150b84709eb5280af546a8293dd77" alt="stat_ggplot2-047.jpg"
stat_contour
> volcano3d <- melt(volcano)
> names(volcano3d) <- c("x", "y", "z")
> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour()
> print(q) |
data:image/s3,"s3://crabby-images/4e2dd/4e2dd40db85f9e695a7e0c823a5d0e6fd33c9d95" alt="stat_ggplot2-048.jpg"
> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(bins = 2)
> print(q) |
data:image/s3,"s3://crabby-images/5f732/5f732fc3267ba1047ba46cd3823e0b028fe4fc90" alt="stat_ggplot2-049.jpg"
> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(bins = 10)
> print(q) |
data:image/s3,"s3://crabby-images/de14a/de14a762ca2b0019590d71f9b9d2c2748f063e96" alt="stat_ggplot2-050.jpg"
> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(binwidth = 2)
> print(q) |
data:image/s3,"s3://crabby-images/19358/193584ebf7f51a662b9d47303683b2e94fd50690" alt="stat_ggplot2-051.jpg"
> p <- ggplot(volcano3d, aes(x, y, z = z))
> q <- p + stat_contour(size = 2, linetype = 4)
> print(q) |
data:image/s3,"s3://crabby-images/86aa5/86aa54e51df9b3e2c303eebe249f42fe61a24907" alt="stat_ggplot2-052.jpg"
WOW!!! It's 11 pm on Sunday night and I am done… I have reached my target of 100 graphs over the weekend.