Sunday 15 April 2012

r - How to use dplyr to group elements in x ,count frequency of x for an interval of y? -



r - How to use dplyr to group elements in x ,count frequency of x for an interval of y? -

# Observations: 79 single-letter labels, most of them occurring several times.
x <- c(
  "a", "v", "c", "a", "d", "e", "g", "f", "h", "y",
  "u", "r", "s", "w", "s", "d", "g", "j", "u", "r",
  "s", "s", "s", "v", "b", "g", "e", "w", "s", "d",
  "g", "h", "j", "i", "t", "e", "w", "w", "q", "q",
  "d", "v", "b", "m", "m", "k", "l", "u", "p", "o",
  "r", "t", "n", "e", "w", "w", "j", "f", "c", "g",
  "h", "t", "r", "d", "e", "w", "w", "w", "z", "f",
  "g", "f", "h", "h", "y", "r", "f", "f", "l"
)

# One value in 1..40 for each of the 79 observations in x, drawn with
# replacement. `replace` must be the logical TRUE; a bare `t` would pass the
# base transpose function and make sample() fail.
y <- sample(1:40, 79, replace = TRUE)

y
 [1] 38 18 19 19 37 38 26 4 32 23 11 24 36 15 22 19 6 24 13 36 2 26 35 39 8 33 20 19 23 28 5 17 40 26 18 21
[37] 35 23 27 12 3 33 16 32 11 19 4 5 8 19 5 19 33 33 33 13 12 32 21 4 14 8 28 34 33 22 34 19 39 23 6 8
[73] 37 17 21 16 38 15 36

I have 2 variables, 'x' and 'y'. There is more than one instance of each observation in 'x'. There are values in 'y' corresponding to every observation in 'x'.

I want to accomplish grouping by 'x' and partitioning of the 'y' values into intervals.

To put it a different way: how many times each letter occurred, divided into the specified intervals based on the value assigned to the letter in each of its occurrences.

example :-

I could not represent the table, as I could not find a better way to type it here.

I hope this is clear. I shall try to restate it if needed. I would appreciate any help in this regard.

using dplyr

library(dplyr)
library(tidyr)

# Count how often each value of x falls into each 10-wide interval of y
# ((0,10], (10,20], (20,30], (30,40]), then spread the intervals into columns:
# one row per x, one column per interval, 0 where a combination never occurs.
# spread() is superseded by tidyr::pivot_wider(); it is kept here so the
# result stays directly comparable with the data.table version of this answer.
res <- df %>%
  group_by(x, y = cut(y, breaks = seq(0, 40, by = 10))) %>%
  tally() %>%
  ungroup() %>%
  spread(y, n, fill = 0)

or using data.table

library(data.table)

# Same tabulation with data.table: count rows (.N) per combination of x and
# 10-wide interval of y, then cast the intervals into columns, filling
# combinations that never occur with 0L.
# Note: setDT() converts `df` to a data.table by reference.
res1 <- dcast.data.table(
  setDT(df)[
    , list(.N),
    by = list(x, y1 = cut(y, breaks = seq(0, 40, by = 10)))
  ],
  x ~ y1,
  value.var = "N",
  fill = 0L
)

# Both approaches give the same table.
all.equal(as.data.frame(res), as.data.frame(res1))
# [1] TRUE

Note: there is a `labels` argument in `cut` if you want to have column headings such as freq0-10, etc.

# Same tabulation, with custom interval labels used as the column headings.
df %>%
  group_by(
    x,
    y = cut(
      y,
      breaks = seq(0, 40, by = 10),
      labels = paste0("freq", c("0-10", "10-20", "20-30", "30-40"))
    )
  ) %>%
  tally() %>%
  ungroup() %>%
  spread(y, n, fill = 0) %>%
  head(2)
#   x freq0-10 freq10-20 freq20-30 freq30-40
# 1 a        0         1         1         0
# 2 b        1         1         0         0

# Data: the `df` used in the examples (dput() output; 79 rows, x is a factor
# with 25 levels, y is an integer in 1..40).
df <- structure(
  list(
    x = structure(
      c(
        1L, 22L, 3L, 1L, 4L, 5L, 7L, 6L, 8L, 24L,
        21L, 18L, 19L, 23L, 19L, 4L, 7L, 10L, 21L, 18L,
        19L, 19L, 19L, 22L, 2L, 7L, 5L, 23L, 19L, 4L,
        7L, 8L, 10L, 9L, 20L, 5L, 23L, 23L, 17L, 17L,
        4L, 22L, 2L, 13L, 13L, 11L, 12L, 21L, 16L, 15L,
        18L, 20L, 14L, 5L, 23L, 23L, 10L, 6L, 3L, 7L,
        8L, 20L, 18L, 4L, 5L, 23L, 23L, 23L, 25L, 6L,
        7L, 6L, 8L, 8L, 24L, 18L, 6L, 6L, 12L
      ),
      .Label = c(
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
        "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
        "u", "v", "w", "y", "z"
      ),
      class = "factor"
    ),
    y = c(
      12L, 9L, 29L, 21L, 27L, 37L, 12L, 31L, 33L, 11L,
      25L, 15L, 27L, 27L, 13L, 37L, 8L, 2L, 21L, 6L,
      4L, 23L, 30L, 6L, 9L, 28L, 4L, 24L, 26L, 2L,
      13L, 10L, 15L, 6L, 38L, 9L, 30L, 26L, 28L, 39L,
      19L, 16L, 11L, 9L, 2L, 4L, 16L, 15L, 11L, 14L,
      19L, 35L, 19L, 29L, 22L, 40L, 19L, 12L, 7L, 6L,
      20L, 10L, 12L, 6L, 30L, 13L, 38L, 39L, 30L, 20L,
      6L, 9L, 1L, 40L, 26L, 14L, 23L, 33L, 2L
    )
  ),
  .Names = c("x", "y"),
  row.names = c(NA, -79L),
  class = "data.frame"
)

r dplyr

No comments:

Post a Comment