When using the "stack" position (not "dodge"), as with geom_bar or geom_col, the bar totals are displayed incorrectly. I managed to show the correct total in a simple way when one of the values is conspicuously more frequent than the others; see the workaround below (not a log scale). However, the problem with the totals remains for other cases and for log scales. I am asking for a universal solution.
Case 1. Similar frequencies
# Two dates with nine observations each; the values A, B and C cycle, so every
# date/value combination occurs three times.
mydf <- data.frame(
  date = rep(c("2020-02-01", "2020-02-02"), each = 9),
  value = rep(LETTERS[1:3], times = 6)
)
mydf
library(data.table)
# Convert to a data.table in place, then count the rows of every
# date/value combination.
setDT(mydf)
mydf[, .N, by = c("date", "value")]
# date value N
# 1: 2020-02-01 A 3
# 2: 2020-02-01 B 3
# 3: 2020-02-01 C 3
# 4: 2020-02-02 A 3
# 5: 2020-02-02 B 3
# 6: 2020-02-02 C 3
library(ggplot2)
library(scales)
# Stacked counts per date on a linear y axis.
simple1 <- ggplot(data = mydf, mapping = aes(x = date, fill = value)) +
  geom_bar() +
  scale_y_continuous(breaks = breaks_pretty())

# The same stacked counts on a log2 y axis.
simple1log <- ggplot(data = mydf, mapping = aes(x = date, fill = value)) +
  geom_bar() +
  scale_y_continuous(
    trans = "log2",
    breaks = log_breaks(n = 7),
    labels = label_number_auto()
  )
# Total-count problem: the real total per date is 9.
# Draw both plots side by side on one page: linear scale in the left column,
# log2 scale in the right column.
{
  # library() fails loudly if the package is missing; require() would only
  # return FALSE and let the following calls fail with a less helpful error.
  library(grid)
  grid.newpage()
  # Outer viewport holding a 1 x 2 layout.
  pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
  print(simple1, newpage = FALSE)
  popViewport()
  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
  print(simple1log, newpage = FALSE)
  # Pop the column viewport and the layout viewport so the viewport stack is
  # back at the top level when the block finishes.
  popViewport(2)
}
![enter image description here]()
Case 2: One value is much more frequent than the others; same problem, followed by a workaround.
# Two dates with 45 observations each. The values are 39 A's, then the cycle
# A, B, C four times, then another 39 A's, so A dominates on both dates.
mydf2 <- data.frame(
  date = rep(c("2020-02-01", "2020-02-02"), each = 45),
  value = c(
    rep("A", times = 39),
    rep(LETTERS[1:3], times = 4),
    rep("A", times = 39)
  ),
  stringsAsFactors = FALSE
)
# Convert to a data.table in place, count the rows of every date/value
# combination, keep the result for the workaround below, and show it.
dateValueCount <- setDT(mydf2)[, .N, by = c("date", "value")]
dateValueCount
# date value N
# 1: 2020-02-01 A 41
# 2: 2020-02-01 B 2
# 3: 2020-02-01 C 2
# 4: 2020-02-02 A 41
# 5: 2020-02-02 B 2
# 6: 2020-02-02 C 2
# Stacked counts per date on a linear y axis; the total per date is 45.
prevalent1 <- ggplot(data = mydf2, mapping = aes(x = date, fill = value)) +
  geom_bar() +
  scale_y_continuous(breaks = breaks_pretty())

# The same stacked counts on a log2 y axis.
prevalent1log <- ggplot(data = mydf2, mapping = aes(x = date, fill = value)) +
  geom_bar() +
  scale_y_continuous(
    trans = "log2",
    breaks = log_breaks(n = 7),
    labels = label_number_auto()
  )
# Total-count problem: the real total per date is 45.
# Draw both plots side by side on one page: linear scale in the left column,
# log2 scale in the right column.
{
  # library() fails loudly if the package is missing; require() would only
  # return FALSE and let the following calls fail with a less helpful error.
  library(grid)
  grid.newpage()
  # Outer viewport holding a 1 x 2 layout.
  pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
  print(prevalent1, newpage = FALSE)
  popViewport()
  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
  print(prevalent1log, newpage = FALSE)
  # Pop the column viewport and the layout viewport so the viewport stack is
  # back at the top level when the block finishes.
  popViewport(2)
}
![enter image description here]()
# Workaround:
# for every date, keep only the row(s) whose count N is the largest of that
# date. The inner call collects the matching row numbers per date (column V1).
mydf2Max <- dateValueCount[
  dateValueCount[, .I[N == max(N)], by = "date"]$V1
]
mydf2Max
# date value N
# 1: 2020-02-01 A 41
# 2: 2020-02-02 A 41
# Total number of observations per date.
dateCount <- mydf2[, .N, by = "date"]
# date N
# 1: 2020-02-01 45
# 2: 2020-02-02 45

# Counts above `threshold` are drawn at `shrinkFactor` times their real size.
shrinkFactor <- 0.05
threshold <- 6

# Copy each date's total onto the row of its most frequent value.
mydf2Max[, totalDay := dateCount$N[match(date, dateCount$date)]]
# Count contributed by the other values (B and C); the final height of the
# most frequent value (A) depends on it.
mydf2Max[, diff := totalDay - N]
# Part of the most frequent value's count that must not be shrunk ...
mydf2Max[, notshrink := threshold - diff]
# ... and the part above the threshold that must be shrunk.
mydf2Max[, NToShrink := N - notshrink]
mydf2Max[, NToShrinkShrinked := NToShrink * shrinkFactor]
# Transformed height = shrunk part + unshrunk part.
mydf2Max[, NToShrinkShrinkedPlusBase := NToShrinkShrinked + notshrink]
# Piecewise-linear axis transformation; works for the "dodge" position.
# Source: https://stackoverflow.com/questions/44694496/y-break-with-scale-change-in-r
#
# Values up to the global `threshold` are returned unchanged; the part of a
# value above `threshold` is multiplied by the global `shrinkFactor`.
# `x` is a numeric vector; the result has the same length.
trans <- function(x) {
  kept <- pmin(x, threshold)
  excess <- pmax(x - threshold, 0)
  kept + shrinkFactor * excess
}
# dateValueCount$transN <- trans(dateValueCount$N)
# Turn both tables back into plain data frames (in place).
setDF(dateValueCount)
setDF(mydf2Max)
# Pass the transformed height to the count table, matching rows on the
# combined value/date key; rows without a match get NA.
count_key <- paste(dateValueCount$value, dateValueCount$date, sep = ".")
max_key <- paste(mydf2Max$value, mydf2Max$date, sep = ".")
dateValueCount$N2 <- mydf2Max$NToShrinkShrinkedPlusBase[match(count_key, max_key)]
# Where a transformed height exists, it replaces the real count.
has_n2 <- !is.na(dateValueCount$N2)
dateValueCount$N[has_n2] <- dateValueCount$N2[has_n2]
# Axis labels show the real counts; the breaks sit at their transformed
# positions.
yticks <- c(0, 2, 4, 6, 40, 50)
ggplot(
  data = dateValueCount,
  mapping = aes(x = date, y = N, group = value, fill = value)
) + # group=longName
  geom_col(position = "stack") +
  # White rectangle from y = threshold to threshold + 1 across x = 0..3
  # (presumably marking the axis break -- confirm against the rendered plot).
  geom_rect(
    mapping = aes(xmin = 0, xmax = 3, ymin = threshold, ymax = threshold + 1),
    fill = "white"
  ) +
  scale_y_continuous(breaks = trans(yticks), labels = yticks)
![enter image description here]()