options(help_type = "html")
## Load the sensR package:
require(sensR)
### Example 1: Basic difference testing with the Triangle protocol:
N <- 100
X <- 45
#################################
### How many correct answers would we expect if H_0 is true and
### p_c = 1/3?
#################################
## The answer is given by the binomial distribution:
barplot(dbinom(0:100, 100, 1/3), col="blue", names.arg=0:100, las=1)
## Lets zoom in:
barplot(dbinom(15:60, 100, 1/3), col="blue", names.arg=15:60, las=1)
xx <- numeric(100)
xx[45] <- dbinom(45, 100, 1/3)
## Add X=45 to the plot:
barplot(xx[15:60], col="green", add=TRUE, axes=FALSE)
## So 33 is most probable and <20 or >50 is unlikely.
#################################
### What would an extreme observation be if $H_0$ is true?
#################################
## Consider:
## P(X >= 41) = 1 - P(X <= 41 - 1):
1- pbinom(41-1, 100, 1/3)
## P(X >= 42) = 1 - P(X <= 42 - 1):
1- pbinom(42-1, 100, 1/3)
## x_c = 42 is the critical value on the 5% significance level.
## The probability of observing 42 or more correct answers out of 100
## if Pr(correct answer) = 1/3 is less than 0.05.
## sensR function can find this number for you:
findcr(sample.size=100, alpha=0.05, p0=1/3)
#################################
### How extreme is 45 correct out of 100 if products are not
### different?
#################################
## Pr(X >= 45) if X ~ binom(1/3, 100):
## Pr(X >= 45) = 1 - Pr(X <= 45 - 1):
1 - pbinom(45 - 1, 100, 1/3)
## This is the p-value for our difference test.
## Illustration of the p-value:
barplot(dbinom(15:60, N, 1/3), col="blue", names.arg=15:60, las=1)
xx <- numeric(100)
xx[45:100] <- dbinom(45:100, 100, 1/3)
## Add X=45 to the plot:
barplot(xx[15:60], col="red", add=TRUE, axes=FALSE)
text(45, .01, "p-value", col="red", cex=1.5)
## Conclusion:
## There is a considerable evidence that products are perceived
## differently by consumers.
#################################
### Which values for the true p_c are plausible?
#################################
## Approximate 95% Confidence interval:
pchat <- 45 / 100
se.pchat <- sqrt(pchat * (1 - pchat) / 100)
## CI limits:
pchat - 1.96 * se.pchat ## lower limit
pchat + 1.96 * se.pchat ## upper limit
#################################
### How can sensR help us with sensory difference tests?
#################################
discrim(45, 100, method = "triangle")
discrim(45, 100, method = "triangle", statistic="Wald")
## ?discrim
##################################################################
#################################
### What is the proportion of discriminators in the sweetener
### example?
#################################
## We had 45 correct out or 100, so pc is
pchat <- 45 / 100
## The guessing probability is 1/3 for the Triangle test:
pd <- (0.45 - 1/3) / (2/3)
pd
## so 17.5% of the consumers are able to tell the difference between
## the products.
### How can sensR help us here?
## From pc 2 pd:
pc2pd(pc=0.45, Pguess=1/3)
## From pd 2 pc:
pd2pc(pd=0.175, Pguess=1/3)
#################################
### Which proportion of correct answers should we expect if we had
### used the Duo-Trio protocol instead?
#################################
## So, if pd = 0.175, what is pc when the guessing probability is 1/2?
pd2pc(pd=0.175, Pguess=1/2)
## so we should expect around 60% correct answers in the Duo-Trio test
## had we used that instead of the Triangle test.
##################################################################
#################################
### What is d-prime in the sweetener example?
#################################
## We can use the rescale function to move between the different
## scales for a sensory difference:
rescale(pc=0.45, method = "triangle")
## ?rescale
#################################
### How low or high could we reasonably expect d-prime to be?
#################################
## Here we seek a confidence interval. The discrim function will
## provide that for us:
discrim(45, 100, method="triangle")
## Conclusion:
## It is plausible that the true sensory difference between the
## products is as low as d' = 0.43 or as high as 1.74.
## This means that d' could be quite small or indeed fairly high.
##################################################################
### Power estimation:
## Illustration of power on toy example:
## Power of detecting a d'=2 in a Triangle test with 15 samples with
## alpha=0.05:
par(mfrow = c(2, 1))
barplot(dbinom(0:15, 15, 1/3), col="green", names.arg=0:15, las=1,
ylim=c(0, .25))
text(2, .2, "Null hypothesis")
## 1) Find critical value
findcr(15, p0=1/3) ## 9
## Check --- P(X >= 9) = 1 - P(X <= 8):
1 - pbinom(7, 15, 1/3) ## less than alpha
1 - pbinom(8, 15, 1/3) ## larger than alpha - OK
## Add critical value to plot:
xx <- numeric(16)
xx[10:16] <- dbinom(9:15, 15, 1/3)
## Add critical region to the plot:
barplot(xx, col="blue", add=TRUE, axes=FALSE)
text(14, .1, "type I error", col="blue")
## Power is the probability of getting X >= 9 if the Alternative
## hypothesis is true.
## Illustration of the binomial distribution if the Alternative
## hypothesis is true:
## What is pc if d-prime is 2?
rescale(d.prime=2, method="triangle")
## pc = 0.6048
barplot(dbinom(0:15, 15, prob=0.6048), col="red", names.arg=0:15, las=1,
ylim=c(0, .25))
text(3, .2, "Alternative hypothesis")
xx <- numeric(16)
xx[10:16] <- dbinom(9:15, 15, prob=0.6048)
## Add critical region to the plot:
barplot(xx, col="green", add=TRUE, axes=FALSE)
text(17, .15, "power", col="green")
text(5, .1, "type II error", col="red")
## 3) Find Pr(X >= 9) if X ~ binom(0.6048, 15)
## Pr(X >= 9) = 1 - Pr(X <= 8)
1- pbinom(8, 15, prob = 0.6048)
## So the power of this test is around 62%
## Now use a function from sensR:
d.primePwr(2, sample.size=15, method="triangle")
## ?d.primePwr
#################################
### What is the probability that you can detect a difference of d'=1?
#################################
## Protocol: Duo-Trio
## Sample size 100
## Significance level? --- Lets choose alpha = 5%
d.primePwr(d.primeA=1, sample.size=229, alpha=0.05, method="duotrio")
d.primePwr(d.primeA=1, sample.size=50, alpha=0.05, method="twoAFC")
d.primeSS(1, target.power=.80, alpha=.05, method="triangle")
d.primeSS(1, target.power=.80, alpha=.05, method="tetrad")
d.primeSS(1, target.power=.80, alpha=.05, method="duotrio")
d.primeSS(1, target.power=.80, alpha=.05, method="twoAFC")
## So with this sample size we have less than 50% chance of detecting
## the difference.
## This experiment is not worth doing!
#################################
### How many consumers would you need to have 80% power?
#################################
d.primeSS(d.primeA=1, target.power=0.8, alpha=0.05, method="duotrio")
## This is better, but still not adequate.
#################################
### If you are convinced that the difference is primarily a difference
### in smoothness, what can you then do?
#################################
## We could use a specified method such as 2-AFC or 3-AFC
## instead. This would mean much fewer consumers:
d.primeSS(d.primeA=1, target.power=0.8, alpha=0.05, method="twoAFC")
d.primeSS(d.primeA=1, target.power=0.8, alpha=0.05, method="threeAFC")
##################################################################
### Stable exact sample size:
## From ISO Triangle Standard (2004):
alpha <- .05
beta <- 0.001
pd <- 0.1
pg <- 1/3
n.iso <- 1181
(n.sensR <- discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha,
pGuess=1/3, test="difference",
statistic="exact"))
## Whoops - this is not 1181!?!?!?!
n.iso == n.sensR ## FALSE
## Lets compute power at n.sensR:
pwr.sensR <- discrimPwr(pdA=pd, pd0=0, sample.size=n.sensR, alpha=alpha,
pGuess=pg, test="difference", statistic="exact")
pwr.sensR > 1-beta
pwr.sensR
## so here power is in fact larger than required, so 1178 is
## sufficient and 1181 is the wrong number!
## Lets compute the stable-exact estimate:
discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha,
pGuess=1/3, test="difference", statistic="stable")
## We could also ask for both at the same time:
discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha,
pGuess=1/3, test="difference", statistic="both.exact")
discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha,
pGuess=1/3, test="difference", statistic="normal")
discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha,
pGuess=1/3, test="difference", statistic="cont.normal")
?discrimSS
## Lets take a closer look (plot it):
par(mfrow=c(1, 1))
n.seq <- 1168:1210
pwr.seq <- sapply(n.seq, function(n) {
discrimPwr(pdA=pd, pd0=0, sample.size=n, alpha=alpha,
pGuess=pg, test="difference", statistic="exact") })
plot(n.seq, pwr.seq, type="b", xlab="Sample size",
ylab="Exact binomial power")
abline(h=1-beta)
segments(c(1178, 1200), c(0, 0), y1=pwr.seq[n.seq %in% c(1178, 1200)],
col="red")
## Conclusion:
## sensR gets it right and we must conclude there is an error in the
## ISO standard.
##################################################################
### Extra stuff - figures in slides etc.
##################################################################
#################################
### Gridgeman's paradox
#################################
n <- c(45, 108, 720, 240, 240, 108, 156, 180)
tri <- c(21, 42, 363, 104, 99, 50, 106, 105)
afc3 <- c(32, 62, 539, 161, 148, 75, 145, 152)
mat <- rbind(Triangle=tri/n, "3-AFC"=afc3/n)
pdf(file="./figs/gridgeman_pc.pdf", width=5, height=4)
## par(mar=c(4,4,2,1)+.5))
par(mar=c(4,4,.5,0)+.5)
barplot(mat, beside=TRUE, ylab = "Proportion correct",
xlab="Studies", legend.text = c("Triangle", "3-AFC"),
args.legend = list(bty="n", x="topleft"), ylim=0:1, las=1,
names.arg = letters[1:8])
dev.off()
## What if we look at proportion discriminators?
mat2 <- rbind(pc2pd(tri/n, 1/3),
pc2pd(afc3/n, 1/3))
pdf(file="./figs/gridgeman_pd.pdf", width=5, height=4)
## par(mar=c(4,4,2,1)+.5))
par(mar=c(4,4,.5,0)+.5)
barplot(mat2, beside=TRUE, ylab = "Proportion discriminators",
xlab="Studies", legend.text = c("Triangle", "3-AFC"),
args.legend = list(bty="n", x="topleft"), ylim=0:1, las=1,
names.arg = letters[1:8])
dev.off()
## Problem:
## Same test gives different values of pd for Triangle and 3-AFC for
## the same products.
mat3 <- rbind(psyinv(tri/n, method="triangle"),
psyinv(afc3/n, method = "threeAFC"))
pdf(file="./figs/gridgeman_dprime.pdf", width=5, height=4)
## par(mar=c(4,4,2,1)+.5))
par(mar=c(4,4,.5,0)+.5)
barplot(mat3, beside=TRUE, ylab = "Thurstonian d-prime",
xlab="Studies", legend.text = c("Triangle", "3-AFC"),
args.legend = list(bty="n", x="topleft"), ylim=c(0, 2.5), las=1,
names.arg = letters[1:8])
dev.off()
#################################
#################################
### Examples of d' in R:
#################################
pdf(file="./figs/dprime_examples.pdf", width=4, height=3)
par(mfrow=c(3, 1))
par(mar=c(1,0,1,0)+.5)
xx <- seq(-3, 7, len=1e3)
plot(xx, dnorm(xx), type ="l", axes=FALSE, xlab="", ylab="",
main="obvious difference (d' = 4)", lwd=2)
lines(xx, dnorm(xx, mean=4), type="l", col="blue", lwd=2)
abline(h = 0)
xx <- seq(-3, 4, len=1e3)
plot(xx, dnorm(xx), type ="l", axes=FALSE, xlab="", ylab="",
main="confusable (d' = 1)", lwd=2)
lines(xx, dnorm(xx, mean=1), type="l", col="blue", lwd=2)
abline(h = 0)
xx <- seq(-3, 3.2, len=1e3)
plot(xx, dnorm(xx), type ="l", axes=FALSE, xlab="", ylab="",
main="Almost indistinguishable (d' = 0.2)", lwd=2)
lines(xx, dnorm(xx, mean=.2), type="l", col="blue", lwd=2)
abline(h = 0)
dev.off()
#################################
##
pdf(file="./figs/discretePwr.pdf", width=5, height=3)
par(mar=c(3.1,4,0,0))
xx <- 100:120
pwr <- sapply(100:120, function(n) d.primePwr(1.2, sample.size=n))
plot(xx, pwr, type="b", ylab="Power", xlab="", axes=FALSE,
pch=19)
mtext("Sample size", side=1, line=2)
axis(1); axis(2, las=1)
abline(h = 0.75, col="blue", lty=2)
sel <- c(7, 16)
points(xx[sel], pwr[sel], col="red", pch=19)
segments(x0=xx[sel], y0=c(0, 0), y1=pwr[sel], col="red")
dev.off()
#################################
pdf(file="./figs/ss_vs_pwr.pdf", width=5, height=3)
par(mar=c(3.1,4,0,0))
ss <- 10:150
pw3 <- sapply(ss, function(s) discrimPwr(0.25, sample.size=s,
pGuess=1/3))
pw2 <- sapply(ss, function(s) discrimPwr(0.25, sample.size=s,
pGuess=1/2))
plot(ss, pw3, xlab="", ylab="Power", type="l", ylim=0:1,
axes=FALSE)
mtext("Sample size", side=1, line=2)
lines(ss, pw2, col=2)
axis(1, at=1:15 * 10)
axis(2, las=1)
text(30, 0.8, "3 alternatives")
text(80, .5, "2 alternatives", col=2)
text(80, 1, expression(p[d] == 0.25))
dev.off()