options(help_type = "html") ## Load the sensR package: require(sensR) ### Example 1: Basic difference testing with the Triangle protocol: N <- 100 X <- 45 ################################# ### How many correct answers would we expect if H_0 is true and ### p_c = 1/3? ################################# ## The answer is given by the binomial distribution: barplot(dbinom(0:100, 100, 1/3), col="blue", names.arg=0:100, las=1) ## Lets zoom in: barplot(dbinom(15:60, 100, 1/3), col="blue", names.arg=15:60, las=1) xx <- numeric(100) xx[45] <- dbinom(45, 100, 1/3) ## Add X=45 to the plot: barplot(xx[15:60], col="green", add=TRUE, axes=FALSE) ## So 33 is most probable and <20 or >50 is unlikely. ################################# ### What would an extreme observation be if $H_0$ is true? ################################# ## Consider: ## P(X >= 41) = 1 - P(X <= 41 - 1): 1- pbinom(41-1, 100, 1/3) ## P(X >= 42) = 1 - P(X <= 42 - 1): 1- pbinom(42-1, 100, 1/3) ## x_c = 42 is the critical value on the 5% significance level. ## The probability of observing 42 or more correct answers out of 100 ## if Pr(correct answer) = 1/3 is less than 0.05. ## sensR function can find this number for you: findcr(sample.size=100, alpha=0.05, p0=1/3) ################################# ### How extreme is 45 correct out of 100 if products are not ### different? ################################# ## Pr(X >= 45) if X ~ binom(1/3, 100): ## Pr(X >= 45) = 1 - Pr(X <= 45 - 1): 1 - pbinom(45 - 1, 100, 1/3) ## This is the p-value for our difference test. ## Illustration of the p-value: barplot(dbinom(15:60, N, 1/3), col="blue", names.arg=15:60, las=1) xx <- numeric(100) xx[45:100] <- dbinom(45:100, 100, 1/3) ## Add X=45 to the plot: barplot(xx[15:60], col="red", add=TRUE, axes=FALSE) text(45, .01, "p-value", col="red", cex=1.5) ## Conclusion: ## There is a considerable evidence that products are perceived ## differently by consumers. ################################# ### Which values for the true p_c are plausible? ################################# ## Approximate 95% Confidence interval: pchat <- 45 / 100 se.pchat <- sqrt(pchat * (1 - pchat) / 100) ## CI limits: pchat - 1.96 * se.pchat ## lower limit pchat + 1.96 * se.pchat ## upper limit ################################# ### How can sensR help us with sensory difference tests? ################################# discrim(45, 100, method = "triangle") discrim(45, 100, method = "triangle", statistic="Wald") ## ?discrim ################################################################## ################################# ### What is the proportion of discriminators in the sweetener ### example? ################################# ## We had 45 correct out or 100, so pc is pchat <- 45 / 100 ## The guessing probability is 1/3 for the Triangle test: pd <- (0.45 - 1/3) / (2/3) pd ## so 17.5% of the consumers are able to tell the difference between ## the products. ### How can sensR help us here? ## From pc 2 pd: pc2pd(pc=0.45, Pguess=1/3) ## From pd 2 pc: pd2pc(pd=0.175, Pguess=1/3) ################################# ### Which proportion of correct answers should we expect if we had ### used the Duo-Trio protocol instead? ################################# ## So, if pd = 0.175, what is pc when the guessing probability is 1/2? pd2pc(pd=0.175, Pguess=1/2) ## so we should expect around 60% correct answers in the Duo-Trio test ## had we used that instead of the Triangle test. ################################################################## ################################# ### What is d-prime in the sweetener example? ################################# ## We can use the rescale function to move between the different ## scales for a sensory difference: rescale(pc=0.45, method = "triangle") ## ?rescale ################################# ### How low or high could we reasonably expect d-prime to be? ################################# ## Here we seek a confidence interval. The discrim function will ## provide that for us: discrim(45, 100, method="triangle") ## Conclusion: ## It is plausible that the true sensory difference between the ## products is as low as d' = 0.43 or as high as 1.74. ## This means that d' could be quite small or indeed fairly high. ################################################################## ### Power estimation: ## Illustration of power on toy example: ## Power of detecting a d'=2 in a Triangle test with 15 samples with ## alpha=0.05: par(mfrow = c(2, 1)) barplot(dbinom(0:15, 15, 1/3), col="green", names.arg=0:15, las=1, ylim=c(0, .25)) text(2, .2, "Null hypothesis") ## 1) Find critical value findcr(15, p0=1/3) ## 9 ## Check --- P(X >= 9) = 1 - P(X <= 8): 1 - pbinom(7, 15, 1/3) ## less than alpha 1 - pbinom(8, 15, 1/3) ## larger than alpha - OK ## Add critical value to plot: xx <- numeric(16) xx[10:16] <- dbinom(9:15, 15, 1/3) ## Add critical region to the plot: barplot(xx, col="blue", add=TRUE, axes=FALSE) text(14, .1, "type I error", col="blue") ## Power is the probability of getting X >= 9 if the Alternative ## hypothesis is true. ## Illustration of the binomial distribution if the Alternative ## hypothesis is true: ## What is pc if d-prime is 2? rescale(d.prime=2, method="triangle") ## pc = 0.6048 barplot(dbinom(0:15, 15, prob=0.6048), col="red", names.arg=0:15, las=1, ylim=c(0, .25)) text(3, .2, "Alternative hypothesis") xx <- numeric(16) xx[10:16] <- dbinom(9:15, 15, prob=0.6048) ## Add critical region to the plot: barplot(xx, col="green", add=TRUE, axes=FALSE) text(17, .15, "power", col="green") text(5, .1, "type II error", col="red") ## 3) Find Pr(X >= 9) if X ~ binom(0.6048, 15) ## Pr(X >= 9) = 1 - Pr(X <= 8) 1- pbinom(8, 15, prob = 0.6048) ## So the power of this test is around 62% ## Now use a function from sensR: d.primePwr(2, sample.size=15, method="triangle") ## ?d.primePwr ################################# ### What is the probability that you can detect a difference of d'=1? ################################# ## Protocol: Duo-Trio ## Sample size 100 ## Significance level? --- Lets choose alpha = 5% d.primePwr(d.primeA=1, sample.size=229, alpha=0.05, method="duotrio") d.primePwr(d.primeA=1, sample.size=50, alpha=0.05, method="twoAFC") d.primeSS(1, target.power=.80, alpha=.05, method="triangle") d.primeSS(1, target.power=.80, alpha=.05, method="tetrad") d.primeSS(1, target.power=.80, alpha=.05, method="duotrio") d.primeSS(1, target.power=.80, alpha=.05, method="twoAFC") ## So with this sample size we have less than 50% chance of detecting ## the difference. ## This experiment is not worth doing! ################################# ### How many consumers would you need to have 80% power? ################################# d.primeSS(d.primeA=1, target.power=0.8, alpha=0.05, method="duotrio") ## This is better, but still not adequate. ################################# ### If you are convinced that the difference is primarily a difference ### in smoothness, what can you then do? ################################# ## We could use a specified method such as 2-AFC or 3-AFC ## instead. This would mean much fewer consumers: d.primeSS(d.primeA=1, target.power=0.8, alpha=0.05, method="twoAFC") d.primeSS(d.primeA=1, target.power=0.8, alpha=0.05, method="threeAFC") ################################################################## ### Stable exact sample size: ## From ISO Triangle Standard (2004): alpha <- .05 beta <- 0.001 pd <- 0.1 pg <- 1/3 n.iso <- 1181 (n.sensR <- discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha, pGuess=1/3, test="difference", statistic="exact")) ## Whoops - this is not 1181!?!?!?! n.iso == n.sensR ## FALSE ## Lets compute power at n.sensR: pwr.sensR <- discrimPwr(pdA=pd, pd0=0, sample.size=n.sensR, alpha=alpha, pGuess=pg, test="difference", statistic="exact") pwr.sensR > 1-beta pwr.sensR ## so here power is in fact larger than required, so 1178 is ## sufficient and 1181 is the wrong number! ## Lets compute the stable-exact estimate: discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha, pGuess=1/3, test="difference", statistic="stable") ## We could also ask for both at the same time: discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha, pGuess=1/3, test="difference", statistic="both.exact") discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha, pGuess=1/3, test="difference", statistic="normal") discrimSS(pdA=pd, pd0=0, target.power=1-beta, alpha=alpha, pGuess=1/3, test="difference", statistic="cont.normal") ?discrimSS ## Lets take a closer look (plot it): par(mfrow=c(1, 1)) n.seq <- 1168:1210 pwr.seq <- sapply(n.seq, function(n) { discrimPwr(pdA=pd, pd0=0, sample.size=n, alpha=alpha, pGuess=pg, test="difference", statistic="exact") }) plot(n.seq, pwr.seq, type="b", xlab="Sample size", ylab="Exact binomial power") abline(h=1-beta) segments(c(1178, 1200), c(0, 0), y1=pwr.seq[n.seq %in% c(1178, 1200)], col="red") ## Conclusion: ## sensR gets it right and we must conclude there is an error in the ## ISO standard. ################################################################## ### Extra stuff - figures in slides etc. ################################################################## ################################# ### Gridgeman's paradox ################################# n <- c(45, 108, 720, 240, 240, 108, 156, 180) tri <- c(21, 42, 363, 104, 99, 50, 106, 105) afc3 <- c(32, 62, 539, 161, 148, 75, 145, 152) mat <- rbind(Triangle=tri/n, "3-AFC"=afc3/n) pdf(file="./figs/gridgeman_pc.pdf", width=5, height=4) ## par(mar=c(4,4,2,1)+.5)) par(mar=c(4,4,.5,0)+.5) barplot(mat, beside=TRUE, ylab = "Proportion correct", xlab="Studies", legend.text = c("Triangle", "3-AFC"), args.legend = list(bty="n", x="topleft"), ylim=0:1, las=1, names.arg = letters[1:8]) dev.off() ## What if we look at proportion discriminators? mat2 <- rbind(pc2pd(tri/n, 1/3), pc2pd(afc3/n, 1/3)) pdf(file="./figs/gridgeman_pd.pdf", width=5, height=4) ## par(mar=c(4,4,2,1)+.5)) par(mar=c(4,4,.5,0)+.5) barplot(mat2, beside=TRUE, ylab = "Proportion discriminators", xlab="Studies", legend.text = c("Triangle", "3-AFC"), args.legend = list(bty="n", x="topleft"), ylim=0:1, las=1, names.arg = letters[1:8]) dev.off() ## Problem: ## Same test gives different values of pd for Triangle and 3-AFC for ## the same products. mat3 <- rbind(psyinv(tri/n, method="triangle"), psyinv(afc3/n, method = "threeAFC")) pdf(file="./figs/gridgeman_dprime.pdf", width=5, height=4) ## par(mar=c(4,4,2,1)+.5)) par(mar=c(4,4,.5,0)+.5) barplot(mat3, beside=TRUE, ylab = "Thurstonian d-prime", xlab="Studies", legend.text = c("Triangle", "3-AFC"), args.legend = list(bty="n", x="topleft"), ylim=c(0, 2.5), las=1, names.arg = letters[1:8]) dev.off() ################################# ################################# ### Examples of d' in R: ################################# pdf(file="./figs/dprime_examples.pdf", width=4, height=3) par(mfrow=c(3, 1)) par(mar=c(1,0,1,0)+.5) xx <- seq(-3, 7, len=1e3) plot(xx, dnorm(xx), type ="l", axes=FALSE, xlab="", ylab="", main="obvious difference (d' = 4)", lwd=2) lines(xx, dnorm(xx, mean=4), type="l", col="blue", lwd=2) abline(h = 0) xx <- seq(-3, 4, len=1e3) plot(xx, dnorm(xx), type ="l", axes=FALSE, xlab="", ylab="", main="confusable (d' = 1)", lwd=2) lines(xx, dnorm(xx, mean=1), type="l", col="blue", lwd=2) abline(h = 0) xx <- seq(-3, 3.2, len=1e3) plot(xx, dnorm(xx), type ="l", axes=FALSE, xlab="", ylab="", main="Almost indistinguishable (d' = 0.2)", lwd=2) lines(xx, dnorm(xx, mean=.2), type="l", col="blue", lwd=2) abline(h = 0) dev.off() ################################# ## pdf(file="./figs/discretePwr.pdf", width=5, height=3) par(mar=c(3.1,4,0,0)) xx <- 100:120 pwr <- sapply(100:120, function(n) d.primePwr(1.2, sample.size=n)) plot(xx, pwr, type="b", ylab="Power", xlab="", axes=FALSE, pch=19) mtext("Sample size", side=1, line=2) axis(1); axis(2, las=1) abline(h = 0.75, col="blue", lty=2) sel <- c(7, 16) points(xx[sel], pwr[sel], col="red", pch=19) segments(x0=xx[sel], y0=c(0, 0), y1=pwr[sel], col="red") dev.off() ################################# pdf(file="./figs/ss_vs_pwr.pdf", width=5, height=3) par(mar=c(3.1,4,0,0)) ss <- 10:150 pw3 <- sapply(ss, function(s) discrimPwr(0.25, sample.size=s, pGuess=1/3)) pw2 <- sapply(ss, function(s) discrimPwr(0.25, sample.size=s, pGuess=1/2)) plot(ss, pw3, xlab="", ylab="Power", type="l", ylim=0:1, axes=FALSE) mtext("Sample size", side=1, line=2) lines(ss, pw2, col=2) axis(1, at=1:15 * 10) axis(2, las=1) text(30, 0.8, "3 alternatives") text(80, .5, "2 alternatives", col=2) text(80, 1, expression(p[d] == 0.25)) dev.off()