# Screen subjects against a set of inclusion criteria, pool the trials of the
# subjects that pass, and plot the density of reaction times measured from
# stimulus offset (rt - filedur).
#
# A subject is kept only if ALL of the following hold:
#   1. their L1 entry in the subjects table is one of the English labels;
#   2. they have at least `min_tokens` trials in the data table;
#   3. at least `min_prop_ok` of their trials carry the "OK" status;
#   4. no single response accounts for more than `max_resp_prop` of trials.
library(ggplot2)
library(plyr)

s <- read.csv("http://linguistics.berkeley.edu/~kjohnson/subjects.csv",
              header = TRUE)
d <- read.csv("http://linguistics.berkeley.edu/~kjohnson//unique_data.csv",
              header = TRUE)

english_labels <- c("English", "english", "ENGLISH", "eng")
min_tokens <- 50
min_prop_ok <- 0.8
max_resp_prop <- 0.5

# The original code took the 4th cell of table(status) as the "OK" count.
# That position is only stable when `status` is a factor carrying every level
# of the full data set, so resolve the label once, globally, and compare by
# value afterwards.
# NOTE(review): the 4th sorted status value is assumed to be the "OK" code --
# confirm against the data file.
status_levels <- levels(factor(d$status))
if (length(status_levels) < 4) {
  stop("Expected at least 4 distinct status values in the data, found ",
       length(status_levels), call. = FALSE)
}
ok_status <- status_levels[4]

# read.csv() returns character columns since R 4.0, so levels() would be NULL
# there; sorted unique values give the same iteration order as factor levels.
subject_ids <- sort(unique(as.character(s$subj)))

kept <- list()
for (sbj in subject_ids) {
  # Use the first matching row so the `if` condition is always a scalar.
  native_lang <- as.character(s$L1[which(s$subj == sbj)])[1]
  if (!(native_lang %in% english_labels)) {
    print(paste(sbj, "not native English"))
    next
  }

  # This subject's trials.
  subd <- d[which(d$subj == sbj), , drop = FALSE]
  if (nrow(subd) < 1) {
    print(paste(sbj, "no data"))
    next
  }
  if (nrow(subd) < min_tokens) {
    print(paste(sbj, "not enough tokens"))
    next
  }

  # Proportion of non-missing statuses equal to the OK code. An NaN result
  # (every status missing) is treated as failing the criterion.
  prop_ok <- mean(subd$status == ok_status, na.rm = TRUE)
  if (is.na(prop_ok) || prop_ok < min_prop_ok) {
    print(paste(sbj, paste0("fewer than ", 100 * min_prop_ok, "% status OK")))
    next
  }

  # Share of the single most frequent response.
  resp_share <- prop.table(table(subd$resp))
  if (length(resp_share) > 0 && max(resp_share) > max_resp_prop) {
    print(paste(sbj, "used one response over 50% of the time"))
    next
  }

  print(paste(sbj, "looks OK"))
  kept[[length(kept) + 1]] <- subd
}

if (length(kept) == 0) {
  stop("No subject passed the inclusion criteria; nothing to plot.",
       call. = FALSE)
}

# Bind once, after the loop, instead of growing a data frame per iteration.
df <- do.call(rbind, kept)

# Subjects that were actually retained.
print(sort(unique(as.character(df$subj))))

# plot RT distribution
rt_from_offset <- df$rt - df$filedur
plot(density(rt_from_offset, na.rm = TRUE), main = "Pooled subjects",
     xlab = "Reaction time (ms) from stimulus offset",
     xlim = c(-2000, 10000))
abline(v = 0)
rug(rt_from_offset)