

library(Hmisc)
library(car)


availableGrades <- c("5.0", "4.0", "3.7", "3.3", "3.0", "2.7", "2.3", "2.0", "1.7", "1.3", "1.0")

parseData <- function(scoreInputDf, nameColIndex, idColIndex, firstScoreColumn)
{
  # extract score info
  scoreDf <- scoreInputDf[2:nrow(scoreInputDf), firstScoreColumn:length(scoreInputDf)]
  rowSumScores <- rowSums(scoreDf)
  questionCount <- ncol(scoreDf)
  studentCount <- nrow(scoreDf)

  # extract full points info and normalize the score to total score
  fullItemScores <- unlist(scoreInputDf[1, firstScoreColumn:length(scoreInputDf)])        ## first row of score is full points
  normalizedScore <- 
  {
    fullItemScoreMat <- matrix(rep(fullItemScores, studentCount), ncol = questionCount, byrow = T)
    scoreDf / fullItemScoreMat
  }
  totalPoints <- sum(fullItemScores)

  # extract student info
  studentNames <- scoreInputDf[2:nrow(scoreInputDf), nameColIndex]
  studentIds <- scoreInputDf[2:nrow(scoreInputDf), idColIndex]
 
  # return value
  list(
      scoreDf = scoreDf,
      rowSumScores = rowSumScores,
      questionCount = questionCount,
      studentCount = studentCount,
      fullItemScores = fullItemScores,
      normalizedScore = normalizedScore,
      studentNames = studentNames,
      studentIds = studentIds,
      totalPoints = totalPoints
    )
}

calculateScoreBreaks <- function(totalPoints, anchorCorrective)
{
  anchorFor4.0 <- (totalPoints / 2) + anchorCorrective
  ## grade range calculation
  # * anchorFor4.0 is the middle point of grade 4.0
  # * We have 3.5 major steps (1.0 – 1.7, 2.0 - 2.7, 3.0 - 3.7, 4.0 - 4.7 (note the discrete grade range and pseudo grade 4.7)) 
  #    * Since the anchor is in the middle of 4.0, the last step is counted as 0.5 step (half between 4.0 - 4.7)
  # * We have 3 minor steps for each of major step (x.0, x.3, x.7)
  gradeRange <- (totalPoints - anchorFor4.0) / ( 3.5 * 3 )
  score.4.0 <- anchorFor4.0 - (0.5 * gradeRange)           ## score before the ancor points are still in the 4.0 major step
  rangeMultiplier <- seq(0, length(availableGrades) - 2)   ## all breaks except below 5.0 and more than 1.0
  scoreBreaks.temp <- score.4.0 + (rangeMultiplier * gradeRange)
  c(0, scoreBreaks.temp, totalPoints, recursive = T)  # close both ends
}



plotGradeHistogram <- function(scoreBreaks, rowSumScores, totalPoints, courseName)
{  
  thePalette <- rev(rainbow(length(scoreBreaks) - 1))
  
  
  # expand margin for legend
  oldMargins <- par()$mar
  newMargins <- oldMargins + c(0, 0, 0, 12)
  par(xpd = T, mar = newMargins)
  
  # histogram (suppress warning that we use bins that are not equidistance)
  options(warn = -1)
  theHist <- hist(rowSumScores, 
       breaks = scoreBreaks,
       col = thePalette,
       xlim = c(0,totalPoints), 
       freq = T,
       xlab = "Achieved score",
       ylab = "Number of students",
       main = courseName,
       labels = T
       )
  options(warn = 0)     
       
  # legend
  scoreMin <- scoreBreaks[1:length(scoreBreaks) - 1]
  scoreMax <- scoreBreaks[2:length(scoreBreaks)]
  scoreMin <- sprintf("%0.2f", scoreMin)
  scoreMax <- sprintf("%0.2f", scoreMax)
  gradeRangeLegend <- paste(availableGrades, ": ", scoreMin, " - ", scoreMax)
  gradeRangeLegend <- rev(gradeRangeLegend)
  
  par(xpd = T)
  # legend("topright", gradeRangeLegend, fill = rev(thePalette))
  labelAreaX <- totalPoints * 1.1
  labelAreaY <- max(theHist$counts)
  legend(labelAreaX, labelAreaY, gradeRangeLegend, fill = rev(thePalette))
  par(xpd = F)
  
  # distribution descriptive statistics
  gradesData <- rep(as.numeric(availableGrades), theHist$counts)
  gradesMean <- mean(gradesData)
  gradesSD <- sd(gradesData)
  gradesMed <- median(gradesData)
  descStatText <- sprintf("Mean: %.2f\nSD: %.2f\nMed: %.2f", gradesMean, gradesSD, gradesMed)
  
  par(xpd = T)
  text(totalPoints * 1.4, 0, pos = 3, labels = descStatText)
  par(xpd = F)
}  



# adjust anchorCorrective to compensate bad question design, lowering the passing score
summaryExam <- function (fileName, courseName, firstScoreColumn, topicsFileName,  anchorCorrective = 0.0, nameColIndex = 2, idColIndex = 1, summaryRoot = "Summary", studentRoot = "Students")
{
  prepareOutputPath(summaryRoot, studentRoot)
  
  # read and parse score data
  scoreInputDf <- read.csv(fileName, header = T, sep = ",")
  attach(parseData(scoreInputDf, nameColIndex, idColIndex, firstScoreColumn))

  # read topic data
  topicDf <- read.csv(topicsFileName, header = T, sep = ",")
  
  ## grade breaks
  scoreBreaks <- calculateScoreBreaks(totalPoints, anchorCorrective)
  
  ## NOTE: the last bin will be two times wider than other bins because we consider the right half of 4.0 - 4.7 as 4.0 bin.
  ## This is beneficial to the student because we lowered the passing break as well as the breaks for each grade.

  ########### histogram of total score
  pdf(file.path(summaryRoot, "Student Performance.pdf"))
  plotGradeHistogram(scoreBreaks, rowSumScores, totalPoints, courseName)
  dev.off()
  
  ########### plot of % solved
  pdf(file.path(summaryRoot, "Question Performance.pdf"), width = 8);  
  plotQuestionPerformance(normalizedScore, courseName, topicDf, questionCount, drawLines = T)
  dev.off()


  ########### plot of raw scores
  plotRawScore(scoreDf, courseName, fullItemScores, summaryRoot)
  
  totalScoreMean <- mean(rowSumScores)
  totalScoreSD <- sd(rowSumScores)
  
  pdf(file.path(summaryRoot, "Raw total score.pdf"), width = 8);  
  rawTotalHist <- hist(rowSumScores, breaks = -0.5:(totalPoints + 0.5), main="Histogram of raw total score\n(bin size = 1 point)", xlim=c(0, totalPoints))
  title(sub = sprintf("Mean: %.2f, SD: %.2f", totalScoreMean, totalScoreSD))
  
  # TODO
  passingPoint <- scoreBreaks[2]
  maxFrequency <- max(rawTotalHist$counts)
  segments(passingPoint, 0, passingPoint, maxFrequency,col = rgb(1, 0, 0, 1), lwd=3)

  dev.off()
  
  ########### plot normality
  pdf(file.path(summaryRoot, "Normality.pdf"), width = 8)
  normality <- shapiro.test(rowSumScores)
  qqnorm(rowSumScores)
  qqline(rowSumScores, lty = 2)
  
  title(sub = sprintf("Shapiro-Wilk normality test: W = %f, p = %f (p should > .05)", normality$statistic, normality$p.value))
  dontcare <- dev.off()
  

  ########## plot each student's score
  for (studentIndex in 1:studentCount)
  {
    # extract student info
    studentName <- studentNames[studentIndex]
    studentId <- studentIds[studentIndex]

    # plot question performance
    fileName <- paste(studentId, " ", studentName, ".pdf", sep = "")
    pdf(file.path(studentRoot, fileName), width = 8);  
    plotQuestionPerformance(normalizedScore, courseName, topicDf, questionCount, dotLegend = F)
    
    # plot student performance
    studentNormScore <- normalizedScore[studentIndex,]
    studentScore <- scoreDf[studentIndex,]
    points(1:questionCount, studentNormScore * 100, ylim = c(0,100), col = "red", pch = 4)
    lines(1:questionCount, studentNormScore * 100, ylim = c(0,100), col = "red")
    text(1:questionCount, (studentNormScore * 100), studentScore, pos = 1)
    
    # legend
    legend(questionCount + 1, 70, c("mean", "your score"), pch = c(20, 4), col = c("black", "red"), lty = c(0,1), xpd = T)
    
    # write student grade
    studentGradeIndex <- as.numeric(findInterval(sum(studentScore), scoreBreaks))
    studentGrade <- availableGrades[studentGradeIndex]
    studentText <- paste(studentId, " ", studentName, " ")
    gradeText <- paste("Score: ", sum(studentScore), "/", totalPoints, " Raw grade: ", studentGrade);
    mtext(paste(studentText, gradeText), side = 1, line = 4, adj = 0)
    
    # close file
    dev.off()
  }
}

plotQuestionPerformance <- function(normalizedScore, courseName, topicDf, questionCount, drawLines = F, dotLegend = T) 
{
  # expand margin for legend
  oldMargins <- par()$mar
  newMargins <- oldMargins + c(0, 0, 0, 10)
  par(xpd = T, mar = newMargins)
  
  # plot blank space
  plot(colMeans(normalizedScore) * 100, 
      type = "n", 
      ylim = c(0,100),
      main = courseName, 
      xlab = "Question", 
      ylab = "% achieved", 
      lab = c(length(normalizedScore), 10, 0),
      xaxs = "r", 
      yaxs = "i",
      cex.axis = 0.7)
  par(new = F)
  
  # regions
  thePalette <- rainbow(nrow(topicDf), alpha = 0.4)
  for (topicIndex in 1:nrow(topicDf))
  {
    topicRow <- topicDf[topicIndex,]
    rect(topicRow["from"] - 0.5, 0, topicRow["to"] + 0.5, 100, col = thePalette[topicIndex], border = NA)
  }
  
  # mean
  points(colMeans(normalizedScore) * 100, ylim = c(0,100), xlab = "", ylab ="", lab = c(length(normalizedScore), 10, 0), pch = 20)
  if (drawLines)
  {
    lines(colMeans(normalizedScore) * 100, ylim = c(0,100), xlab = "", ylab ="", lab = c(length(normalizedScore), 10, 0), pch = 20)
  }
  
  # error bar
  par(xpd = F)  ## Turn clipping on
  x <- 1:length(normalizedScore)
  y <- colMeans(normalizedScore) * 100
  delta <- sapply(normalizedScore, sd) * 100
  # errbar(x, y, y + delta, y - delta, add = T)
  
  # legend
  legend(questionCount + 1, 50, as.character(topicDf[,"topic"]), fill = thePalette, xpd = T)
  
  if (dotLegend)
  {
    legend(questionCount + 1, 60, "mean", pch = 20, xpd = T)
  }
  
  
}

plotRawScore <- function(scoreDf, courseName, fullItemScores, summaryRoot)
{
  score.m <- as.matrix(scoreDf)
  items.v <- rep(1:length(fullItemScores), each=dim(score.m)[1])
  score.v <- as.vector(score.m)
  
  # scatter plot of each data point
  pdf(file.path(summaryRoot, "Raw score - scatter.pdf"), width = 8);  
  scatterplot(items.v, score.v, xlab = "Question", ylab = "Score", ylim = c(0, max(fullItemScores)), jitter=list(x=1.5, y=0), pch = 16, col = rgb(0, 0, 0, 0.2), reg.line = F, smooth = F, boxplots = F, xaxt = "n", main = "Summary raw score by question")
  axis(1, at = 1:length(fullItemScores))
  plotFullPointsLines(fullItemScores)
  dontcare <- dev.off()
  
  
  # box plot summarizing the same data
  pdf(file.path(summaryRoot, "Raw score - box.pdf"), width = 8);  
  boxplot(score.v ~ items.v, xlab = "Question", ylab = "Score", ylim = c(0, max(fullItemScores)), main = "Scatter of raw score by question")
  plotFullPointsLines(fullItemScores)
  dontcare <- dev.off()
}

plotFullPointsLines <- function(fullItemScores)
{
  for (i in 1:length(fullItemScores))
  {
    fullPoint <- fullItemScores[i]
    segments(i - 0.5, fullPoint, i + 0.5, fullPoint,col = rgb(1, 0, 0, 1), lwd=3)
  }
}

prepareOutputPath <- function(summaryRoot, studentRoot)
{
  # prepare output path
  if (!file.exists(summaryRoot))
  {
    dir.create(summaryRoot, recursive = T)
  }
  if (!file.exists(studentRoot))
  {
    dir.create(studentRoot, recursive = T)
  }
}

