# Apply a large penalty against exponential notation so that R prefers
# fixed notation when it formats numeric values for output.
options(scipen=50)
#################################################
# LIBRARY:
#
# Drop the largest values of a sample.
#
# dataRow:      vector of measurements.
# percentToRem: percentage of the values to discard from the upper end.
#
# Returns dataRow unchanged (in its original order) when percentToRem is not
# positive. Otherwise the values are sorted ascending and only the lowest
# ceiling(n - n * percentToRem / 100) of them are returned. A percentage of
# 100 or more keeps nothing and yields an empty vector.
removeOutliers <- function (dataRow, percentToRem){
	result <- dataRow
	if (percentToRem > 0){
		# Number of values to keep, clamped at zero: a negative count used as an
		# index would mean "drop these positions" and would silently remove the
		# smallest values while keeping the largest ones.
		upperBound <- max(0, ceiling(length(dataRow) - length(dataRow)*(percentToRem/100)))
		result <- sort(dataRow)[seq_len(upperBound)]
	}
	return (result)
}
# Compute tick positions for a y axis that covers the given values.
#
# dataRows: numeric vector of the values shown on the axis.
#
# The tick spacing is a power of ten derived from the largest absolute value:
# for magnitudes above 1 the power of ten at or above it, for magnitudes below
# 1 the power of ten at or below it. A spacing of exactly 1 is replaced by 0.1.
# Ticks start at zero or below and run up to the spacing multiple at or above
# the maximum. For input without a usable magnitude (all zeros, empty, or
# non-finite) a single tick at 0 is returned.
getYAxisTicks <- function (dataRows){
	maxAbs <- max(abs(dataRows))
	# Without this guard an all-zero input produces a spacing of 0, the
	# divisions below become NaN and the sequence operator raises an error.
	if (!is.finite(maxAbs) || maxAbs == 0) {
		return(0)
	}
	magnitude <- log10(maxAbs)
	yoffs <- 10^(sign(magnitude)*ceiling(abs(magnitude)))
	if (yoffs == 1) {
	  yoffs <- .1
	}
	# The integer tick indices are built first and scaled afterwards.
	lowestTick <- min(floor(min(dataRows)/yoffs), 0)
	highestTick <- ceiling(max(dataRows)/yoffs)
	return(yoffs * (lowestTick:highestTick))
}
# Draw a text-only summary page for one data row on the current device.
#
# dataRow1:     numeric sample to summarise.
# titleString:  page title.
# clusterSize:  histogram cluster width, printed as given.
# clusterMax:   number of histogram clusters, printed as given.
#
# Each statistic is one line: the label right-aligned and the value
# left-aligned at x = 8. Standard deviation and variance are shown in their
# population form (scaled by (n-1)/n); the unscaled sample variance is listed
# separately as "variance (est)".
plotTitlePage <- function(dataRow1, titleString, clusterSize, clusterMax){
	plot(0:15, type = "n", xaxt="n", yaxt="n", bty="n", xlab = "", ylab = "")
	title(main=titleString)

	n <- length(dataRow1)
	popStdDev <- sd(dataRow1)*sqrt((n-1)/n)

	# One entry per printed line: y position, label, value.
	rows <- list(
		list(14, "count: ", n),
		list(13, "mean: ", mean(dataRow1)),
		list(12, "max: ", max(dataRow1)),
		list(11, "min: ", min(dataRow1)),
		list(10, "median: ", median(dataRow1)),
		list(9, "std. dev.: ", popStdDev),
		list(8, "rel. std. dev.: ", popStdDev/mean(dataRow1)),
		list(7, "variance: ", var(dataRow1)*((n-1)/n)),
		list(6, "variance (est): ", var(dataRow1)),
		list(5, "zero: ", 0),
		list(2, "cluster max: ", clusterMax),
		list(1, "cluster size: ", clusterSize)
	)
	for (row in rows){
		text(8, row[[1]], row[[2]], adj=c(1,0))
		text(8, row[[1]], row[[3]], adj=c(0,0))
	}
}
# Draw a text-only comparison page for two data rows on the current device
# and append one result line to "analysis1.flame.csv".
#
# dataRow1:    first sample (labelled "T" in the printed texts).
# dr1hist:     histogram object of dataRow1; only its density is used.
# dataRow2:    second sample (labelled "P" in the printed texts).
# dr2hist:     histogram object of dataRow2; only its density is used.
# boxp:        result of a two-group boxplot(); column 2 of its stats is
#              printed alongside dataRow1, column 1 alongside dataRow2.
# titleString: page title.
# histDiff50, histDiff75: accepted for the caller's convenience, not used here.
# stacktrace:  identifier written as the first field of the CSV line.
#
# The CSV line holds, separated by ';': stacktrace, median of dataRow1, median
# of dataRow2, whether the "less" KS test is below 0.05, whether the "greater"
# KS test is below 0.05.
plotComparisonSheet <- function(dataRow1, dr1hist, dataRow2, dr2hist, boxp, titleString, histDiff50, histDiff75, stacktrace){
	#########################################################################################################
	# One-sided two-sample KS tests; "statistic" is the maximum difference of
	# the empirical CDFs, "p.value" the p-value of the test.
	# The tests run on the function arguments, so the sheet does not depend on
	# global variables of the calling script.
	ksTestRe2  <- ks.test(dataRow1, dataRow2, alternative="greater")
	ksTestRe4  <- ks.test(dataRow1, dataRow2, alternative="less")

	plot(0:20, type = "n", xaxt="n", yaxt="n", bty="n", xlab = "", ylab = "")
	title(main=titleString)
	# Layout: label right-aligned at x = 5, dataRow1 value at x = 5, dataRow2 value at x = 10.
	text(5, 20, "mean: " , adj=c(1,0));       text(5, 20, mean(dataRow1), adj=c(0,0));  text(10, 20, mean(dataRow2), adj=c(0,0))
	text(5, 19, "max: "  , adj=c(1,0));       text(5, 19, max(dataRow1), adj=c(0,0));   text(10, 19, max(dataRow2), adj=c(0,0))
	text(5, 18, "bp. max: ", adj=c(1,0));     text(5, 18, boxp$stats[5,2], adj=c(0,0)); text(10, 18, boxp$stats[5,1], adj=c(0,0))
	text(5, 17, "bp. Q.75%: ", adj=c(1,0));  text(5, 17, boxp$stats[4,2], adj=c(0,0)); text(10, 17, boxp$stats[4,1], adj=c(0,0))
	text(5, 16, "median: ",adj=c(1,0));       text(5, 16, median(dataRow1), adj=c(0,0));text(10, 16, median(dataRow2), adj=c(0,0))
	text(5, 15, "bp. Q.25%: ", adj=c(1,0));  text(5, 15, boxp$stats[2,2], adj=c(0,0)); text(10, 15, boxp$stats[2,1], adj=c(0,0))
	text(5, 14, "bp. min: ", adj=c(1,0));     text(5, 14, boxp$stats[1,2], adj=c(0,0)); text(10, 14, boxp$stats[1,1], adj=c(0,0))
	text(5, 13, "min: "  , adj=c(1,0));       text(5, 13, min(dataRow1), adj=c(0,0));   text(10, 13, min(dataRow2), adj=c(0,0))
	# Population standard deviation (sample sd scaled by sqrt((n-1)/n)).
	text(5, 11, "std. dev.: ", adj=c(1,0));   text(5, 11, sd(dataRow1)*sqrt((length(dataRow1)-1)/length(dataRow1)), adj=c(0,0))
	                                          text(10,11, sd(dataRow2)*sqrt((length(dataRow2)-1)/length(dataRow2)), adj=c(0,0))
	# Correlation of the two histogram densities.
	text(5, 9, "Korrelationskoeffizient: ", adj=c(1,0)); text(5, 9, cor(dr1hist$density, dr2hist$density), adj=c(0,0))
	text(5, 8, "KS-Test T > P (W): ", adj=c(1,0));
	text(5, 8, paste("p(", ksTestRe2$p.value,"); Intervall:[", ksTestRe2$p.value + ksTestRe2$statistic,", ", ksTestRe2$p.value - ksTestRe2$statistic,"]"), adj=c(0,0));
	text(5, 7, "KS-Test T > P (W): T has smaller values as T-CDF lies above (and to the left) of P-CDF.", adj=c(0,0));
	text(5, 6, "KS-Test T < P (W): ", adj=c(1,0));
	text(5, 6, paste("p(", ksTestRe4$p.value,"); Intervall:[", ksTestRe4$p.value + ksTestRe4$statistic,", ", ksTestRe4$p.value - ksTestRe4$statistic,"]"), adj=c(0,0));
	text(5, 5, "KS-Test T < P (W): T has bigger values as T-CDF lies below (and to the right) of P-CDF.", adj=c(0,0));

	#################################################
	# flame graph output:
	#
	# The trailing "\n" is itself a ';'-separated field, so every line ends in ';'.
	cat(stacktrace, median(dataRow1), median(dataRow2), ksTestRe4$p.value<0.05, ksTestRe2$p.value<0.05, "\n", sep=";", file="analysis1.flame.csv", append=TRUE)
	#################################################
}

#################################################
#################################################
# MAIN CODE:
#
# Start with an empty flame-graph CSV; plotComparisonSheet() appends to it.
cat("", file="analysis1.flame.csv")
# Input tables: the measurements and the mapping that drives the main loop.
data1 <- read.csv2(file="inlined_measurements.txt", dec=".")
mapping <- read.csv2(file="mapping4.txt", dec=".")
# Two PDF devices stay open for the whole run. dev.cur() is read directly
# after each pdf() call because the device just opened is the current one.
# pdfCom receives the comparison pages, pdfAll the detail plots.
pdf(file="./analysis1.RR.pdf", paper="a4r", width=11, height=8)
pdfCom <- dev.cur()
pdf(file="./analysis1.R.pdf", paper="a4r", width=11, height=8)
pdfAll <- dev.cur()
# Process every mapping entry: compare the durations measured for the key
# with the Palladio result file mapped to it and emit all plots.
# seq_along() keeps the loop from running at all when the mapping is empty.
for (index in seq_along(mapping$key)){
	mapping.pal <- as.character(mapping$palladio.mapping[index])
	mapping.sta <- as.character(mapping$stacktrace[index])
	mapping.key <- as.character(mapping$key[index])
	write(paste("[INFO] START with key: ",mapping.key), stdout())
	# Entries without a Palladio file (absent, NA or empty) are skipped.
	if (length(mapping.pal) > 0 && !is.na(mapping.pal) && mapping.pal!=""){
		write("[INFO] step0: preprocessing data sets...", stdout())
		record.idx <- which(data1$stacktrace==mapping$key[index])
		# Log the record count to stdout like every other message.
		write(paste("[INFO] number of records for key: ",length(record.idx)), stdout())
		# NOTE(review): the scale factors presumably convert both sources to
		# milliseconds (the axis labels say "ms") - confirm the input units.
		data1.curr <- 0.001*0.001*data1$duration[record.idx]
		write("[INFO] step0: preprocessing data sets...", stdout())
		data2 <- read.csv2(file=mapping.pal, dec=".")
		data2.curr <- data2$Time.Span*1000

		write("[INFO] step 1: outlier removal...", stdout())
		#################################################
		# outlier removal (currently disabled: 0 percent):
		#
		data1.curr <- removeOutliers(data1.curr, 0)
		data2.curr <- removeOutliers(data2.curr, 0)

		write("[INFO] step 2: test visualization...", stdout())
		#################################################
		# test data visualization:
		#
		# Choose one cluster width for both data rows: among the candidate
		# widths, the one giving the largest cluster count below 100; if none
		# is below 100, the smallest count overall.
		cluster.size <- c(1,10,125)
		data.max <- max(data1.curr, data2.curr)
		cluster.marr <- ceiling(data.max/cluster.size)
		cluster.max <- max(cluster.marr[which(cluster.marr<100)], min(cluster.marr))
		cluster.size <- max(cluster.size[min(which(cluster.marr==cluster.max))], min(cluster.size))
		#
		plotTitlePage(data1.curr, titleString=mapping$key[index], clusterSize=cluster.size, clusterMax=cluster.max)
		#
		# Shared histogram breaks for both data rows.
		cluster.ele <- c(cluster.size*0:cluster.max)
		myXlab <- paste(mapping$key[index], " - ", cluster.max, " Cluster der Groesze ", cluster.size, "ms")

		data1.hist <- hist(data1.curr, breaks=cluster.ele, freq=FALSE, axes=FALSE, xlab=myXlab, ylab="relative Haeufigkeit", main="Verteilung Aufrufe")
		axis(1)
		axis(2, at=getYAxisTicks(data1.hist$density))


		write("[INFO] step 3: Palladio visualization...", stdout())
		#################################################
		# Palladio data visualization:
		#
		plotTitlePage(data2.curr, mapping.pal, cluster.size, cluster.max)
		myXlab <- paste(mapping.pal, " - ", cluster.max, " Cluster der Groesze ", cluster.size, "ms")

		data2.hist <- hist(data2.curr, breaks=cluster.ele, freq=FALSE, axes=FALSE, xlab=myXlab, ylab="relative Haeufigkeit", main="Verteilung Aufrufe")
		axis(1)
		axis(2, at=getYAxisTicks(data2.hist$density))


		write("[INFO] step 4: Data row comparison...", stdout())
		#################################################
		# comparison:
		#
		# The box plot is drawn on the detail PDF and again on the comparison PDF.
		boxp <- boxplot(data2.curr, data1.curr, main="Quartilvergleich - Palladio / Test", names=c(mapping.pal, mapping.key))
		dev.set(pdfCom)
		boxp <- boxplot(data2.curr, data1.curr, main=paste(mapping$key[index], ": Quartilvergleich - Palladio / Test"), names=c(mapping.pal, mapping.key))
		# Two stand-alone PDFs per key.
		# NOTE(review): paste() separates its arguments with a space, so these
		# file names contain spaces around the key. Left as is because changing
		# it would rename the generated files.
		pdf(paste("./boxplot_",mapping.key,".pdf"), paper="a4", width=8, height=9)
		boxplot(data2.curr, data1.curr, main=paste("Box plot - ",mapping.key), names=c("PCM measurement", "test measurements"), ylab="time (ms)")
		dev.off()
		pdf(paste("./boxplot_",mapping.key,"-2.pdf"), paper="a4", width=8, height=9)
		plot(data1.hist$mids, data1.hist$density, type="b", col="red", ylim=c(0,.001), main="response time diagram", xlab="method response time (ms)", ylab="relative frequency")
		lines(data2.hist$mids, data2.hist$density, col="blue")
		points(data2.hist$mids, data2.hist$density, col="blue")
		dev.off()
		# Closing a device makes another one current; switch back explicitly.
		dev.set(pdfAll)

		plot(ecdf(data1.hist$density), col="red", ylim=c(0, 1), main="kumulierte Verteilungsfunktion")
		plot(ecdf(data2.hist$density), col="blue", add=TRUE)

		qqplot(data1.curr, data2.curr, main="QQPlot (Test x Palladio)")

		write("[INFO] step 5: historgram differences...", stdout())
		#################################################
		# histogram differences:
		#
		histDiff <- data2.hist$density - data1.hist$density
		highestPossibleIndicator <- sum(abs(histDiff))
		# Label every bar with the lower bound of its cluster.
		bp.names <- head(cluster.ele, -1)
		bp <- barplot(histDiff, axes=FALSE, main="Differenz d. relativen Haeufigkeiten Palladio-Test (mit Palladio-Quartilen)")
		# Palladio quantiles expressed in cluster units.
		qs <- quantile(data2.curr/cluster.size)
		axis(1, at=bp[seq_along(bp.names)], labels=bp.names)
		axis(2, at=getYAxisTicks(histDiff))
		abline(v=bp[ceiling(qs)], col="green")

		# Differences from the Palladio median upwards.
		bp.names <- cluster.ele[ceiling(qs[3]):length(cluster.ele)]
		histDiff.50 <- histDiff[max(ceiling(qs[3]),2):length(histDiff)]
		if (length(histDiff.50) > 1){
			bp1 <- barplot(histDiff.50, axes=FALSE, main="Differenz d. relativen Haeufigkeiten Palladio-Test (ab 50%-Quartil)")
			axis(1, at=bp1[seq_along(bp.names)], labels=bp.names)
			axis(2, at=getYAxisTicks(histDiff.50))
			legend(x="topright", legend=c(paste("Summe aller Werte: ",sum(histDiff.50)), paste("Summe Betrag aller Werte komplett: ",highestPossibleIndicator)))
		}

		# Differences from the Palladio 75% quantile upwards.
		bp.names <- cluster.ele[ceiling(qs[4]):length(cluster.ele)]
		histDiff.75 <- histDiff[max(ceiling(qs[4]),2):length(histDiff)]
		if (length(histDiff.75) > 1){
			bp2 <- barplot(histDiff.75, axes=FALSE, main="Differenz d. relativen Haeufigkeiten Palladio-Test (ab 75%-Quartil)")
			axis(1, at=bp2[seq_along(bp.names)], labels=bp.names)
			axis(2, at=getYAxisTicks(histDiff.75))
			legend(x="topright", legend=c(paste("Summe aller Werte: ",sum(histDiff.75)), paste("Summe Betrag aller Werte komplett: ",highestPossibleIndicator)))
		}

		write("[INFO] step 6: comparison SHEET...", stdout())
		#################################################
		# comparison SHEET:
		#
		dev.set(pdfCom)
		plotComparisonSheet(data1.curr, data1.hist, data2.curr, data2.hist, boxp, mapping.pal, histDiff.50, histDiff.75, mapping.sta)
		dev.set(pdfAll)

	}
	write(paste("[INFO] DONE with key: ",mapping.key), stdout())
}
# Close the two PDF devices opened before the loop: each dev.off() shuts down
# the current device, after which a remaining open device becomes current.
dev.off()
dev.off()
# Report the warnings collected during the run.
warnings()

