import sys,copy,random,math,os,time,getopt,re,traceback,urllib,difflib,types,shutil,fileinput,shutil

sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)),"../libraries/"))

import ned_stats as stats
import ned_basic as basic
import ned_commandLine as commandLine
import ned_conservationScorer as conservationHelper
import ned_disorderHelper as disorderHelper
import ned_uniprotHelper as uniprotHelper
import ned_proteinInfoHelper as proteinInfoHelper
import ned_motifHelper as motifHelper

import rje_seq,rje_blast,rje_uniprot,rje_tree

from sets import Set

#slimprints.py -a ../Datasets/Alignments/Human/ -c0.35 -qT -W30 -PF -fT -m0.1 -FF
# Version number of this script.
version = 3

# Tab-separated reference table. In the visible part of this file it is only
# mentioned by a commented-out print in SLiMPrints_Tester.check_results.
# NOTE(review): the six columns look like the table check_results writes
# (bin threshold, ELM hits, other hits, fraction of all ELM hits, fraction of
# all other hits, ELM share of the bin) -- presumably output kept from an
# earlier run; confirm before relying on it.
old_results = """
1e-06 	2 	0 	0.004 	0.000 	1.000
1e-05 	10 	5 	0.018 	0.001 	0.667
0.0001 	23 	40 	0.041 	0.007 	0.365
0.001 	55 	141 	0.098 	0.023 	0.281
0.01 	69 	467 	0.123 	0.078 	0.129
0.1 	117 	1185 	0.209 	0.197 	0.090
1 	284 	4165 	0.507 	0.694 	0.064
total 	560 	6003 	1.000 	1.000 	0.085\n"""
	
class SLiMPrints_Tester:
	def __init__(self):
		"""Create the tester and load its settings.

		self.options is filled from ../settings/utilities.ini and then
		../settings/slimprints.ini (both resolved relative to this script), so
		keys from the second file replace same-named keys from the first.
		self.data starts out as an empty dict.
		"""
		iniLoader = commandLine.CommandLine()
		scriptDir = os.path.dirname(os.path.realpath(__file__))

		self.options = {}
		for iniName in ("utilities.ini", "slimprints.ini"):
			iniPath = os.path.join(scriptDir, "../settings/" + iniName)
			self.options.update(iniLoader.loadIniFile(iniPath))

		self.data = {}
	
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	"""	
	def makeROCtable(self):
		humanIds = open(self.options['uniprot_human_ids']).read().split("\n")[1:100]
		humanIds.sort()
		
		scoreData = {}
		for accession in humanIds:			
			scoreData[accession] = {}
			scoreDataFile = os.path.join(self.options['alignment_dir'],"ALN",accession + ".orthaln.fas.scores")
	
			if os.path.exists(scoreDataFile):
				for line in open(scoreDataFile).read().strip().split("\n"):
					bits = line.split("\t")
					scoreData[accession][bits[0]] = bits[1].split(",")
				
				print accession + " processed"
				
				
				data = open(paths[accession]).read().split("\n")
			
				if len(data) > 0:
					results = data[1:]
					header = data[0].split("\t")
					
					for hit in results:
						if len(header) == len(hit.split("\t")) - 1:
							try:
								if "slim" in header:
									bits = hit.split("\t")
									
									species = bits[header.index("spec")]
									p = float(bits[header.index("p")])
									pCor = float(bits[header.index("pCor")])
									sig = float(bits[header.index("sig")])
									var= float(bits[header.index("pVar")])
									pos = bits[header.index('pos')]
									motifUniprot = bits[header.index('MOTIF')]
									mutagen= bits[header.index('MUTAGEN')]
									mod= bits[header.index('MOD_RES')]
									pdb = bits[header.index('PDB')]
									gapPercent = float(bits[header.index('gapPercent')])
									insertions = float(bits[header.index('insertions')])
									dis = bits[header.index('dis')]
									anchor = bits[header.index('anchor')]
									region = bits[header.index('REGION')]
									ss = bits[header.index('SECONDARY_STRUCTURE')]
									
									if "motif" in header:
										motif = bits[header.index("motif")].strip()
									else:
										motif = bits[header.index("slim")].strip()
									
									try:
										elm = bits[header.index("ELM")].split(":")[0]
									except:
										elm = ""
			else:
				print accession +" not found"
	"""
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
		
	def parseSLiMSearch(self):
		elmInfo = motifHelper.motifHelper()
		elmInfo.initialiseELMData()
			
		self.options['accession'] = elmInfo.data['ELM'].keys()
	
		SLiMSearchResults = fileinput.input(["../../Projects/SLiMPrints_paper/Data/ELM.B.instances1.tdt"])
		
		headerBits = SLiMSearchResults[0].split("\t")
		print "\n".join([str(x) + "-" + headerBits[x] for x in range(0,len(headerBits))])
		
		elmindex = headerBits.index("elm")
		pMotifCorIndex = headerBits.index('correctedRLCConProb')
		rlcIndex = headerBits.index('conRLC')
		pMotifIndex = headerBits.index('conRLCProb')
		conColIndex = headerBits.index('conColScores')
		conRLCColIndex = headerBits.index('conRLCScores')

			
		speciesIndex = headerBits.index('species')
		
		dataCorrected = {"ELM":[],"FP":[]}
		dataPmotif = {"ELM":[],"FP":[]}
		dataRLC = {"ELM":[],"FP":[]}
		conRLCCol = {"ELM":[],"FP":[]}
		conCol = {"ELM":[],"FP":[]}
		
		#print len(self.options['accession'])
		count = 1
		for line in SLiMSearchResults:
			count += 1
			bits = line.split("\t")
			if int(bits[speciesIndex]) >= int(sys.argv[3]): 
				if bits[2] in self.options['accession']:
					if int(bits[speciesIndex]) >= int(sys.argv[3]):
						if len(bits[7]) - bits[7].count(".") > 0:
							if len(bits[elmindex ]) > 0 and bits[elmindex].split(":")[0] == bits[0]:
						
			
								dataRLC["ELM"].append(bits[rlcIndex])
								
								conRLCCol["ELM"] += bits[conRLCColIndex].split(",")
								conCol["ELM"] += bits[conColIndex].split(",")
								
							elif len(bits[elmindex ]) > 0 :
								pass
							else:
								if len(bits[32]) == 0:
									
									dataRLC["FP"].append(bits[rlcIndex])
									conRLCCol["FP"] += bits[conRLCColIndex].split(",")
									conCol["FP"] += bits[conColIndex].split(",")
		fileinput.close()
		
		SLiMSearchResults = fileinput.input(["../../Projects/SLiMPrints_paper/Data/ELM.B.instances.tdt"])
		
		headerBits = SLiMSearchResults[0].split("\t")
		print "\n".join([str(x) + "-" + headerBits[x] for x in range(0,len(headerBits))])
		
		elmindex = headerBits.index("elm")
		pMotifCorIndex = headerBits.index('correctedRLCConProb')
		rlcIndex = headerBits.index('conRLC')
		pMotifIndex = headerBits.index('conRLCProb')
		conColIndex = headerBits.index('conColScores')
		conRLCColIndex = headerBits.index('conRLCScores')
	
		speciesIndex = headerBits.index('species')
		
		
		for line in SLiMSearchResults:
			count += 1
			
			bits = line.split("\t")
			if int(bits[speciesIndex]) >= int(sys.argv[3]): 
				if bits[2] in self.options['accession']:
					if int(bits[speciesIndex]) >= int(sys.argv[3]):
						if len(bits[7]) - bits[7].count(".") > 0:
							if len(bits[elmindex ]) > 0 and bits[elmindex].split(":")[0] == bits[0]:
								
								
								
								if float(bits[21]) <= 0.05:
									dataCorrected["ELM"].append(bits[pMotifCorIndex])
									dataPmotif["ELM"].append(bits[pMotifIndex])
								
								
								
							elif len(bits[elmindex ]) > 0 :
								pass
							else:
								if len(bits[32]) == 0:
									if float(bits[21]) <= 0.05:
										dataCorrected["FP"].append(bits[pMotifCorIndex])
										dataPmotif["FP"].append(bits[pMotifIndex])

										
					#if float(bits[pMotifCorIndex]) < 0.000000001:
					#	print line
							#if len(bits[32]) == "":
		
		
		fileinput.close()
		dist = basic.binList(dataCorrected["ELM"],normaliser=10)
		print basic.plotDist(dist)
		
		
		dist = basic.binList(dataPmotif["ELM"],normaliser=10)
		print basic.plotDist(dist)
		
		dist = basic.binList(dataPmotif["ELM"],normaliser=2)
		print basic.plotDist(dist)
		print
		
		
		dist = basic.binList(dataCorrected["FP"],normaliser=10)
		print basic.plotDist(dist)
		
		dist = basic.binList(dataPmotif["FP"],normaliser=10)
		print basic.plotDist(dist)
		
		dist = basic.binList(dataPmotif["FP"],normaliser=2)
		print basic.plotDist(dist)
		print
		
		open("../../Projects/SLiMPrints_paper/Data/FP_SLiMSearch_elmBenchmarking_correctedRLCConProb","w").write("\n".join(dataCorrected["FP"]))
		open("../../Projects/SLiMPrints_paper/Data/ELM_SLiMSearch_elmBenchmarking_conRLCProb","w").write("\n".join(dataPmotif["ELM"]))
		open("../../Projects/SLiMPrints_paper/Data/FP_SLiMSearch_elmBenchmarking_conRLCProb","w").write("\n".join(dataPmotif["FP"]))
		open("../../Projects/SLiMPrints_paper/Data/ELM_SLiMSearch_elmBenchmarking_correctedRLCConProb","w").write("\n".join(dataCorrected["ELM"]))
		
		open("../../Projects/SLiMPrints_paper/Data/SLiMSearch_elmBenchmarking_correctedRLCConProb_ROC","w").write("\t0\n".join(dataCorrected["FP"]) + "\t0\n" + "\t1\n".join(dataCorrected["ELM"])  +"\t1\n")
		open("../../Projects/SLiMPrints_paper/Data/SLiMSearch_elmBenchmarking_conRLCProb_ROC","w").write("\t0\n".join(dataPmotif["FP"]) + "\t0\n" + "\t1\n".join(dataPmotif["ELM"])  +"\t1\n")
		open("../../Projects/SLiMPrints_paper/Data/SLiMSearch_elmBenchmarking_conRLCCol_ROC","w").write("\t0\n".join(conRLCCol["FP"]) + "\t0\n" + "\t1\n".join(conRLCCol["ELM"])  +"\t1\n")
		open("../../Projects/SLiMPrints_paper/Data/SLiMSearch_elmBenchmarking_conCol_ROC","w").write("\t0\n".join(conCol["FP"]) + "\t0\n" + "\t1\n".join(conCol["ELM"])  +"\t1\n")
		
		print os.popen("/usr/local/bin/R CMD BATCH /Applications/Bioware/Projects/SLiMPrints_paper/Data/Fig5-ROC_Curves.R").read()
		
		
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	
	def check_results(self):
		if sys.argv[1] == "True":
			human_elm = True
		else:
			human_elm = False
			
		paths = {}
		accessions = []
		
		counter = {
		"files_with_slimprints_results":{"true":[],"false":[]},
		"slimprints_hits_sig":{"total":[]},
		"slimprints_hits_sig_elm":{"total":{"All":[],"LIG":[],"CLV":[],"TRG":[],"MOD":[]}},
		"slimprints_hits_sig_false":{"total":[]},
		}
	
		humanIds = open(self.options['uniprot_human_ids']).read().split("\n")[1:]
		
		if human_elm:
			elmInfo = motifHelper.motifHelper()
			elmInfo.initialiseELMData()
			
			self.options['accession'] = elmInfo.data['ELM'].keys()
						
			count = 0
			for accession in self.options['accession']:
				count += len(elmInfo.data['ELM'][accession])
		
				slimprintsPath = os.path.join(self.options['resdir'],accession + "_comp_SLiMPrints.out")
				if os.path.exists(slimprintsPath):
					paths[accession] = slimprintsPath
					counter["files_with_slimprints_results"]["true"].append(accession)
					
				else:
					counter["files_with_slimprints_results"]["false"].append(accession)
					
		else:
			for file in os.listdir(self.options['resdir']):
				if file.split(".")[-1] == "out":
					
					slimprintsPath = os.path.join(self.options['resdir'],file)
					
					if file.split("_")[0] in humanIds:
						paths[file.split("_")[0]] = slimprintsPath
					
				
				
		resultsDict = {}
		true = []
		false = []

		accessions = paths.keys()
		
		accessions.sort()
		
		
		
		elmResultsTrue = []
		elmResultsFalse = []
		elmSLiMPrintsResultsHeader = ""
		
		count = 0
		htmlString = "<table>"
		for accession in accessions:
			#print "-"*100
			#print [x["Desc"] for x in elmInfo.data['ELM'][accession]]
			data = open(paths[accession]).read().split("\n")
			#print paths[accession]
			#print data
			if len(data) > 0:
				#print paths[accession]
				results = data[1:]
				header = data[0].split("\t")
				#print data
				#print header
				if len(elmResultsTrue) + len(elmResultsFalse) == 0:
					elmSLiMPrintsResultsHeader = "Accession\t" + data[0] + "\n"
				
				for hit in results:
					if len(header) == len(hit.split("\t")) - 1:
						try:
							if "slim" in header:
								bits = hit.split("\t")
								
								species = bits[header.index("spec")]
								p = float(bits[header.index("p")])
								pCor = float(bits[header.index("pCor")])
								sig = float(bits[header.index("sig")])
								var= float(bits[header.index("pVar")])
								pos = bits[header.index('pos')]
								motifUniprot = bits[header.index('MOTIF')]
								mutagen= bits[header.index('MUTAGEN')]
								mod= bits[header.index('MOD_RES')]
								pdb = bits[header.index('PDB')]
								gapPercent = float(bits[header.index('gapPercent')])
								insertions = float(bits[header.index('insertions')])
								dis = bits[header.index('dis')]
								anchor = bits[header.index('anchor')]
								region = bits[header.index('REGION')]
								ss = bits[header.index('SECONDARY_STRUCTURE')]
								
								sig = pCor
								
								if "motif" in header:
									motif = bits[header.index("motif")].strip()
								else:
									motif = bits[header.index("slim")].strip()
								
								try:
									elm = bits[header.index("ELM")].split(":")[0]
								except:
									elm = ""
									
								start =pos.split(":")[0]
								stop =pos.split(":")[1]
							
								#sig = pCor
								
								
								if sig < float(sys.argv[2]) and int(species) >= int(sys.argv[3]):# and len("".join(motif.split("."))) <= 5:
									print accession + "\t" + hit
									shutil.copyfile(os.path.join(self.options['alignment_dir'],"ALN",accession+ ".orthaln.fas"),"/Applications/Bioware/Projects/SLiMPrints_paper/Alignments/" + accession+ ".orthaln.fas")
										
									htmlString += "<tr>"
									#htmlString += "<td>" + str(len(resultsDict))+ "</td>"
									htmlString += "<td>" + str(sig) + "</td>"
									htmlString += "<td>" + pos + "</td>"
									htmlString += "<td>" + motif + "</td>"
									htmlString += "<td>" + str(dis) + "</td>"
									htmlString += '<td><a href="http://127.0.0.1/~davey/alignmentViewer/drawer.php?uniprotid=' + accession  + '&column=' + start  +'&width=100" target="_blank">view</a></td>'
									htmlString += "</tr>\n"
								
									#print "python ./ned_plotDistribution3.py -A " + accession + " -x" + str(max(0,int(start)-50)) + " -y" + str(int(stop)+50) + " -D ~/Desktop/Interesting_Motifs/" + accession + "_" + motif.replace(".","x") + "_" + start + "_" + "%1.2g"%pCor + ".pdf"
									#print os.popen("python ./ned_plotDistribution3.py -A " + accession + " -x" + str(max(0,int(start)-100)) + " -y" + str(int(stop)+100) + " -P250 -D ~/Desktop/Interesting_Motifs/" + accession + "_" + motif.replace(".","x") + "_" + start + "_" + "%1.2g"%pCor + ".pdf").read()
									if elm != "":
										elmResultsTrue.append((accession + "\t" + hit).replace("\t\t","\tNA\t").replace("\t\t","\tNA\t").replace("'","").strip())
									else:
										elmResultsFalse.append((accession + "\t" + hit).replace("\t\t","\tNA\t").replace("\t\t","\tNA\t").replace("'","").strip())
										
									#"""
									sig_log = int(math.log(sig,10))
									
									if sig_log not in counter["slimprints_hits_sig"]:
										counter["slimprints_hits_sig"][sig_log] = []
										counter["slimprints_hits_sig_elm"][sig_log] = []
										counter["slimprints_hits_sig_false"][sig_log] = []
									
									
									if len(elm) > 0:
										#print  accession,"\t",motif,"\t",bits[15],"\t",bits[13]
										true.append(elm)
										counter["slimprints_hits_sig_elm"][sig_log].append(sig)
										counter["slimprints_hits_sig_elm"]["total"]["All"].append(sig)
										
										if elm not in counter["slimprints_hits_sig_elm"]["total"]:
											counter["slimprints_hits_sig_elm"]["total"][elm] = []
											
										counter["slimprints_hits_sig_elm"]["total"][elm].append(sig)
										counter["slimprints_hits_sig_elm"]["total"][elm.split("_")[0]].append(sig)
									else:
										false.append(elm)
										counter["slimprints_hits_sig_false"][sig_log].append(sig)
										counter["slimprints_hits_sig_false"]["total"].append(sig)
									
									counter["slimprints_hits_sig"][sig_log].append(sig)
									counter["slimprints_hits_sig"]["total"].append(sig)
									
									if sig not in results:
										resultsDict[sig] = []
										
									resultsDict[sig].append(elm)
							
						except:
							raise
					
		htmlString += "</table>"
		
		open("../../Projects/SLiMPrints_paper/Data/hits_slimprints.html","w").write(htmlString)
		
		#print "-"*10
		#print old_results
		#print counter["slimprints_hits_sig_elm"]
		
		#if human_elm:
		for v in elmResultsFalse:
			if not len(v.split("\t")) == len(elmSLiMPrintsResultsHeader.split("\t")):
				print len(v.split("\t"))
				
		for v in elmResultsTrue:
			if not len(v.split("\t")) == len(elmSLiMPrintsResultsHeader.split("\t")):
				print len(v.split("\t"))
	
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingResultsELM.dat","w").write(elmSLiMPrintsResultsHeader + "\n".join(elmResultsTrue))
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingResultsOther.dat","w").write(elmSLiMPrintsResultsHeader + "\n".join(elmResultsFalse))
		
		typeCounts = {'All': 1002, 'LIG': 761, 'CLV': 31, 'TRG': 77, 'MOD': 133}

		for type in ["LIG","CLV","MOD","TRG"]:
			print type,"\t",len(counter["slimprints_hits_sig_elm"]["total"][type]),"\t",
			print str(float(len(counter["slimprints_hits_sig_elm"]["total"][type]))/typeCounts[type]) ,"\t",
				
			counts = basic.binList(counter["slimprints_hits_sig_elm"]["total"][type],log=True)
			try:
				print sum([counts[x] for x in range(4,max(counts.keys()))])
			except:
				print "?"
	
		sorted = counter["slimprints_hits_sig"].keys()
		sorted.sort()
		
		outStr = ""
		
		for key in sorted:
			if key not in ["total"]:
				outStr += str(10**key) + "\t"
				
				outStr += str(len(counter["slimprints_hits_sig_elm"][key])) + "\t"
				outStr += str(len(counter["slimprints_hits_sig_false"][key])) + "\t"
				try:
					outStr += str("%1.3f"%(float(len(counter["slimprints_hits_sig_elm"][key]))/len(counter["slimprints_hits_sig_elm"]["total"]["All"]))) + "\t"
				except:
					outStr += str(0) + "\t"
				
				
				try:
					outStr += "%1.3f"%(float(len(counter["slimprints_hits_sig_false"][key]))/len(counter["slimprints_hits_sig_false"]["total"])) + "\t"
				except:
					outStr += str(0) + "\t"
					
				try:
					outStr += str("%1.3f"%(float(len(counter["slimprints_hits_sig_elm"][key]))/float(len(counter["slimprints_hits_sig_false"][key]) + len(counter["slimprints_hits_sig_elm"][key]))))
				except:
					outStr += str(0)
					
				outStr += "\n"
		
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingStatsBoth.dat","w").write(outStr)
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingStatsTrue.dat","w").write(basic.plotDist(basic.binList(counter["slimprints_hits_sig_elm"]["total"]["All"],log=True)))
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingStatsFalse.dat","w").write(basic.plotDist(basic.binList(counter["slimprints_hits_sig_false"]["total"],log=True)))
		
		print outStr
	
		print "Total\t" ,len(counter["slimprints_hits_sig_elm"]["total"]["All"]),"\t",len(counter["slimprints_hits_sig_false"]["total"]) ,"\t", len(counter["slimprints_hits_sig_elm"]["total"]["All"]) + len(counter["slimprints_hits_sig_false"]["total"])
		print "Total\t" ,float(len(counter["slimprints_hits_sig_elm"]["total"]["All"]))/1002,"\t",float(len(counter["slimprints_hits_sig_elm"]["total"]["All"]))/(len(counter["slimprints_hits_sig_elm"]["total"]["All"]) + len(counter["slimprints_hits_sig_false"]["total"]))
	
		#else:
		#	open("../../Projects/SLiMPrints_paper/Data/humanResults.dat","w").write(elmResultsTrue + elmResultsFalse)
		
		#for type in counter["slimprints_hits_sig_elm"]["total"]:
		#	print 
		#	print "-"*10
		#	print type,
			
		#	if type != "All":
		#		print len(elmInfo.data["Type"][type])
		#	else:
		#		summer = 0
		#		for t in ["LIG","CLV","TRG","MOD"]:
		#			summer += len(elmInfo.data["Type"][t])
					
		#		print summer
			
			#basic.plotDist(basic.binList(counter["slimprints_hits_sig_elm"]["total"][type],log=True))
			
			
		#try:
		#	print "%1.3f"%(sum(counter["slimprints_hits_sig_elm"]["total"]["All"])/len(counter["slimprints_hits_sig_elm"]["total"]["All"])),"\t",
		#except:
		#	print 0,"\t",
				
		#try:
		#	print "%1.3f"%(sum(counter["slimprints_hits_sig_false"]["total"])/len(counter["slimprints_hits_sig_false"]["total"]))
		#except:
		#	print 0
			
			
		#print resultsDict
		
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################
	############################################################################################################################################

	def check_rawData(self):
		elmInfo = motifHelper.motifHelper()
		elmInfo.initialiseELMData()
			
		if sys.argv[1] == "True":
			self.options['accession'] = elmInfo.data['ELM'].keys()
		else:
			self.options['accession'] = open(self.options['uniprot_human_ids']).read().split("\n")[1:]
		
		print self.options['uniprot_human_ids']
		print len(self.options['accession'])
		wcs_r_list = []
		wcs_r_list_background = []
		con_r_list = []
		con_r_list_background = []
		flanks = {}
		
		for i in range(-100,101):
			flanks[i] = []
		
		amb = re.compile('\.\{[^\}]*\}|\[[^\]]*\]\{[^\}]*\}|[A-Z]\{[^\}]+\}|\[[^\]]*\]|\.\{[0-9]\}|[A-Za-z\.\$\^]')
		
		findable = 0
		notFindable = 0
		proteinLengths = []
		reasons = {"Gappy":0,"Order":0,"Short":0,"EXTRA":0,"Domain":0,"Divergence":0}
		
		ELMs = {"True":{"All":[],"LIG":[],"CLV":[],"TRG":[],"MOD":[]},
		"False":{"All":[],"LIG":[],"CLV":[],"TRG":[],"MOD":[]}}
		
		elmResidueCount = 0
		disorderConservationCorrelationStr = "disorder\tRLCp\tCS\n"
		supplementaryTable1= "UniprotAccession\tELM_name\tStart\tEnd\tMotif\tSpecies\n"
		proteinCounts = []
		for file in os.listdir(os.path.join(self.options['alignment_dir'],"ALN")):
			if file.split(".")[-1] == "scores" and file.split(".")[0] in self.options['accession']:
				tmpData = {}
				
				
				for line in open(os.path.join(self.options['alignment_dir'],"ALN",file)).read().strip().split("\n"):
					tmpData[line.split("\t")[0]] = line.split("\t")[1].split(",")
					
				if int(tmpData['SpeciesCount'][0])  >= 0:#int(sys.argv[3]):
					proteinCounts.append(file.split(".")[0])
					for i in range(0,len(tmpData["Disorder"])):	
						
						
						tmpStr = ""
						
						try:
						
							if tmpData['ignoreList'][i] != "Gappy" and int(tmpData['SpeciesCount'][0]) > int(sys.argv[3]):
								tmpStr = str(tmpData["Disorder"][i]) + "\t" + str(tmpData["WCS_W_p"][i]) + "\t" + str(tmpData["WCS"][i]) +"\n"
								
								#if float(tmpData["WCS_W_p"][i]) == 1:
								#	print tmpData['ignoreList'][i],
								
						except:
							pass#raise
							
						disorderConservationCorrelationStr += tmpStr
						
						
					try:
						instances = elmInfo.data['ELM'][file.split(".")[0]]
						
						
						proteinLengths.append(len(tmpData["ignoreList"]))
						for instance in instances:
							
							reason= ""
							
							instance['Sequence'] = tmpData['Sequence'][0]
							#print instance['Sequence']
							expandedMotifs = elmInfo.expandMotif(instance['RE'])
							
							maskedMotif = []
							scores = []
							conScores = []
							
							dis = []
							if len(expandedMotifs) > 0:
								for expandedMotif in expandedMotifs:
									ambBits = amb.findall(re.sub("\[\^[^\]]+\]",".",expandedMotif).strip("^$"))
									if len(ambBits) == instance['End'] - instance['Start']:
										tmpRE = re.compile(expandedMotif)
										#print expandedMotif,instance['Sequence'][instance['Start']-1:instance['End']]
										if len(tmpRE.findall(instance['Sequence'][instance['Start']-1:instance['End']-1])) > 0:
											use = filter(lambda x: ambBits[x][0] != ".", range(0,len(ambBits)))
											
											maskedMotif = []
											for motifResIgnore in use:
												maskedMotif.append(tmpData["ignoreList"][instance['Start'] + motifResIgnore -1])
												scores.append(float(tmpData["WCS_W_rStdev"][instance['Start'] + motifResIgnore -1]))
												conScores.append(float(tmpData["WCS"][instance['Start'] + motifResIgnore -1]))
												dis.append(float(tmpData["Disorder"][instance['Start'] + motifResIgnore -1]))
												tmpData["ignoreList"][instance['Start'] + motifResIgnore -1] = "ELM"
												
							
							#print file.split(".")[0],"\t",tmpData["SpeciesCount"][0],"\t","%1.2f"%(sum(dis)/len(dis)),"\t",instance['LongDesc'],"\t",",".join(maskedMotif),
							
							elmResidueCount += len(maskedMotif)
							
							if maskedMotif.count("") + maskedMotif.count("MOTIF") + maskedMotif.count("ELM") == len(maskedMotif) and int(tmpData["SpeciesCount"][0]) >= int(sys.argv[3]):
								ELMs["True"]["All"].append(instance["Desc"])
								ELMs["True"][instance["Desc"].split("_")[0]].append(instance["Desc"])
								findable += 1
								
								wcs_r_list += scores
								con_r_list += conScores
								middle = int(instance['Start'] + instance['End'])/2
						
								for i in range(-100,101):
									if middle + i < len(tmpData["WCS"]) and middle + i > 0:
										flanks[i].append(float(tmpData["WCS"][middle + i]))
									
							else:	
								ELMs["False"]["All"].append(instance["Desc"])
								ELMs["False"][instance["Desc"].split("_")[0]].append(instance["Desc"])
								
								if int(tmpData["SpeciesCount"][0]) < int(sys.argv[3]):
									maskedMotif = ["Divergence"]*len(maskedMotif)
									
									for i in range(0,len(tmpData["ignoreList"])):					
										tmpData["ignoreList"][i] = "Divergence"
										
								maskedMotif.sort()
								
								stringCounter = {}
								
								for i in range(0,len(maskedMotif)):
									#print maskedMotif[i]
									
									if maskedMotif[i] not in stringCounter:
										stringCounter[maskedMotif[i]] = 1
									else:
										stringCounter[maskedMotif[i]] += 1
								
								if "MOTIF" in stringCounter:
									del stringCounter["MOTIF"]
									
								if "ELM" in stringCounter:
									del stringCounter["ELM"]
									
								revMapStringCounter = dict((v,k) for k, v in stringCounter.iteritems())
								
								reason = revMapStringCounter[max(revMapStringCounter.keys())]
								
								if reason == "Few":
									reason = "Short"
									
								if reason in reasons:
									reasons[reason] += 1
								else:
									reasons["Domain"] += 1
									reason = "Domain"
									#print file.split(".")[0],reason
								#print file.split(".")[0],maskedMotif
								
								notFindable += 1
								
							
							supplementaryTable1 += file.split(".")[0] + "\t" + instance['LongDesc']+ "\t" + str(instance['Start']) + "\t" + str(instance['End']) + "\t" + instance['RE'] + "\t" + instance['Taxon'] + "\t" + reason + "\n"
					
							#print file.split(".")[0],"\t",tmpData["SpeciesCount"][0],"\t","%1.2f"%(sum(dis)/len(dis)),"\t",instance['LongDesc'],"\t",",".join(maskedMotif),"\t",
							
							#print (sum(dis)/len(dis) > 0.3 and int(tmpData["SpeciesCount"][0]) > 10)# and maskedMotif.count("") == len(maskedMotif))
					except:
						raise
						pass
						
					for i in range(0,len(tmpData["ignoreList"])):					
							if tmpData["ignoreList"][i] == "":
								wcs_r_list_background.append(float(tmpData["WCS_W_rStdev"][i]))
								con_r_list_background.append(float(tmpData["WCS"][i]))
				
							
							
		tmpDict = {}
		for key1 in ELMs:
			for key2 in ELMs[key1]:
				print key1,key2,len(ELMs[key1][key2])
				tmpDict [key2] = len(ELMs[key1][key2])
				
		print tmpDict
		
		flanksData = ""
		
		for i in range(-100,101):
			flanksData += str(i) + "\t" + "%1.3f"%(sum(flanks[i])/len(flanks[i])) + "\n"
			
			
		open("../../Projects/SLiMPrints_paper/supplementaryTable1.tdt","w").write(supplementaryTable1)
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingFlanks.dat","w").write(flanksData)
		
		open("../../Projects/SLiMPrints_paper/Data/disorderConservationCorrelation.dat","w").write(disorderConservationCorrelationStr)
		
		
		dist = basic.binList(wcs_r_list,normaliser=10)
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingResidueP_elm.dat","w").write(basic.plotDist(dist))
		
		dist = basic.binList(wcs_r_list_background,normaliser=10)
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingResidueP_background.dat","w").write(basic.plotDist(dist))
		
		
		print len([x for x in wcs_r_list if float(x) > 0]),len(wcs_r_list),float(len([x for x in wcs_r_list if float(x) > 0]))/len(wcs_r_list)
		print sum(wcs_r_list)/len(wcs_r_list),len(wcs_r_list)
		
		print float(len([x for x in wcs_r_list_background if x > 0]))/len(wcs_r_list_background)
		print sum(wcs_r_list_background)/len(wcs_r_list_background),len(wcs_r_list_background)
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingRLCFalse.dat","w").write("\n".join([str(x) for x in wcs_r_list_background]))
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingRLCTrue.dat","w").write("\n".join([str(x) for x in wcs_r_list]))
		
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingRLCTrue.dat","w").write("\n".join([str(x) for x in wcs_r_list]))
		
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingRLC_ROC.dat","w").write("\t1\n".join([str(x) for x in wcs_r_list_background]) + "\t1\n" + "\t0\n".join([str(x) for x in wcs_r_list])  +"\t0\n")
		open("../../Projects/SLiMPrints_paper/Data/elmBenchmarkingCon_ROC.dat","w").write("\t1\n".join([str(x) for x in con_r_list_background]) + "\t1\n" + "\t0\n".join([str(x) for x in con_r_list])  +"\t0\n")
		
			
		print sum(wcs_r_list)/len(wcs_r_list)
		for elm in ELMs:
			print elm,len(ELMs[elm])
		
		print "Proteins:",len(proteinCounts)
	
		print "Total",findable + notFindable
		print "Unmasked",findable
		print "Masked",notFindable
		print "ELM residues unmasked",len(wcs_r_list)
		print "ELM residues",elmResidueCount
		print
		print "Non-ELM residues unmasked",len(wcs_r_list_background)
		print "Residues",sum(proteinLengths)
		
		summer = 0
		for i in range(2,6):
			summer += (2 +1)**(i-1)*len(wcs_r_list_background)
			
		print "Considered overlapping motif",summer
		wcs_r_list.sort()		
		
		print reasons
		print os.popen("/usr/local/bin/R CMD BATCH /Applications/Bioware/Projects/SLiMPrints_paper/Fig3-RLC_barplots.R").read()
		
		
# Script entry point: runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
	# The constructor loads ../settings/utilities.ini and
	# ../settings/slimprints.ini (relative to this file) into tester.options.
	tester = SLiMPrints_Tester()
	
	# The single analysis that is currently enabled.
	# NOTE(review): code in this file reads sys.argv[3] as a minimum species
	# count -- presumably inside check_results; confirm which command-line
	# arguments are required before running.
	tester.check_results()
	# Alternative calls, left disabled:
	#tester.check_rawData()
	#tester.parseSLiMSearch()
	