from ned_sortedFileSearcher import Searcher

import ned_commandLine as commandline
import ned_basicReader as basicReader
import ned_uniprotHelper as uniprotHelper
import ned_basic as basic

import re,os,sys,pprint

##
# COSMIC data @ ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/
# HGNC as key, taxonomy:9606 -> customise 	Cross-reference (HGNC) 
#
# 
#
#
#
#
##
class mutationHelper():
	"""Collect protein mutation records and export them as a tab-delimited table.

	The COSMIC and 1000 Genomes parsers store records in ``self.snpData`` as
	``{uniprotAcc: {recordId: recordDict}}``; ``tablify`` reads that layout.
	``pullSNPsFromUniprot`` and ``readMutationDb`` replace ``self.snpData`` with
	whatever their project helpers return -- presumably the same layout, but
	that cannot be confirmed from this file.
	"""

	def __init__(self,options=None):
		"""Load ../settings/utilities.ini (relative to this file) and reset the record store.

		options -- optional mapping of setting overrides. They are applied on top
		           of the ini values (previously this argument was accepted but
		           silently discarded, and its default was a shared mutable dict).
		"""
		cmdline = commandline.CommandLine()
		self.options = cmdline.loadIniFile(os.path.join(os.path.dirname(os.path.realpath(__file__)),"../settings/utilities.ini"))

		#self.searchComplexMutation = Searcher(self.options["mutation_complex"])

		self.options["verbosity"] = 0

		# Caller-supplied settings win over both the ini file and the default above.
		if options:
			for key in options:
				self.options[key] = options[key]

		self.snpData = {}

	def pullSNPsFromUniprot(self,listIds=None):
		"""Read UniProt entries and return the parsed variants.

		listIds -- table-like object whose first element (``listIds[0]``) is the
		           sequence of accessions to read. When empty or omitted, the
		           table named by the "uniprot_human_ids" option is loaded.

		Returns ``self.snpData``.
		"""
		if not listIds:
			listIds = basicReader.readTableFile(self.options["uniprot_human_ids"],hasHeader=False)

		for accession in listIds[0]:
			if accession not in self.snpData:
				self.snpData[accession] = {}

			uniprotReader = uniprotHelper.uniprotReader()
			uniprotReader.options["alignment_dir"] = "../Datasets/Alignments/"
			uniprotReader.options["uniprot_dir"] = os.path.abspath("../Datasets/Uniprot/Human/")

			uniprotReader.readUniprot(accession)
			# NOTE(review): this rebinds the whole store on every iteration, so
			# only the last accession's parseVariants() result survives and the
			# per-accession initialisation above is discarded. It probably wants
			# to merge into self.snpData instead, but the shape returned by
			# parseVariants() is not visible here -- confirm before changing.
			self.snpData = uniprotReader.parseVariants()

		return self.snpData

	def parseMutationData(self):
		"""Populate self.snpData from the enabled sources (currently COSMIC only)."""
		#self.parse1000Genomes()
		self.parseCosmic()

	def parse1000Genomes(self,table1000GenomesData=None):
		"""Add 1000 Genomes variants to self.snpData.

		table1000GenomesData -- optional pre-loaded mapping of row key -> row
		                        dict; when None the table named by the
		                        "mutation_1000genomes" option is read.

		Example row:
			{'match_seq': 'match: 20rkeGhev26'
			 'Feature_type': 'Transcript'
			 'Extra': 'ENSP=ENSP00000420293'
			 '#Uploaded_variation': '3_125879755_C/T'
			 'Feature': 'ENST00000472186'
			 'Codons': 'gGc/gAc'
			 'Existing_variation': '-'
			 'matches_uniprot(0:1)': '1'
			 'uniprotAcc': 'O75891'
			 'Location': '3:125879755'
			 'CDS_position': '68'
			 'Protein_position': '23'
			 'Consequence': 'NON_SYNONYMOUS_CODING'
			 'Allele': 'T'
			 'Gene': 'ENSG00000144908'
			 'cDNA_position': '296'
			 'Amino_acids': 'G/D'}

		Rows that raise while being processed are reported on stdout and skipped.
		The row count and the number of non-synonymous records are printed.
		"""
		if table1000GenomesData is None:
			table1000GenomesData = basicReader.readTableFile(self.options["mutation_1000genomes"],byColumn=False)
		print(len(table1000GenomesData))

		nonsynonymousCount = 0

		for entry in table1000GenomesData:
			try:
				row = table1000GenomesData[entry]

				# Fall back to the genomic location when no variation name was uploaded.
				if row['#Uploaded_variation'] == ".":
					variantId = row['Location']
				else:
					variantId = row['#Uploaded_variation']

				# Every consequence other than NON_SYNONYMOUS_CODING is labelled
				# "synonymous" and keeps the raw Amino_acids value on both sides.
				if row['Consequence'] == 'NON_SYNONYMOUS_CODING':
					subType = "non-synonymous"
					residues = row['Amino_acids'].split("/")
					fromRes = residues[0]
					toRes = residues[1]
				else:
					subType = "synonymous"
					fromRes = row['Amino_acids']
					toRes = row['Amino_acids']

				# The '1000_GENOME_STATS' column is no longer read, so the
				# statistics are always the "?" placeholder. (This used to be
				# produced by indexing an empty string inside a bare except.)
				tempMutationDict = {
				"Id":variantId,
				"subType":subType,
				"Start":row['Protein_position'],
				"End":row['Protein_position'],
				"From":fromRes,
				"To":toRes,
				"Desc":toRes + "<-" + fromRes,

				"dbSNP":"",

				"AllelleFreq":"?",
				'Depth':"?",
				"r2":"?",
				"Source":"1000genomes"
				}

				accession = row['uniprotAcc']
				if accession not in self.snpData:
					self.snpData[accession] = {}

				if subType == "non-synonymous":
					nonsynonymousCount += 1

				self.snpData[accession][variantId] = tempMutationDict

			except Exception as e:
				# Best effort: report the problem and carry on with the next row.
				print("Error %s" % (e,))

		print(nonsynonymousCount)

	def _buildCosmicRecord(self,row,start,fromRes,toRes,subType,stop=""):
		"""Return the record dict stored for one COSMIC row (End always equals Start)."""
		return {
			"Id":row['Mutation AA'],
			"Start":start,
			"End":start,
			"stop":stop,
			"subType":subType,
			"From":fromRes,
			"To":toRes,
			"Desc":row['Mutation AA'] + ":" + row['Mutation Description'] + ":" + row['Histology subtype'],
			"dbSNP":"",
			"Source":"Cosmic"
		}

	def parseCosmic(self,tableCosmicData=None):
		"""Add COSMIC mutations to self.snpData, keyed by "<Mutation ID>_<Pubmed_PMID>".

		tableCosmicData -- optional pre-loaded mapping of row key -> row dict;
		                   when None the table named by the "mutation_cosmic"
		                   option is read.

		Example row:
			'Comments': ''
			'match_seq': 'match: 756mdeDddr762'
			'Gene name': 'ZZZ3'
			'uniprotAcc': 'Q8IYH5'
			'Mutation GRCh37 strand': '-'
			'Primary site': 'ovary'
			'Mutation zygosity': 'het'
			'Mutation AA': 'p.D759D'
			'Tumour origin': 'primary'
			'Sample source': 'NS'
			'Pubmed_PMID': '21720365'
			'Mutation NCBI36 genome position': '1:77814395-77814395'
			'Histology subtype': 'serous_carcinoma'
			'Accession Number': 'ENST00000370801'
			'matches_uniprot(1:0)': '1'
			'Mutation ID': '80810'
			'Mutation CDS': 'c.2277T>C'
			'Sample name': 'TCGA-59-2351'
			'Primary histology': 'carcinoma'
			'Mutation GRCh37 genome position': '1:78041807-78041807'
			'HGNC ID': '24523'
			'Mutation Description': 'Substitution - coding silent'
			'Genome-wide screen': 'y'
			'ID_tumour': '1398559'
			'Mutation NCBI36 strand': '-'
			'ID_sample': '1474860'
			'Mutation somatic status': 'Confirmed somatic variant'
			'Site subtype': 'NS'

		Known 'Mutation Description' values:
			'Substitution - Missense'
			'Substitution - coding silent'
			'Substitution - Nonsense'
			'Deletion - In frame'
			'Deletion - Frameshift'
			'Insertion - In frame'
			'Insertion - Frameshift'
			'Nonstop extension' = Changes a stop to a aa and causes read through.
			'Unknown'
			'Complex - frameshift'
			'Complex - deletion inframe'
			'Complex - compound substitution'
			'Complex - insertion inframe'

		Side effects: each row's 'Mutation Description' is rewritten in place
		(" - " -> "-", " " -> "_"). Rows that raise while being parsed are
		printed (description and AA change) and skipped; rows whose description
		matches no branch print an empty dict. Summary counters are printed at
		the end.
		"""
		if tableCosmicData is None:
			tableCosmicData = basicReader.readTableFile(self.options["mutation_cosmic"],byColumn=False)

		substitutionPattern = re.compile(r'(?P<from>[A-Z\*])(?P<start>[0-9]+)(?P<to>[A-Z\*]*)')
		complexPattern = re.compile(r'(?P<from>[A-Z]*)(?P<start>[0-9]+)_*(?P<to>[A-Z]\**)(?P<end>[0-9]*)(ins|>)(?P<inserted>[A-Z]*)')
		insertionPattern = re.compile(r'(?P<from>[A-Z]*)(?P<start>[0-9]+)_*(?P<to>[A-Z\*]*)(?P<end>[0-9]*)(ins|>)(?P<inserted>[A-Z]*)')
		deletionPattern = re.compile(r'(?P<from>[A-Z]*)(?P<start>[0-9]+)_*(?P<to>[A-Z\*]*)(?P<end>[0-9]*)(del|>)(?P<deleted>[A-Z]*)')
		frameshiftPattern = re.compile(r'p\.(?P<residue>[A-Z])(?P<position>[0-9]+)fs\*(?P<length>\>{0,1}[0-9]*)')

		# Branches that share the from/start/to layout and are always
		# non-synonymous, keyed by the first word of the description.
		rangePatterns = {
			"Deletion":deletionPattern,
			"Complex":complexPattern,
			"Insertion":insertionPattern
		}

		types = {}
		cout = 0

		for entry in tableCosmicData:
			row = tableCosmicData[entry]
			try:
				accession = row['uniprotAcc']
				if accession not in self.snpData:
					self.snpData[accession] = {}

				description = row['Mutation Description']
				types[description] = types.get(description,0) + 1

				mutation_type = description.split(" - ")
				row['Mutation Description'] = description.replace(" - ","-").replace(" ","_")

				aaChange = row['Mutation AA']
				tempMutationDict = {}

				if mutation_type[-1] in ("Frameshift","frameshift"):
					m = frameshiftPattern.search(aaChange)
					position = m.group('position')

					# An empty or ">N" length makes int() raise, so such rows
					# are reported by the handler below and skipped.
					stop = str(int(position) + int(m.group('length')))

					# A frameshift is never synonymous. The old code compared
					# the position with itself and therefore labelled every
					# frameshift "synonymous".
					# NOTE(review): From/To hold the position, not the captured
					# residue; left unchanged in case readers rely on it.
					tempMutationDict = self._buildCosmicRecord(row,position,position,position,"non-synonymous",stop)

				# 'Nonstop extension' contains no " - ", so the first element is
				# the whole description; an exact == "Nonstop" test never matched.
				elif mutation_type[0] == "Substitution" or mutation_type[0].startswith("Nonstop"):
					m = substitutionPattern.search(aaChange)

					fromRes = m.group('from')
					toRes = m.group('to')

					if fromRes == toRes:
						sub_type = "synonymous"
					else:
						sub_type = "non-synonymous"

					# A substitution to '*' introduces a stop at that position.
					if toRes == '*':
						stop = m.group('start')
					else:
						stop = ""

					tempMutationDict = self._buildCosmicRecord(row,m.group('start'),fromRes,toRes,sub_type,stop)

				elif mutation_type[0] in rangePatterns:
					m = rangePatterns[mutation_type[0]].search(aaChange)
					tempMutationDict = self._buildCosmicRecord(row,m.group('start'),m.group('from'),m.group('to'),"non-synonymous")

				if tempMutationDict != {}:
					recordId = row['Mutation ID'] + "_" + row['Pubmed_PMID']

					# Report collisions; the newer record replaces the older one.
					if recordId in self.snpData[accession]:
						cout += 1
						print(recordId)
						print(self.snpData[accession][recordId])
						print(tempMutationDict)

					self.snpData[accession][recordId] = tempMutationDict
				else:
					print(tempMutationDict)

			except Exception:
				# Best effort: show the row that could not be parsed and move on.
				print("%s \t%s" % (row['Mutation Description'],row['Mutation AA']))

		print(cout)
		print(len(tableCosmicData))
		print(sum([len(self.snpData[x]) for x in self.snpData]))
		print(types)

	def readMutationDb(self):
		"""Replace self.snpData with the table named by the "mutation_db" option, keyed on "acc"."""
		self.snpData = basicReader.readTableFile(self.options["mutation_db"],key="acc",byColumn=False,relationship="many")

	def tablify(self, accessionList=None,snpType="non-synonymous"):
		"""Return self.snpData rendered as a tab-delimited table with a header line.

		accessionList -- accessions to include; empty or omitted means every
		                 accession in self.snpData. The caller's list is no
		                 longer sorted in place. An accession missing from
		                 self.snpData raises KeyError.
		snpType       -- "non-synonymous" (default) keeps only those records;
		                 any other value keeps synonymous records as well.

		Accessions and record ids are emitted in sorted order; the empty
		accession is skipped. Records that cannot be rendered are printed and
		left out of the table.
		"""
		if snpType == "non-synonymous":
			snpTypes = ["non-synonymous"]
		else:
			snpTypes = ["non-synonymous","synonymous"]

		# NOTE(review): the header names 13 columns but each row carries 10
		# (AllelleFreq, Depth and R2 are not emitted), so Desc/Source/dbSNP do
		# not line up with their header names. Left unchanged because existing
		# readers of this format are not visible here -- confirm which side to fix.
		tableRows = ["acc\tid\tsubType\tStart\tEnd\tFrom\tTo\tAllelleFreq\tDepth\tR2\tDesc\tSource\tdbSNP\n"]

		if not accessionList:
			accessionList = self.snpData.keys()

		for accession in sorted(accessionList):
			sorter = sorted(self.snpData[accession].keys())

			if accession == "":
				continue

			for FTid in sorter:
				try:
					record = self.snpData[accession][FTid]

					if record["subType"].lower() in snpTypes:
						tableRows.append("\t".join([
							accession,
							FTid,
							record["subType"],
							str(record["Start"]),
							str(record["End"]),
							record["From"],
							record["To"],
							record["Desc"],
							record["Source"],
							record["dbSNP"]
						]) + "\n")
				except Exception as e:
					print("%s %s" % (self.snpData[accession][FTid],e))

		return "".join(tableRows)

	def findMutation(self,id):
		"""Return the comma-separated values of the second tab field of the last search hit.

		Returns an empty list when the search yields nothing.
		"""
		mutation = []

		# NOTE(review): self.searchMutation is not assigned anywhere in this
		# class (the only Searcher construction, in __init__, is commented out),
		# so this raises AttributeError unless a caller sets it first.
		for hub in self.searchMutation.find(id):
			mutation = hub.split("\t")[1].split(",")

		return mutation

	def makeMutationDb(self):
		"""Parse the mutation sources and write the table to ../../Databases/Mutation/Human_mutations.tdt."""
		self.parseMutationData()

		outputPath = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)),"../../Databases/Mutation/Human_mutations.tdt"))
		print(outputPath)

		# Build the table before opening the file so a failure cannot leave a
		# truncated database behind. Uses self rather than the script-level
		# instance, so the method also works when the class is imported.
		table = self.tablify()

		with open(outputPath,"w") as outputFile:
			outputFile.write(table)
	
# Script entry point: build a helper (which loads the ini settings) and
# regenerate the mutation database file via makeMutationDb().
if __name__ == "__main__":
	mutHelper = mutationHelper()
	# Alternative entry points kept for manual runs / debugging:
	#mutHelper.parse1000Genomes()
	#mutHelper.readMutationDb()
	mutHelper.makeMutationDb()
	# Inspect the parsed records for a single accession:
	#pp = pprint.PrettyPrinter(indent=4)
	#pp.pprint(mutHelper.snpData["Q14289"])
	
	