import sys,copy,os,re



import ned_basic as basic
import ned_basicReader as basicReader
import ned_commandLine as commandline
import ned_fastaHelper as fastaHelper



# TODO: write a generic URL grabber class.
# DSSP files can be downloaded from: ftp://ftp.cmbi.ru.nl//pub/molbio/data/dssp/

		
		#	HEADER    HYDROLASE   (SERINE PROTEINASE)         17-MAY-76   1EST                
		#...
		#  240  1  4  4  0 TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, 
		#                  NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN)                .
		# 10891.0   ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2)    
		#  162 67.5   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J)  ; PER 100 RESIDUES 
		#    0  0.0   TOTAL NUMBER OF HYDROGEN BONDS IN     PARALLEL BRIDGES; PER 100 RESIDUES 
		#   84 35.0   TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES; PER 100 RESIDUES 
		#...
		#   26 10.8   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+2)
		#   30 12.5   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+3)
		#   10  4.2   TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+4)
		#... 
		#  #  RESIDUE AA STRUCTURE BP1 BP2  ACC   N-H-->O  O-->H-N  N-H-->O  O-->H-N    
		#    2   17   V  B 3   +A  182   0A   8  180,-2.5 180,-1.9   1,-0.2 134,-0.1  
		#
		#                                    TCO  KAPPA ALPHA  PHI   PSI    X-CA   Y-CA   Z-CA 
		#                                  -0.776 360.0   8.1 -84.5 125.5  -14.7   34.4   34.8
		#
		#....;....1....;....2....;....3....;....4....;....5....;....6....;....7..
		#    .-- sequential resnumber, including chain breaks as extra residues
		#    |    .-- original PDB resname, not nec. sequential, may contain letters
		#    |    |   .-- amino acid sequence in one letter code
		#    |    |   |  .-- secondary structure summary based on columns 19-38
		#    |    |   |  | xxxxxxxxxxxxxxxxxxxx recommend columns for secstruc details
		#    |    |   |  | .-- 3-turns/helix  
		#    |    |   |  | |.-- 4-turns/helix  
		#    |    |   |  | ||.-- 5-turns/helix  
		#    |    |   |  | |||.-- geometrical bend
		#    |    |   |  | ||||.-- chirality
		#    |    |   |  | |||||.-- beta bridge label 
		#    |    |   |  | ||||||.-- beta bridge label 
		#    |    |   |  | |||||||   .-- beta bridge partner resnum
		#    |    |   |  | |||||||   |   .-- beta bridge partner resnum
		#    |    |   |  | |||||||   |   |.-- beta sheet label 
		#    |    |   |  | |||||||   |   ||   .-- solvent accessibility
		#    |    |   |  | |||||||   |   ||   |
		#  #  RESIDUE AA STRUCTURE BP1 BP2  ACC
		#    |    |   |  | |||||||   |   ||   |
		#   35   47   I  E     +     0   0    2
		#   36   48   R  E >  S- K   0  39C  97 
		#   37   49   Q  T 3  S+     0   0   86    (example from 1EST)
		#   38   50   N  T 3  S+     0   0   34   
		#   39   51   W  E <   -KL  36  98C   6 


# 
#   #  RESIDUE AA STRUCTURE BP1 BP2  ACC     N-H-->O    O-->H-N    N-H-->O    O-->H-N    TCO  KAPPA ALPHA  PHI   PSI    X-CA   Y-CA   Z-CA
#      |         |         |         |         |         |         |         |         |         |    1    |    1    |    1    |    1
#      |    1    |    2    |    3    |    4    |    5    |    6    |    7    |    8    |    9    |    0    |    1    |    2    |    3         
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345     
# 

class dsspHelper():
	"""Parse DSSP files and map their per-residue data onto a reference sequence.

	The methods are meant to be chained: ``parseDssp`` fills
	``self.data["chains"]``, ``dataByType`` pivots that into
	``self.data["statistics"]`` and ``convertDssp`` reads the statistics.
	"""

	# Zero-based, inclusive [first, last] character columns of every field in a
	# DSSP residue line; a one-element list marks a single-character field.
	#
	# NOTE(review): this table used to list "N-H-->O" and "O-->H-N" twice
	# ([42,49]/[52,60] and [63,71]/[74,82]). Duplicate keys in a dict literal
	# keep only the last value, so only the second pair was ever parsed. That
	# effective behaviour is preserved here; capturing the first pair as well
	# would need distinct key names.
	_FIXED_WIDTHS = {
		"count":[0,4],
		"offset":[5,9],
		"chain":[10,11],
		"aa":[12,14],
		"SecondaryStructure":[16],
		"3-turns/helix":[18],
		"4-turns/helix":[19],
		"5-turns/helix":[20],
		"geometricalBend":[21],
		"chirality":[22],
		"betaBridgeLabel1":[23],
		"betaBridgeLabel2":[24],
		"betaBridgePartnerResnum":[25,28],
		"betaSheetLabel":[29,32],
		"solventAccessibility":[34,37],
		"N-H-->O":[63,71],
		"O-->H-N":[74,82],
		"TCO":[84,90],
		"KAPPA":[92,96],
		"ALPHA":[98,102],
		"PHI":[103,107],
		"PSI":[110,114],
		"X-CA":[117,121],
		"Y-CA":[124,128],
		"Z-CA":[130,135],
	}

	def __init__(self):
		"""Load the utilities settings and set up the column table.

		``self.options`` is read from ``../settings/utilities.ini`` (relative to
		this file); ``parseDssp`` uses its ``dssp_dir`` entry.
		"""
		cmdline = commandline.CommandLine()
		self.options = cmdline.loadIniFile(os.path.join(os.path.dirname(os.path.realpath(__file__)),"../settings/utilities.ini"))

		# Per-instance copy, so editing one helper's columns never affects another.
		self.fixedwidths = copy.deepcopy(self._FIXED_WIDTHS)

		# Start empty so dataByType/convertDssp do not fail with AttributeError
		# when they are called before parseDssp.
		self.data = {"chains":{},"statistics":{}}

	def _splitResidueLine(self,line):
		"""Cut one residue line into a ``{field name: stripped text}`` dict."""
		fields = {}
		for header, columns in self.fixedwidths.items():
			first = columns[0]
			if len(columns) == 1:
				last = first
			else:
				last = columns[1]
			# Columns are inclusive, slices are not: hence the + 1.
			fields[header] = line[first:last + 1].strip()
		return fields

	def parseDssp(self,pdb):
		"""Read ``<dssp_dir>/<pdb>.dssp`` into ``self.data["chains"]``.

		pdb -- PDB identifier; it is lower-cased to build the file name.

		Every line after the first one whose first non-blank character is "#"
		is cut into the fields of ``self.fixedwidths``. Lines whose chain field
		is empty are skipped. Each remaining residue is stored as
		``self.data["chains"][chain][int(offset)] = {field: text}`` and an empty
		``self.data["statistics"][chain]`` dict is created per chain. Data from
		a previous call is discarded.

		Raises ValueError if a kept line has a non-numeric offset field, and
		whatever ``open`` raises if the file cannot be read.
		"""
		pdb = pdb.lower()
		self.data = {"chains":{},"statistics":{}}

		path = os.path.join(self.options['dssp_dir'],pdb + ".dssp")
		atData = False

		# "with" guarantees the handle is closed, even if a line fails to parse.
		with open(path) as handle:
			for rawLine in handle:
				line = rawLine.rstrip("\r\n")

				if not atData:
					# startswith() copes with blank lines; indexing [0] did not.
					if line.lstrip().startswith("#"):
						atData = True
					continue

				residue = self._splitResidueLine(line)
				chain = residue["chain"]
				if not chain:
					continue

				if chain not in self.data["chains"]:
					self.data["chains"][chain] = {}
					self.data["statistics"][chain] = {}

				# residue is a fresh dict of strings, so no copy is needed.
				self.data["chains"][chain][int(residue["offset"])] = residue

	def dataByType(self):
		"""Pivot the parsed residues into per-field lookups.

		For every chain and every field of ``self.fixedwidths`` this stores
		``self.data["statistics"][chain][field] = {offset: text}``.
		"""
		for chain, residues in self.data["chains"].items():
			chainStats = self.data["statistics"][chain]
			for header in self.fixedwidths:
				chainStats[header] = {offset: residue[header] for offset, residue in residues.items()}

	def _gappedSequence(self,chain):
		"""Return the chain's residues from lowest to highest offset, "-" for gaps."""
		residues = self.data["statistics"][chain]["aa"]
		if not residues:
			return ""
		# range() excludes its end point; + 1 keeps the highest-numbered residue,
		# which the previous range(min, max) silently dropped.
		return "".join(residues.get(offset,"-") for offset in range(min(residues),max(residues) + 1))

	def convertDssp(self,pdb,acc,reg="",start=0,stop=0):
		"""Collect residues and secondary structure of a sequence region per chain.

		pdb   -- accepted for interface compatibility; not used by the body.
		acc   -- accession handed to ``fastaHelper.fetchSequence(acc, ".")``.
		reg   -- regular expression searched in the extracted region; passing
		         the integer 0 skips the region extraction altogether.
		start -- first position of the region; indices ``start - 1`` up to
		         ``stop - 1`` of the aligned numbering are inspected.
		stop  -- last position of the region.

		Requires ``parseDssp`` and ``dataByType`` to have been run. For each
		chain the gapped PDB sequence is aligned to the fetched sequence with
		``basic.alignStrings``; chains for which that call returns the string
		"False" as offset are skipped (presumably its "no alignment" marker --
		confirm against ned_basic). ``self.data["SequenceCheck"]`` and
		``self.data["SecondaryStructureCode"]`` are overwritten for every
		chain, so afterwards they describe the last chain processed.

		Returns a dict of parallel lists ("ss", "motifRegion", "chain",
		"offsetScore", "matchMotif") with one entry per chain that has at least
		one residue inside the region. Positions without a residue are written
		as "x"; in "ss" the codes G and I are folded into H and B into E.
		"matchMotif" holds the strings "True"/"False". The "ssClass" list is
		returned but never filled.
		"""
		fastahelper = fastaHelper.fastaHelper()
		fastahelper.fetchSequence(acc,".")
		sequence = fastahelper.sequence

		pdbDict = {
		"ss":[],
		"motifRegion":[],
		"chain":[],
		"offsetScore":[],
		"matchMotif":[],
		"ssClass":[]
		}

		for chain in self.data["statistics"]:
			chainStats = self.data["statistics"][chain]
			self.data["SequenceCheck"] = {}
			self.data["SecondaryStructureCode"] = {}

			gappedPDBseq = self._gappedSequence(chain)
			[offset,offsetScore] = basic.alignStrings(sequence,gappedPDBseq,True)

			if offset == "False":
				continue

			# Shift PDB numbering so the first residue lands on the aligned offset.
			adj = offset - min(chainStats['solventAccessibility'])
			for pdbOffset in chainStats['solventAccessibility']:
				self.data["SequenceCheck"][pdbOffset + adj] = chainStats["aa"][pdbOffset]
				# An empty secondary-structure field is reported as "-".
				self.data["SecondaryStructureCode"][pdbOffset + adj] = chainStats["SecondaryStructure"][pdbOffset] or "-"

			if reg == 0:
				continue

			# Best effort per chain: a bad pattern or bad bounds is reported
			# and the remaining chains are still processed.
			try:
				regionResidues = []
				regionStructure = []
				foundResidue = False

				for i in range(start - 1,stop):
					if i in self.data["SequenceCheck"]:
						regionResidues.append(self.data["SequenceCheck"][i])
						regionStructure.append(self.data["SecondaryStructureCode"][i])
						foundResidue = True
					else:
						regionResidues.append("x")
						regionStructure.append("x")

				if foundResidue:
					motifRegion = "".join(regionResidues)
					ss = "".join(regionStructure)
					pattern = re.compile(reg)
					matched = pattern.search(motifRegion) is not None

					# Everything that can fail has run; append as one unit so
					# the parallel lists cannot get out of step.
					pdbDict["ss"].append(ss.replace("G","H").replace("I","H").replace("B","E"))
					pdbDict["motifRegion"].append(motifRegion)
					pdbDict["chain"].append(chain)
					pdbDict["offsetScore"].append(offsetScore)
					pdbDict["matchMotif"].append(str(matched))

			except Exception as e:
				print("%s %s" % (e,self.data["SequenceCheck"]))

		return pdbDict
	
