import os,urllib,sys,re

import ned_commandLine as commandline
import ned_fastaHelper as fastaHelper

try:
	from modeller import *	# Load standard Modeller classes 
	from modeller.automodel import *	# Load the automodel class
except:
	print "Modeller not installed"

class structureHelper():
	def __init__(self):
		"""Start with an empty result store and load the utilities settings.

		self.data    -- dictionary filled in by the parsing methods.
		self.options -- whatever CommandLine.loadIniFile() returns for
		                "../settings/utilities.ini", resolved relative to the
		                real location of this source file.
		"""
		self.data = {}
		iniLoader = commandline.CommandLine()
		moduleDir = os.path.dirname(os.path.realpath(__file__))
		iniPath = os.path.join(moduleDir,"../settings/utilities.ini")
		self.options = iniLoader.loadIniFile(iniPath)
	
	def parsePDBfile(self,id):
				
		if not os.path.exists(os.path.join(self.options["pdb_dir"],id.lower() + ".pdb")):
			self.getStructure(id)
		
		pdbContent = open(os.path.join(self.options["pdb_dir"],id.lower() + ".pdb")).read()
		
		JRNLdone = False
		self.data["pubmed"] = {}
		for line in pdbContent.split("\n"):
			if line[0:4] == "JRNL":
				m = re.match(r"JRNL\W+(?P<tag>\w+)\W+(?P<line>.)\W+(?P<data>.+)",line)
				
				if m.group('tag') not in self.data["pubmed"]:				
					self.data["pubmed"][m.group('tag').strip()] = m.group('data').strip()
				else:
					self.data["pubmed"][m.group('tag').strip()] += " "  + m.group('data').strip()
				
				JRNLdone = True
			elif JRNLdone:
				break
		
		self.data["pubmed"]['TITL'] = self.data["pubmed"]['TITL'].capitalize()
		
	def getStructure(self,id):
		url = "http://www.rcsb.org/pdb/files/" + id.lower()  + ".pdb"
		
		if not os.path.exists(os.path.join(self.options["pdb_dir"],id.lower() + ".pdb")):
			print "Downloading PDB " + id
			opener = urllib.FancyURLopener()
			f = opener.open(url)
					
			open(os.path.join(self.options["pdb_dir"],id.lower() + ".pdb"),"w").write(f.read())
	
	
	def stripPDBSequences(self,pdbList,aliFile):
		"""Write every chain of the listed PDB entries to its own file.

		pdbList -- iterable of PDB identifiers; missing files are downloaded
		           with getStructure().
		aliFile -- not used by this method; kept so existing callers keep
		           working.

		For each chain one file "./chains/<pdb>.<chain>.chn.fasta" is written
		through Modeller's chain.write(). Processing is best-effort: an entry
		that cannot be downloaded or read, or a chain that cannot be written,
		is reported on stdout and skipped, and the remaining entries are still
		handled.
		"""
		env = environ()
		#log.none()
		env.io.atom_files_directory = [self.options["pdb_dir"]]

		mdl = model(env)

		for pdb in pdbList:
			try:
				if not os.path.exists(os.path.join(self.options["pdb_dir"],pdb + ".pdb")):
					self.getStructure(pdb)
				mdl.read(file=pdb)
			except Exception as err:
				# Skip the entry: carrying on would write the chains still held
				# in mdl from the previous entry under this entry's name.
				print("%s could not be loaded, skipping: %s" % (pdb, err))
				continue

			chainSummary = [(c.name, len(c.residues)) for c in mdl.chains]
			print("%s Chain IDs and lengths:  %s" % (pdb, chainSummary))

			for c in mdl.chains:
				try:
					c.write(file="./chains/"  +pdb + ".%s.chn.fasta" % c.name, atom_file=pdb,align_code=pdb + "%s" % c.name)
				except Exception as err:
					print("%s chain %s could not be written: %s" % (pdb, c.name, err))

			print("done")
		
	def checkRMSD(self,pdb1,pdb2):
		"""Align chain A of two PDB entries and superpose them on their CA atoms.

		pdb1, pdb2 -- PDB identifiers; either file is downloaded with
		              getStructure() when it is absent from "pdb_dir".

		Returns the object produced by Modeller's selection.superpose() for
		the CA atoms of pdb1 fitted against pdb2 using the SALIGN alignment
		built here (presumably it carries the RMS values -- see the Modeller
		manual).
		"""
		pdbDir = self.options["pdb_dir"]
		for entry in (pdb1,pdb2):
			if not os.path.exists(os.path.join(pdbDir,entry + ".pdb")):
				self.getStructure(entry)

		env = environ()
		log.none()
		env.io.atom_files_directory = [pdbDir]

		# Only chain A of either structure takes part in the comparison.
		chainA = ('FIRST:A', 'LAST:A')

		scratch = model(env)
		pairAln = alignment(env)
		for entry in (pdb1,pdb2):
			scratch.read(file=entry, model_segment=chainA)
			pairAln.append_model(scratch, atom_files=entry, align_codes=entry+"A")

		salignOptions = {
			'rr_file': '${LIB}/blosum62.sim.mat',
			'gap_penalties_1d': (-500, 0),
			'output': '',
			'align_block': 15,                # no. of seqs. in first MSA
			'align_what': 'PROFILE',
			'alignment_type': 'PAIRWISE',
			'comparison_type': 'PSSM',        # or 'MAT' (caution: method not benchmarked for 'MAT')
			'similarity_flag': True,          # the score matrix is not rescaled
			'substitution': True,             # BLOSUM62 substitution values are multiplied to the corr. coef.
			'smooth_prof_weight': 10.0,       # for mixing data with priors
		}
		pairAln.salign(**salignOptions)

		# Superpose the two structures using the alignment just computed.
		reference = model(env, file=pdb1,model_segment=chainA)
		caAtoms = selection(reference).only_atom_types('CA')
		mobile = model(env, file=pdb2,model_segment=chainA)

		return caAtoms.superpose(mobile, pairAln)
		
		
	def makeModellerAlignment(self,structureID,structureChains,modelID,fastaPath=False,alignmentData={}):
		#set up model file in PIR format
		env = environ()
		log.none()
		
		pirDir = os.path.join(self.options['uniprot_dir'],"pir")
		pirPath = os.path.join(pirDir,modelID + '.pir')
		
		fastahelper = fastaHelper.fastaHelper()
		
		if fastaPath == False:
			fastaDir = os.path.join(self.options['uniprot_dir'],"fasta")
			fastaPath = os.path.join(fastaDir,modelID + '.fasta')
			fastahelper.fetchSequence(modelID,fastaDir)
		
		fastahelper.readFastaDb(fastaPath)
		tag =fastahelper.data[fastahelper.order[0]]["header"].split()[0][1:]
		print tag
		format = alignment(env, file=fastaPath, alignment_format='FASTA')
		format.write(file=pirPath, alignment_format='PIR')
		
		env = environ()
		env.io.atom_files_directory = [self.options["pdb_dir"]]
		
		
		
		
		for ID in structureID:
			print ID
			if not os.path.exists(os.path.join(self.options["pdb_dir"],ID + ".pdb")):
				self.getStructure(ID)
		
		
		outStr = ">P1;" + tag + "\n"
		outStr += "sequence::     : :     : :::-1.00:-1.00\n"
		outStr += alignmentData["query"] + "*\n"
		
		for i in range(0,len(structureID)):
			ID = structureID[i]
			chain = structureChains[i]
			
				
			mdl = model(env, file=os.path.join(self.options["pdb_dir"],ID+ ".pdb"), model_segment=('FIRST:' +chain, 'LAST:' +chain))
			
			aln = alignment(env)		
			
			aln.append_model(mdl, atom_files=os.path.join(self.options["pdb_dir"],ID+ ".pdb"), align_codes=ID)
			
			aln.write(file=os.path.join(self.options['modeller_dir'],tag + '.ali'))
			bits = open(os.path.join(self.options['modeller_dir'],tag + '.ali')).read().strip().split("\n")
			
			outStr += bits[0] + "\n"
			outStr += bits[1] + "\n"
			
			if ID in alignmentData:
				outStr += alignmentData[ID][chain] + "*\n"
			
		print outStr
		open(os.path.join(self.options['modeller_dir'],tag + '.ali'),"w").write(outStr)		
		
		return tag
		
	def runModeller(self,structureID,structureChains,modelID,fastaPath,alignmentData=None,normal=False):
		"""Build comparative models with Modeller and report the best DOPE score.

		structureID     -- list of template PDB identifiers (the "knowns").
		structureChains -- chain identifiers, one per entry of structureID.
		modelID         -- identifier of the target sequence.
		fastaPath       -- path of the target FASTA file, or False to have it
		                   fetched by makeModellerAlignment().
		alignmentData   -- aligned sequences, handed to makeModellerAlignment()
		                   unchanged.
		normal          -- False (default): build a single model with the slow
		                   schedule/refinement settings below. True: plain
		                   automodel run with starting_model 2 and
		                   ending_model 100.

		The working directory is changed to the configured "modeller_dir".
		Prints the name and DOPE score of the model with the lowest DOPE score
		and returns nothing. Raises IndexError when no model was built
		successfully.
		"""
		if alignmentData is None:
			alignmentData = {}

		os.chdir(self.options['modeller_dir'])

		# makeModellerAlignment() also downloads any template file that is
		# missing, so no separate download pass is needed here.
		tag = self.makeModellerAlignment(structureID,structureChains,modelID,fastaPath,alignmentData)
		alnPath = os.path.join(self.options['modeller_dir'],tag + '.ali')

		#log.verbose()
		log.none()

		env = environ()
		if not normal:
			env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)
		env.io.atom_files_directory = [self.options["pdb_dir"]]

		a = automodel(env,
			alnfile = alnPath,
			knowns = structureID,
			sequence = tag,
			assess_methods=(assess.DOPE, assess.GA341))

		if normal:
			a.starting_model = 2
			a.ending_model = 100
		else:
			a.starting_model = 1
			a.ending_model = 1

			a.library_schedule = autosched.slow
			a.max_var_iterations = 300
			a.md_level = refine.slow

			a.repeat_optimization = 2
			a.max_molpdf = 1e6

		a.make()

		key = 'DOPE score'

		ok_models = [x for x in a.outputs if x['failure'] is None]
		if not ok_models:
			raise IndexError("Modeller did not build any model successfully for " + tag)

		# min() returns the first of several equally scored models, exactly as
		# a stable ascending sort followed by [0] would.
		best = min(ok_models, key=lambda out: out[key])
		print("Top model: %s (DOPE score %.3f)" % (best['name'], best[key]))
		
		
		
		


if __name__ == "__main__":
	# Command line: <pdbID[,pdbID...]> <modelID> [fastaPath] [chain[,chain...]]
	# Flip the switch below to run the JRNL parsing demo instead of a
	# modelling run.
	modeller = True
	if modeller:
		if len(sys.argv) < 3:
			sys.exit("usage: " + sys.argv[0] + " <pdbID[,pdbID...]> <modelID> [fastaPath] [chain[,chain...]]")

		structureID =sys.argv[1].split(",")
		modelID =sys.argv[2]
		fastaPath = sys.argv[3] if len(sys.argv) > 3 else False

		# runModeller() needs one chain per template. Chain "A" is the default
		# when none are given, matching the chain checkRMSD() works on.
		if len(sys.argv) > 4:
			structureChains = sys.argv[4].split(",")
		else:
			structureChains = ["A"] * len(structureID)

		modelMaker = structureHelper()

		# NOTE(review): no alignment data can be supplied from the command
		# line, while makeModellerAlignment() reads alignmentData["query"];
		# confirm how this entry point is meant to obtain the alignment.
		modelMaker.runModeller(structureID,structureChains,modelID,fastaPath)
	else:
		pdbParser = structureHelper()
		pdbParser.parsePDBfile("2VIF")
		print(pdbParser.data)
	