#! /tools/bin/python2.7

import string, os, random, math

#path = raw_input('State working directorey: ')
#path = 'D:\DTU-studier\Master speciale\myShare\test\Ny mappe (2)'

# Start-up banner (single-argument call form prints identically on Python 2).
print('PRIDICTIONS INITIATED!!!!')


# Prior probability assigned to each class before any gene-family evidence is
# seen.  The class names are presumably hyperthermophile / thermophile /
# mesophile / psychrophile -- TODO confirm against the training script.
# The Hyp class gets a zero prior; the scoring loop does not evaluate it.
p_hyp = 0.0
p_ther = 0.3333
p_meso = 0.3333
p_psy = 0.3333

# Added to every frequency before taking its logarithm so that a frequency of
# exactly 0 (or 1, for absence) never produces log(0).
pseudocount = 0.1

# Read both input tables completely and release their handles right away.
# Inputs are read before any output file is created, so a missing input does
# not leave freshly truncated, empty result files behind.
with open('likelihoods.txt') as likelihood_file:
    likelihood = likelihood_file.readlines()
with open('Test-parse_results.txt') as parse_results_file:
    parse_results = parse_results_file.readlines()

# Output handles stay open for the rest of the script and are closed at the
# very end of the file.
output = open('predictions.txt', 'w')
parse_likelihoods = open('used_likelihoods.txt', 'w')
genefambased_predictions = open('genefam-based_predictions.txt', 'w')

# Likelihood rows actually consulted during scoring, in first-use order.
parselikelist = []

# Header row of the tab-separated posterior table: one label column followed
# by one posterior column per scored class.
header_columns = ['Organism', 'post(Ther)', 'post(Meso)', 'post(Psy)']
genefambased_predictions.write('\t'.join(header_columns) + '\n')

# Family identifier of every likelihood row: the first whitespace-separated
# token of each line, kept in file order so that positions in this list match
# positions in `likelihood`.
families = [entry.split()[0] for entry in likelihood]


# ---------------------------------------------------------------------------
# Naive-Bayes-style scoring of every row in the parse results.
#
# For each organism row, every presence/absence observation whose family has
# a likelihood row contributes log(freq + pseudocount) (family present) or
# log(1 - freq + pseudocount) (family absent) to each class score.  The
# scores start at the log priors and are normalised into posteriors.
# ---------------------------------------------------------------------------


def _class_log_terms(words, present):
    """Return the [Ther, Meso, Psy] log-likelihood terms of one observation.

    words   -- whitespace-split likelihood row; columns 2, 3 and 4 are read
               as the Ther, Meso and Psy frequencies (column 1 is not used).
    present -- True if the family was observed (obs == 1), otherwise False.

    Reads the module-level ``pseudocount``.  Raises ValueError for a
    non-numeric frequency and IndexError for a row with too few columns.
    """
    terms = []
    for column in (2, 3, 4):
        freq = float(words[column])
        if not present:
            freq = 1 - freq
        terms.append(math.log(freq + pseudocount))
    return terms


def _posteriors_from_logs(log_scores):
    """Turn unnormalised log scores into probabilities that sum to 1.

    The largest score is subtracted before exponentiating.  The best class
    then contributes exp(0) == 1, so the normalising sum can never underflow
    to zero, no matter how many observations were accumulated.  Calling
    math.exp() directly on the raw sums underflows to 0.0 for every class on
    large inputs and makes the division fail.
    """
    peak = max(log_scores)
    weights = [math.exp(score - peak) for score in log_scores]
    total = sum(weights)
    return [weight / total for weight in weights]


# Column labels of the parse-results table (first line of the file).
fams = parse_results[0].split()

# Family name -> index of its first row in `likelihood` (setdefault keeps the
# first occurrence, which is what list.index() would return).
family_row = {}
for row_index, family in enumerate(families):
    family_row.setdefault(family, row_index)

# Constant-time companion of `parselikelist` for the "already recorded" check.
seen_likelihoods = set(parselikelist)

reslist = ['Hyp_', 'Ther_', 'Meso_', 'Psy_']
log_priors = [math.log(p_ther), math.log(p_meso), math.log(p_psy)]

for line in parse_results[1:]:
    presentlist = line.split()
    if not presentlist:
        # Blank line (e.g. trailing newline at the end of the file).
        continue

    log_scores = list(log_priors)
    for column, obs in enumerate(presentlist):
        # Columns whose label is not a known family (normally including the
        # organism-name column) carry no evidence and are skipped.
        row_index = family_row.get(fams[column])
        if row_index is None:
            continue

        # Likelihood rows may use a decimal comma; normalise to a point.
        likelihood_freqs = likelihood[row_index].replace(',', '.')
        if likelihood_freqs not in seen_likelihoods:
            seen_likelihoods.add(likelihood_freqs)
            parselikelist.append(likelihood_freqs)

        terms = _class_log_terms(likelihood_freqs.split(), int(obs) == 1)
        log_scores = [score + term for score, term in zip(log_scores, terms)]

    post_ther, post_meso, post_psy = _posteriors_from_logs(log_scores)
    post_hyp = 0.0  # the Hyp class is not scored
    predlist = [post_hyp, post_ther, post_meso, post_psy]
    best = max(predlist)

    # predictions.txt: name, every class label tied for the best posterior
    # (each followed by a tab), then the best posterior itself.
    output.write(presentlist[0])
    output.write('\t')
    for label, posterior in zip(reslist, predlist):
        if posterior == best:
            output.write(label)
            output.write('\t')
    output.write(str(best))
    output.write('\n')

    # genefam-based_predictions.txt: name plus the three class posteriors.
    genefambased_predictions.write('\t'.join(
        [presentlist[0], str(post_ther), str(post_meso), str(post_psy)]))
    genefambased_predictions.write('\n')


# Write out the likelihood rows collected in `parselikelist`, verbatim and in
# list order (no separator is added between entries).
parse_likelihoods.writelines(parselikelist)

# Flush and release all three result files.
# NOTE: the follow-up script 'predictive_evaluation_new.py' is not launched
# from here; the os.startfile() call that did so was disabled.
for handle in (parse_likelihoods, output, genefambased_predictions):
    handle.close()