#!/usr/bin/env python ################################################################## # # # Deposit3D - a PDB structure deposition tool # # # # A command-line script to create a mmCIF deposition file for # # automated structure submissions to RCSB/PDB ADIT. # # # # Executing the script provides operating instructions. # # # # To configure this script a path needs to be provided to a CCP4 # # installation (cf the ccp4installation parameter). # # # # Disclaimer: This software is distributed 'as is' without # # warranty of any kind. SGX and the authors shall not be # # liable for any direct, consequential or other damages suffered # # by the user or any others resulting from use of this software # # # # Structural GenomiX April 2005 # # # ################################################################## # import sys import os import string # set path to CCP4 installation ccp4installation = '/biodata/software/linux/packages/ccp4-5.0.2' ##################################### # Annotation default initialization # ##################################### # Set the CCP4 default flag ('0') for the cross-validation set, the 'mask' bulk solvent correction and the TRUNCATE defaults for data reduction nfree_exclude = '0' ref_bulksolvent = 'mask' truncate_default_i = '-4.0' truncate_default_f = '0.0' # Null initializations audit_author_name = '?' audit_contact_author_name = '?' audit_contact_author_email = '?' audit_contact_author_address = '?' audit_contact_author_phone = '?' audit_contact_author_fax = '?' citation_title = '?' citation_journal_abbrev = '?' citation_journal_volume = '?' citation_page_first = '?' citation_page_last = '?' citation_year = '?' citation_author_name = '?' data_collection_temp_K = '?' data_collection_date = '?' wavelengths = '?' beamline = '?' detector_type = '?' detector_maker = '?' monochromator_type = '?' xray_method = '?' computing_data_collection = '?' computing_data_reduction = '?' computing_structure_solution = '?' computing_molecular_graphics = '?' computing_structure_refinement = '?' protein_name = '?' protein_ec_number = '?' structure_title = '?' structure_class = '?' structure_keywords = '?' biological_unit = '?' sequence_databasename = '?' sequence_databasecode = '?' source_common_name = '?' source_scientific_name = '?' source_gene_name = '?' source_host_common_name = '?' source_host_scientific_name = '?' acell = '?' bcell = '?' ccell = '?' alpha = '?' beta = '?' gamma = '?' spgno = '?' spgname = '?' matthews_coef = '?' solvent_percent = '?' exptl_crystal_grow_method = '?' exptl_crystal_grow_pH = '?' exptl_crystal_grow_temp = '?' exptl_crystal_grow_components = '?' data_num_unmerged = '?' data_num = '?' data_rlow = '?' data_rhigh = '?' data_percentobs = '?' data_redund = '?' data_rmerge = '?' data_ioversig = '?' datas_num = '?' datas_num_unmerged = '?' datas_rlow = '?' datas_rhigh = '?' datas_percentobs = '?' datas_redund = '?' datas_rmerge = '?' datas_ioversig = '?' ref_b11 = '?' ref_b12 = '?' ref_b13 = '?' ref_b23 = '?' ref_b22 = '?' ref_b33 = '?' ref_dlow = '?' ref_dhigh = '?' ref_bmean = '?' ref_numobs = '?' ref_numall = '?' ref_numfree = '?' ref_numwork = '?' ref_percent = '?' ref_rall = '?' ref_robs = '?' ref_rwork = '?' ref_rfree = '?' ref_dbond = '?' ref_dangle = '?' ref_dtorsion = '?' ref_dchiral = '?' ref_dplane = '?' ref_bmbond = '?' ref_bmangle = '?' ref_bsbond = '?' ref_bsangle = '?' ref_solvent_vdw_probe_radii = '?' ref_solvent_ion_probe_radii = '?' ref_solvent_shrinkage_radii = '?' ref_ksolv = '?' ref_bsolv = '?' ref_natom = '?' ref_nsolvent = '?' 
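# ----------------------------------------------------------------------
# Illustrative sketch only (nothing below calls it): the script checks
# for required files many times with the pattern
#   fileexists = os.path.exists(path)  /  if fileexists == 0: ... sys.exit(1)
# A small helper of the following form could express that pattern once.
# The name require_file is hypothetical and is not part of Deposit3D.
# ----------------------------------------------------------------------
def require_file(path, description):
    # Exit with a short message if a required input file is missing
    if not os.path.exists(path):
        print '\n' + description + ' was not found\n'
        sys.exit(1)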
##################################################### # Initialize operating defaults and structure data # ##################################################### xtalsetup = ccp4installation + '/include/ccp4.setup' entitylist = ccp4installation + '/lib/data/monomers/full_names.list' symmetrylib = ccp4installation + '/lib/data/symop.lib' quote = """'""" pwd = os.getcwd() input = 'none' pdbfile = 'none' mtzfile = 'none' seqfile = 'none' scalafile = 'none' scalepackfile = 'none' templatefile = 'none' reflineList = [] dataList = [] dataList_prev = [] mtzList = [] aList_chains = [] seqList = [] symList = [] aList_connect = [] readdata = 'no' read_mtzlabels = 'no' read_cell = 'no' acell_mtz = '?' bcell_mtz = '?' ccell_mtz = '?' alpha_mtz = '?' beta_mtz = '?' gamma_mtz = '?' found_c = 'no' found_n = 'no' cryst_flag = 'no' ref_anisoflag = 'no' water_flag = 'no' read_project_count = 0 atom_count = 0 solvent_count = 0 num_connect = 0 famp = '?' sd = '?' freer = '?' footnote = ' . ' # Het group lists aList_hets = [] aList_hets_names = [] aList_hets_nonPDB = [] aList_hets_number = [] aList_hets_asym = [] # refmac.out parsing aList_bonds_chain = [] aList_bonds_resno = [] aList_bonds_resname = [] aList_angles_chain = [] aList_angles_resno = [] aList_angles_resname = [] aList_contacts_chain = [] aList_contacts_resno = [] aList_contacts_resname = [] aList_chiral_chain = [] aList_chiral_resno = [] aList_chiral_resname = [] aList_cis_chain = [] aList_cis_resno = [] aList_cis_resname = [] bond_list = 'no' angle_list = 'no' contact_list = 'no' cispep_list = 'no' chiral_list = 'no' iteration_final = 'no' ################ # Check inputs # ################ fileexists = os.path.exists(ccp4installation) if fileexists == 0: print '\nThe CCP4 installation was not found\n' sys.exit(1) fileexists = os.path.exists(xtalsetup) if fileexists == 0: print '\nThe setup file for the CCP4 installation was not found\n' sys.exit(1) fileexists = os.path.exists(entitylist) if fileexists == 0: print '\nThe CCP4 entity list was not found\n' sys.exit(1) fileexists = os.path.exists(symmetrylib) if fileexists == 0: print '\nThe CCP4 symmetry library was not found\n' sys.exit(1) number_of_args = len(sys.argv) if number_of_args == 2: input = str(sys.argv[1]) if number_of_args < 4 and input != 'help': print '\nPlease provide the following files on the command-line (any order):' print 'Coordinates (*.pdb), Reflection data (*.mtz), FASTA sequence (*.faa)' print 'Optionally, you may provide a CCP4/SCALA log file (*.log) or a' print 'SCALEPACK file (*.sca). You may also provide deposition template file' print 'containing "non-electronic" user information (*.template).' print '\nFor each novel ligand (not in CCP4/REFMAC ligand libraries), there' print 'should be a local PDB file containing an idealized coordinate template' print 'using the ligand code as file root and ".ideal" as file extension.' print 'Example: there is a file "UNK.ideal" for a ligand designated "UNK".\n' print 'Note that each protein in the coordinate file should be identified by a separate' print 'chain-id and non-protein entities should be identified by chain-id "W".\n' print 'Enter "help" on the command-line for more information on these formats\n' sys.exit(1) if number_of_args == 2 and input == 'help': print '\nNotes on input file formats: \n' print '1. Each protein in the coordinate file should be identified by a separate' print 'chain-id (usually A,B,C,...). Non-protein entities should be identified' print 'by chain-id "W". 
The file should contain a CRYST1 record.\n' print '2. The reflection file should contain only one column of type F (structure' print 'factor amplitude), one column of type Q (standard deviation on structure ' print 'factor amplitude) and one column of type I (flags for working and test' print 'data for validation). The CCP4 convention with Rfree data flagged by "0"' print 'is applied. This may be changed via script default parameter "nfree_exclude".' print 'All data are used in the calculation, without cutoff on sd(F), i.e. Rall and' print 'Robs are synonymous.\n' print '3. The FASTA sequence file may contain blank lines and a title line identified' print 'by a ">" symbol.\n' print '4. The parsing of the SCALA log files requires that the file was produced by ' print 'CCP4 version 5.*\n' print '5. The parsing of SCALEPACK log files has only been lightly tested with' print 'HKL version 1.97.\n' sys.exit(1) # Banner print '\n___________________________________________________________________\n' print ' ** Deposit3D 1.0 **' print 'Author : John Badger (Structural GenomiX)' print 'Release: April 2005\n' print 'May be modified or redistributed but this banner may not be removed' print 'except by the author. Functional modifications should be cited here.' print '___________________________________________________________________\n' count = 1 while count < number_of_args: inputfile = str(sys.argv[count]) if inputfile.find('.pdb') > -1: pdbfile = inputfile if inputfile.find('.mtz') > -1: mtzfile = inputfile if inputfile.find('.faa') > -1: seqfile = inputfile if inputfile.find('.log') > -1: scalafile = inputfile if inputfile.find('.sca') > -1: scalepackfile = inputfile if inputfile.find('.template') > -1: templatefile = inputfile count = count + 1 # Check for PDB file if pdbfile == 'none': print 'No coordinate file (*.pdb) was found\n' sys.exit(1) else: fileexists = os.path.exists(pdbfile) if fileexists == 0: print '\nThe coordinate file was not found\n' sys.exit(1) # Check for MTZ file if mtzfile == 'none': print 'No data file (*.mtz) was found\n' sys.exit(1) else: fileexists = os.path.exists(mtzfile) if fileexists == 0: print '\nThe data file was not found\n' sys.exit(1) # Check for FASTA sequence file if seqfile == 'none': print 'No sequence file (*.faa) was found\n' sys.exit(1) else: fileexists = os.path.exists(seqfile) if fileexists == 0: print '\nThe sequence file was not found\n' sys.exit(1) # Check for SCALA log file if scalafile == 'none': if scalepackfile == 'none': print 'No merging (SCALA) file was found' else: fileexists = os.path.exists(scalafile) if fileexists == 0: print '\nThe SCALA log file was not found\n' sys.exit(1) computing_data_reduction = 'SCALA, TRUNCATE' # Check for SCALEPACK log file if scalepackfile == 'none': if scalafile == 'none': print 'No merging (SCALEPACK) file was found' else: fileexists = os.path.exists(scalepackfile) if fileexists == 0: print '\nThe SCALEPACK log file was not found\n' sys.exit(1) computing_data_reduction = 'SCALEPACK, TRUNCATE' # Check for deposition information file if templatefile == 'none': print 'No deposition information file (*.template) was found' else: fileexists = os.path.exists(templatefile) if fileexists == 0: print '\nThe deposition information file was not found\n' sys.exit(1) ###################################################### # Inspect coordinate data to obtain entity lists etc # ###################################################### # Read through the coordinate file file = open(pdbfile,'r') allLines = file.readlines() 
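# For reference, the parsing loop below reads fixed columns from each
# PDB record (standard PDB fixed-column format); the slices used are:
#   [0:6]   record tag (ATOM/HETATM/CRYST1/ANISOU/TER/END)
#   [13:16] atom name        [17:20] residue name
#   [21:22] chain-id         [22:26] residue number
#   [30:38] x   [38:46] y   [46:54] z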
file.close() for eachLine in allLines: tag = eachLine[0:6] tag = tag.strip() chain_id = eachLine[21:22] chain_id = chain_id.strip() res_name = eachLine[17:20] res_name = res_name.strip() atom_name = eachLine[13:16] atom_name = atom_name.strip() res_number = eachLine[22:26] x = eachLine[30:38] y = eachLine[38:46] z = eachLine[46:54] # Check for cell dimensions if tag == 'CRYST1': cryst_flag = 'yes' if tag == 'ATOM' or tag == 'HETATM': length_chain_id = len(chain_id) if length_chain_id == 0: print '\nThere are ATOM/HETATM record(s) without chain-ids.' print 'Each protein in the coordinate file should be identified by a separate' print 'chain-id (usually A,B,C,...). Non-protein entities should be identified' print 'by chain-id "W".\n' sys.exit(1) atom_count = atom_count + 1 # protein if chain_id != 'W': # Check if we already have this polypeptide repeat_chain = 'no' count = 0 count_chains = len(aList_chains) while count < count_chains: if chain_id == aList_chains[count]: repeat_chain = 'yes' count = count + 1 # If a new chain, add to the list if repeat_chain == 'no': aList_chains.append(chain_id) # Non-protein atom count if chain_id == 'W': solvent_count = solvent_count + 1 # ligands if chain_id == 'W' and res_name != 'HOH': # Check if we already have this entity repeat = 'no' count = 0 count_hets = len(aList_hets) while count < count_hets: if res_name == aList_hets[count]: repeat = 'yes' count = count + 1 # If a new entity, add to the list if repeat == 'no': aList_hets.append(res_name) # Check for water if res_name == 'HOH': water_flag = 'yes' # Check for peptide links across non-consecutive residue nos if chain_id != 'W': if atom_name == 'C': store_chain_c = chain_id store_number_c = res_number store_name_c = atom_name store_res_c = res_name xc = float(x) yc = float(y) zc = float(z) found_c = 'yes' if atom_name == 'N': store_chain_n = chain_id store_number_n = res_number store_name_n = atom_name store_res_n = res_name xn = float(x) yn = float(y) zn = float(z) found_n = 'yes' if found_c == 'yes' and found_n == 'yes': store_number_c_int = int(store_number_c) store_number_n_int = int(store_number_n) seqno_diff = store_number_c_int - store_number_n_int seqno_diff = abs(seqno_diff) if seqno_diff > 1: dx = xn - xc dy = yn - yc dz = zn - zc dist = dx*dx + dy*dy + dz*dz # Add special TRANS link record if it seems to be one if dist < 3.0: num_connect = num_connect + 1 link_record = 'LINK C ' + store_res_c + ' ' + store_chain_c + store_number_c \ + ' N ' + store_res_n + ' ' + store_chain_n + store_number_n \ + ' TRANS' aList_connect.append(link_record) found_c = 'no' found_n = 'no' # Check for anisotropic refinement if tag == 'ANISOU': ref_anisoflag = 'yes' # Checks on PDB file integrity if cryst_flag == 'no': print '\nThe coordinate file must contain a CRYST1 record\n' sys.exit(1) number_chains = len(aList_chains) if number_chains == 0: print '\nEach protein molecule must be identified by chain-id (A,B,..)' print 'The water and ligand entities must be identified by chain-id W.\n' sys.exit(1) ############################ # Obtain the entity lists # ############################ fileexists = os.path.exists(entitylist) if fileexists != 0: file = open(entitylist,'r') allLines = file.readlines() file.close() else: print '\nList of PDB entities was not found\n' sys.exit(1) count = 0 count_hets = len(aList_hets) while count < count_hets: found = 'no' pdbentity = aList_hets[count] pdbentity = pdbentity.strip() for eachLine in allLines: tag = eachLine[0:4] tag = tag.strip() if tag == 'code': entitycode = 
eachLine[5:8] entitycode = entitycode.strip() if tag == 'name': entityname = eachLine[5:80] entityname = entityname.strip() if pdbentity == entitycode: found = 'yes' aList_hets_names.append(entityname) # Obtain user input of names for novel ligands and store if found == 'no': print '\nNovel ligand identified by code ',pdbentity novelligand = raw_input('Enter a name for this ligand: ') novelligand = novelligand.strip() aList_hets_names.append(novelligand) aList_hets_nonPDB.append(pdbentity) count = count + 1 # List PDB name assignments from the REFMAC5 list if count_hets > 0: print '\nList of PDB HETNAM assignments' print '==============================' count = 0 while count < count_hets: prentitycode = aList_hets[count] prentityname = aList_hets_names[count] print prentitycode,prentityname count = count + 1 # Establish lists of ligand entity codes and pointers number_entities = len(aList_hets) entity_list_number = 1 count = 0 if number_entities > 0: while count < number_entities: entity_list_number = entity_list_number + 1 pr_entity_list_number = str(entity_list_number) aList_hets_number.append(pr_entity_list_number) entity_code = aList_hets[count] entity_code_asym = entity_code + '_W ' aList_hets_asym.append(entity_code_asym) count = count + 1 if water_flag == 'yes': entity_list_number = entity_list_number + 1 water_entity_list_number = str(entity_list_number) # Check we have information to deal with novel entities number_nonPDB = len(aList_hets_nonPDB) if number_nonPDB > 0: count = 0 while count < number_nonPDB: entity = aList_hets_nonPDB[count] entity_file = entity + '.ideal' # Check file exists and does contain appropriate ATOM/HETATM records fileexists = os.path.exists(entity_file) if fileexists == 0: print 'Ideal coordinate file for the ligand was not found: ',entity_file,'\n' sys.exit(1) else: ligand_records = 'no' file = open(entity_file, 'r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find(entity) > -1: ligand_records = 'yes' if ligand_records == 'no': print '\nFile did not contain ',entity,' records\n' sys.exit(1) count = count + 1 # Obtain atom counts if atom_count == 0: print '\nNo atoms were found in this PDB file\n' sys.exit(1) ref_natom = str(atom_count) ref_nsolvent = str(solvent_count) ################################################### # Rewrite clean PDB now everything seems in order # ################################################### file = open(pdbfile,'r') allLines = file.readlines() file.close() file = open('temp_use.pdb','w') # Write any peptide LINK records if num_connect > 0: count = 0 while count < num_connect: link_record = aList_connect[count] file.write(link_record) file.write('\n') count = count + 1 # Write CRYST1, ATOM/HETATM, TER, END records for eachLine in allLines: tag = eachLine[0:6] tag = tag.strip() if tag == 'CRYST1': file.write(eachLine) if tag == 'ATOM' or tag == 'HETATM': file.write(eachLine) if tag == 'TER': file.write(eachLine) if tag == 'END': file.write(eachLine) file.close() ############################################## # Begin reporting main calculation processes # ############################################## print '\nProcess Summary' print '===============' ######################################################### # Analyse mtz file for label and spacegroup information # ######################################################### # C-shell execution of MTZDMP filename = 'runmtzdmp.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) 
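# The generated wrapper (runmtzdmp.sh) ends up looking roughly like:
#   #!/bin/csh -f
#   source /path/to/ccp4.setup
#   $CETC/mtzdmp input.mtz -s > mtzdmp.out
# (the paths shown are placeholders for the configured CCP4 setup file
# and the user-supplied MTZ file)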
file.write('\n$CETC/mtzdmp ') file.write(mtzfile) file.write(' -s > mtzdmp.out\n') file.close() os.system('chmod +x runmtzdmp.sh') os.system('./runmtzdmp.sh') file = open('mtzdmp.out', 'r') allLines = file.readlines() file.close() os.remove('mtzdmp.out') os.remove('runmtzdmp.sh') for eachLine in allLines: # Determine space group number if eachLine.find('* Space Group') > -1: mtzList = eachLine.split() spgno = mtzList[4] # Initialize to read mtz data table if eachLine.find('OVERALL FILE STATISTICS') > -1: read_mtzlabels = 'yes' if read_mtzlabels == 'yes': mtzList = eachLine.split() mtzList_length = len(mtzList) if mtzList_length == 12: # Determine amplitude label if mtzList[10] == 'F': famp = mtzList[11] # Determine standard deviation(amplitude) label if mtzList[10] == 'Q': sd = mtzList[11] # Determine Cross-validation label if mtzList[10] == 'I': freer = mtzList[11] # Try to parse wavelength (available from CCP4 5 mtzdumps) if read_project_count > 0: read_project_count = read_project_count + 1 if eachLine.find('Dataset ID, project/crystal/dataset names, cell dimensions, wavelength') > -1: read_project_count = read_project_count + 1 if read_project_count == 7: mtzList = eachLine.split() mtzList_length = len(mtzList) read_project_count = 0 if mtzList_length == 1: mtzwavelength = mtzList[0] if mtzwavelength != '0.00000' and wavelengths == '?': wavelengths = mtzwavelength # Determine cell if read_cell == 'yes': mtzList = eachLine.split() mtzList_length = len(mtzList) if mtzList_length == 6: acell_mtz = mtzList[0] bcell_mtz = mtzList[1] ccell_mtz = mtzList[2] alpha_mtz = mtzList[3] beta_mtz = mtzList[4] gamma_mtz = mtzList[5] read_cell = 'no' if eachLine.find('Cell Dimensions') > -1: read_cell = 'yes' # Check all items were found if famp == '?': print 'The MTZ file label for structure factor amplitude was not determined (type F)' sys.exit(1) else: print 'Using structure factor amplitude:',famp if sd == '?': print '\nThe MTZ file label for structure factor standard deviation was not determined (type Q)' sys.exit(1) else: print 'Using standard deviation on structure factor amplitude:',sd if freer == '?': print '\nThe MTZ file label for the freeR flag was not determined (type I)' sys.exit(1) else: print 'Using Free-R data set defined by flag:',freer # Set H-M space group name from space group number file = open(symmetrylib, 'r') allLines = file.readlines() file.close() for eachLine in allLines: symList = eachLine.split() length_symList = len(symList) if length_symList > 1: if symList[0] == spgno: symList = eachLine.split(quote) spgname = symList[1] ################################ # Parse FASTA sequence file # ################################ print 'Parsing FASTA file' # Load single character series into a large list title_count = 0 file = open(seqfile,'r') allLines = file.readlines() file.close() for eachLine in allLines: parse_line = 'yes' # skip title line tag = eachLine[0:1] if tag == '>': parse_line = 'no' title_count = title_count + 1 # Trap out multiple sequences deliminated by title if title_count > 1: parse_line = 'no' # skip blank lines line_length = len(eachLine) if line_length == 0: parse_line = 'no' # load if parse_line == 'yes': sequence_line = eachLine.strip() line_length = len(sequence_line) line_length = line_length + 1 count = 1 while count < line_length: j = count i = count - 1 aacode = sequence_line[i:j] seqList.append(aacode) count = count + 1 # Check that we have the sequence sequence_length = len(seqList) if sequence_length == 0: print '\nFASTA sequence extraction failed\n' 
sys.exit(1) ######################################## # Parse SCALA log file (if available) # ######################################## # This function requires that the SCALA log came from CCP4 5.0.2 since it parses from the new summary if scalafile != 'none': print 'Parsing SCALA log file' file = open(scalafile,'r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('Overall') > -1 and eachLine.find('OuterShell') > -1: readdata = 'yes' if readdata == 'yes': dataList = eachLine.split() if eachLine.find('Low resolution limit') > -1: data_rlow = dataList[3] datas_rlow = dataList[4] if eachLine.find('High resolution limit') > -1: data_rhigh = dataList[3] datas_rhigh = dataList[4] if eachLine.find('Rmerge') > -1: data_rmerge = dataList[1] datas_rmerge = dataList[2] if eachLine.find('Total number of observations') > -1: data_num_unmerged = dataList[4] datas_num_unmerged = dataList[5] if eachLine.find('Total number unique') > -1: data_num = dataList[3] datas_num = dataList[4] if eachLine.find('Mean(I)/sd(I)') > -1: data_ioversig = dataList[1] datas_ioversig = dataList[2] if eachLine.find('Completeness') > -1: data_percentobs = dataList[1] datas_percentobs = dataList[2] if eachLine.find('Multiplicity') > -1: data_redund = dataList[1] datas_redund = dataList[2] ############################################ # Parse SCALEPACK log file (if available) # ############################################ # Only lightly tested with DENZO/SCALEPACK 1.97 installation float_num_refs_theoretical = '?' if scalepackfile != 'none': print 'Parsing SCALEPACK log file' file = open(scalepackfile,'r') allLines = file.readlines() file.close() readredundancy = 'no' res_table = 'no' for eachLine in allLines: # Resolution, Rmerge, Av-I/error if eachLine.find(' All reflections') > -1: dataList = eachLine.split() # Rmerge data_rmerge = dataList[6] datas_rmerge = dataList_prev[6] # Av-I/error mean_i = dataList[2] mean_sigi = dataList[3] mean_i = float(mean_i) mean_sigi = float(mean_sigi) netIoveravsigmaI = mean_i/mean_sigi netIoveravsigmaI_out = round(netIoveravsigmaI,1) data_ioversig = str(netIoveravsigmaI_out) mean_i = dataList_prev[2] mean_sigi = dataList_prev[3] mean_i = float(mean_i) mean_sigi = float(mean_sigi) netIoveravsigmaI = mean_i/mean_sigi netIoveravsigmaI_out = round(netIoveravsigmaI,1) datas_ioversig = str(netIoveravsigmaI_out) # Resolution datas_rlow = dataList_prev[0] datas_rhigh = dataList_prev[1] data_rhigh = dataList_prev[1] # Low resolution shell limit if res_table == 'yes' and data_rlow == '?': dataList = eachLine.split() data_rlow = dataList[0] if eachLine.find('limit Angstrom I error stat. 
Chi**2 R-fac R-fac') > -1: res_table = 'yes' # Total reflection count if eachLine.find('All films') > -1: dataList = eachLine.split() data_num_unmerged = dataList[2] # Get average redundancy if readredundancy == 'yes' and eachLine.find('All hkl') > -1: dataList = eachLine.split() data_num = dataList[12] float_data_num = float(data_num) float_data_num_unmerged = float(data_num_unmerged) redundancy = float_data_num_unmerged/float_data_num redundancy_out = round(redundancy,2) data_redund = str(redundancy_out) datas_num = dataList_prev[12] readredundancy = 'no' if eachLine.find('Summary of observation redundancies by shells:') > -1 and data_num_unmerged != '?': readredundancy = 'yes' # Keep previous line dataList_prev = eachLine.split() # Compute overall data completeness versus theoretical filename = 'rununique.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/unique HKLOUT temp_unique.mtz << end_unique > unique.out\n') file.write('TITLE unique_data\n') file.write('LABOUT F=XFUNI SIGF=XSIGFUNI\n') file.write('RESOLUTION ') file.write(data_rhigh) file.write('\nSYMM ') file.write(spgno) file.write('\nCELL ') file.write(acell_mtz) file.write(' ') file.write(bcell_mtz) file.write(' ') file.write(ccell_mtz) file.write(' ') file.write(alpha_mtz) file.write(' ') file.write(beta_mtz) file.write(' ') file.write(gamma_mtz) file.write('\nEND\n') file.write('end_unique\n') file.close() os.system('chmod +x rununique.sh') os.system('./rununique.sh') fileexists = os.path.exists('unique.out') if fileexists == 0: print '\nSCALEPACK data completeness calculation failed' sys.exit(1) else: file = open('unique.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('reflections within resolution limits') > -1: dataList = eachLine.split() num_refs_theoretical = dataList[0] float_num_refs_theoretical = float(num_refs_theoretical) if float_num_refs_theoretical != '?': float_data_percentobs = 100.0 * float_data_num/float_num_refs_theoretical data_percentobs = round(float_data_percentobs,1) data_percentobs = str(data_percentobs) os.remove('unique.out') os.remove('rununique.sh') # Compute outer shell data completeness versus theoretical float_num_refs_theoretical = '?' filename = 'runmtzdmp.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/mtzdump HKLIN temp_unique.mtz << end-mtzdump > mtzdump.out\n') file.write('NREF 1\n') file.write('STATS NBIN 1 RESO ') file.write(datas_rlow) file.write(' ') file.write(datas_rhigh) file.write('\nEND\n') file.write('end-mtzdump\n') file.close() os.system('chmod +x runmtzdmp.sh') os.system('./runmtzdmp.sh') fileexists = os.path.exists('mtzdump.out') if fileexists == 0: print '\nSCALEPACK shell data completeness calculation failed' sys.exit(1) else: file = open('mtzdump.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('No. 
of reflections used in FILE STATISTICS') > -1: dataList = eachLine.split() num_refs_theoretical = dataList[7] float_num_refs_theoretical = float(num_refs_theoretical) if float_num_refs_theoretical != '?': float_datas_num = float(datas_num) float_datas_percentobs = 100.0 * float_datas_num/float_num_refs_theoretical datas_percentobs = round(float_datas_percentobs,1) datas_percentobs = str(datas_percentobs) os.remove('mtzdump.out') os.remove('runmtzdmp.sh') os.remove('temp_unique.mtz') ################################################### # Parse template information file (if available) # ################################################### if templatefile != 'none': print 'Parsing template file' file = open(templatefile,'r') allLines = file.readlines() file.close() for eachLine in allLines: dataList = eachLine.split(':') length = len(dataList) # Parse each item from the template file if length == 2: # Section 1 if eachLine.find('audit_author_name') > -1: audit_author_name = dataList[1] audit_author_name = audit_author_name.strip() if eachLine.find('audit_contact_author_name') > -1: audit_contact_author_name = dataList[1] audit_contact_author_name =audit_contact_author_name.strip() if eachLine.find('audit_contact_author_email') > -1: audit_contact_author_email = dataList[1] audit_contact_author_email = audit_contact_author_email.strip() if eachLine.find('audit_contact_author_address') > -1: audit_contact_author_address = dataList[1] audit_contact_author_address = audit_contact_author_address.strip() if eachLine.find('audit_contact_author_phone') > -1: audit_contact_author_phone = dataList[1] audit_contact_author_phone = audit_contact_author_phone.strip() if eachLine.find('audit_contact_author_fax') > -1: audit_contact_author_fax = dataList[1] audit_contact_author_fax = audit_contact_author_fax.strip() if eachLine.find('citation_title') > -1: citation_title = dataList[1] citation_title = citation_title.strip() if eachLine.find('citation_journal_abbrev') > -1: citation_journal_abbrev = dataList[1] citation_journal_abbrev = citation_journal_abbrev.strip() if eachLine.find('citation_journal_volume') > -1: citation_journal_volume = dataList[1] citation_journal_volume = citation_journal_volume.strip() if eachLine.find('citation_page_first') > -1: citation_page_first = dataList[1] citation_page_first = citation_page_first.strip() if eachLine.find('citation_page_last') > -1: citation_page_last = dataList[1] citation_page_last = citation_page_last.strip() if eachLine.find('citation_year') > -1: citation_year = dataList[1] citation_year = citation_year.strip() if eachLine.find('citation_author_name') > -1: citation_author_name = dataList[1] citation_author_name = citation_author_name.strip() # Section 2 if eachLine.find('data_collection_temp_K') > -1: data_collection_temp_K = dataList[1] data_collection_temp_K = data_collection_temp_K.strip() if eachLine.find('wavelengths') > -1: wavelengths = dataList[1] wavelengths = wavelengths.strip() if eachLine.find('data_collection_date') > -1: data_collection_date = dataList[1] data_collection_date = data_collection_date.strip() if eachLine.find('beamline') > -1: beamline = dataList[1] beamline = beamline.strip() if eachLine.find('detector_type') > -1: detector_type = dataList[1] detector_type = detector_type.strip() if eachLine.find('detector_maker') > -1: detector_maker = dataList[1] detector_maker = detector_maker.strip() if eachLine.find('monochromator_type') > -1: monochromator_type = dataList[1] monochromator_type = monochromator_type.strip() if 
eachLine.find('xray_method') > -1: xray_method = dataList[1] xray_method = xray_method.strip() if eachLine.find('computing_data_collection') > -1: computing_data_collection = dataList[1] computing_data_collection = computing_data_collection.strip() if eachLine.find('computing_data_reduction') > -1: computing_data_reduction = dataList[1] computing_data_reduction = computing_data_reduction.strip() if eachLine.find('computing_structure_solution') > -1: computing_structure_solution = dataList[1] computing_structure_solution = computing_structure_solution.strip() if eachLine.find('computing_molecular_graphics') > -1: computing_molecular_graphics = dataList[1] computing_molecular_graphics = computing_molecular_graphics.strip() if eachLine.find('computing_structure_refinement') > -1: computing_structure_refinement = dataList[1] computing_structure_refinement = computing_structure_refinement.strip() # Section 3 if eachLine.find('protein_name') > -1: protein_name = dataList[1] protein_name = protein_name.strip() if eachLine.find('protein_ec_number') > -1: protein_ec_number = dataList[1] protein_ec_number = protein_ec_number.strip() if eachLine.find('structure_title') > -1: structure_title = dataList[1] structure_title = structure_title.strip() if eachLine.find('structure_class') > -1: structure_class = dataList[1] structure_class = structure_class.strip() if eachLine.find('structure_keywords') > -1: structure_keywords = dataList[1] structure_keywords = structure_keywords.strip() if eachLine.find('biological_unit') > -1: biological_unit = dataList[1] biological_unit = biological_unit.strip() if eachLine.find('sequence_databasename') > -1: sequence_databasename = dataList[1] sequence_databasename = sequence_databasename.strip() if eachLine.find('sequence_databasecode') > -1: sequence_databasecode = dataList[1] sequence_databasecode = sequence_databasecode.strip() if eachLine.find('source_common_name') > -1: source_common_name = dataList[1] source_common_name = source_common_name.strip() if eachLine.find('source_scientific_name') > -1: source_scientific_name = dataList[1] source_scientific_name = source_scientific_name.strip() if eachLine.find('source_gene_name') > -1: source_gene_name = dataList[1] source_gene_name = source_gene_name.strip() if eachLine.find('source_host_common_name') > -1: source_host_common_name = dataList[1] source_host_common_name = source_host_common_name.strip() if eachLine.find('source_host_scientific_name') > -1: source_host_scientific_name = dataList[1] source_host_scientific_name = source_host_scientific_name.strip() # Section 4 if eachLine.find('exptl_crystal_grow_method') > -1: exptl_crystal_grow_method = dataList[1] exptl_crystal_grow_method = exptl_crystal_grow_method.strip() if eachLine.find('exptl_crystal_grow_pH') > -1: exptl_crystal_grow_pH = dataList[1] exptl_crystal_grow_pH = exptl_crystal_grow_pH.strip() if eachLine.find('exptl_crystal_grow_temp') > -1: exptl_crystal_grow_temp = dataList[1] exptl_crystal_grow_temp = exptl_crystal_grow_temp.strip() if eachLine.find('exptl_crystal_grow_components') > -1: exptl_crystal_grow_components = dataList[1] exptl_crystal_grow_components = exptl_crystal_grow_components.strip() ############################################################################# # C-shell dictionary generation for REFMAC5 for minimal description ligands # ############################################################################# print 'Checking for PDB ligands defined only by minimal descriptions' # Generate temporary ligand name liganddir = pwd + 
'/temp.lib' fileexists = os.path.exists(liganddir) if fileexists != 0: os.remove(liganddir) # Eliminate non-PDB entities from coordinate file file = open('temp_use.pdb','r') allLines = file.readlines() file.close() temppdbfile = 'temp_pdb.pdb' file = open(temppdbfile, 'w') for eachLine in allLines: pdbentity = 'yes' tag = eachLine[0:6] tag = tag.strip() res_name = eachLine[17:20] res_name = res_name.strip() if tag == 'CRYST1': file.write(eachLine) if tag == 'TER': file.write(eachLine) if tag == 'END': file.write(eachLine) if tag == 'LINK': file.write(eachLine) if tag == 'ATOM' or tag == 'HETATM': # check versus non-PDB entity list if number_nonPDB > 0: count = 0 while count < number_nonPDB: entity = aList_hets_nonPDB[count] if entity == res_name: pdbentity = 'no' count = count + 1 if pdbentity == 'yes': file.write(eachLine) file.write('END\n') file.close() # REFMAC5 run to generate ligands filename = 'runrefmac5.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/refmac5 XYZIN ') file.write(temppdbfile) file.write(' XYZOUT temp_lig.pdb LIB_OUT ') file.write(liganddir) file.write(' << end-lig > temp_lig.out \n') file.write('MODE NEWEntry\n') file.write('MAKE_RESTRAINTS CHECK None\n') file.write('MAKE_RESTRAINTS EXIT Yes\n') file.write('END\n') file.write('end-lig\n') file.close() os.system('chmod +x runrefmac5.sh') os.system('./runrefmac5.sh') fileexists = os.path.exists('temp_lig.pdb') if fileexists == 0: print 'REFMAC5 dictionary calculation for PDB entities failed - check "temp_lig.out"' print 'The usual problem is atom names inconsistent with the PDB ligand code\n' sys.exit(1) else: os.remove('temp_lig.pdb') os.remove('temp_lig.out') os.remove('runrefmac5.sh') os.remove(temppdbfile) ################################################################### # C-shell dictionary generation with LIBCHECK for non-PDB ligands # ################################################################### if number_nonPDB > 0: print 'Building dictionaries for novel ligands from templates' count = 0 while count < number_nonPDB: entity = aList_hets_nonPDB[count] pdbtemplate = entity + '.ideal' # Establish and execute CCP4/LIBCHECK to build the library filename = 'runlibcheck.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/libcheck XYZIN << end-libcheck > libcheck.out \n') file.write('N\n') file.write('_COOR Y\n') file.write('_FILE_PDB ') file.write(pdbtemplate) file.write('\nMON ') file.write(entity) file.write('\n \n') file.write('\nend-libcheck') file.close() os.system('chmod +x runlibcheck.sh') os.system('./runlibcheck.sh') os.remove('runlibcheck.sh') fileexists = os.path.exists('libcheck.lib') if fileexists == 0: print 'The output library file from LIBCHECK was not created' print 'This entity may already be in the standard dictionaries' sys.exit(1) os.remove('libcheck.out') entity_libcheck_pdb = 'libcheck_' + entity + '.pdb' entity_libcheck_ps = 'libcheck_' + entity + '.ps' entity_libcheck_cif = 'libcheck_' + entity + '.cif' fileexists = os.path.exists(entity_libcheck_pdb) if fileexists != 0: os.remove(entity_libcheck_pdb) fileexists = os.path.exists(entity_libcheck_ps) if fileexists != 0: os.remove(entity_libcheck_ps) fileexists = os.path.exists(entity_libcheck_cif) if fileexists != 0: os.remove(entity_libcheck_cif) # Post-process to remove plane definitions and variable torsions # from library file libcheck.lib as they are not fully reliable # and 
use of these in refinements may vary write_flag = 'yes' file = open('libcheck.lib','r') allLines = file.readlines() file.close() os.remove('libcheck.lib') entitylib = entity + '.lib' file = open(entitylib, 'w') for eachLine in allLines: # Stop once plane records are encountered if eachLine.find('_chem_comp_plane_atom.comp_id') > -1: write_flag = 'no' # Skip variable torsions if write_flag == 'yes': if eachLine.find('var_') == -1: file.write(eachLine) # Close out more neatly file.write('# ------------------------------------------------------\n') file.write('# ------------------------------------------------------\n') file.write('# ------------------------------------------------------\n') file.close() # Combine novel entity library with any special PDB libraries fileexists = os.path.exists(liganddir) if fileexists != 0: filename = 'runlibcheck.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/libcheck << end-libcheck > libcheck.out \n') file.write('N\n') file.write('_FILE_L ') file.write(entitylib) file.write('\n_FILE_L2 ') file.write(liganddir) file.write('\nend-libcheck') file.close() os.system('chmod +x runlibcheck.sh') os.system('./runlibcheck.sh') os.remove('runlibcheck.sh') os.remove('libcheck.out') os.remove(entitylib) os.rename('libcheck.lib',liganddir) else: os.rename(entitylib,liganddir) count = count + 1 ##################################################### # C-shell R-factor and stereochemistry calculation # ##################################################### print 'Running R-factor and stereochemistry calculations' filename = 'runrefmac5.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/refmac5 HKLIN ') file.write(mtzfile) file.write(' XYZIN temp_use.pdb HKLOUT temp_ref.mtz XYZOUT temp_ref.pdb ') fileexists = os.path.exists(liganddir) if fileexists != 0: file.write('LIBIN ') file.write(liganddir) file.write(' << end-refmac5 > refmac.out\n') file.write('LABIN FP=') file.write(famp) file.write(' SIGFP=') file.write(sd) file.write(' FREE=') file.write(freer) file.write('\nLABOUT FC=FC PHIC=PHIC DELFWT=DELFWT PHDELWT=PHDELWT FWT=FWT PHWT=PHWT FOM=FOM\n') file.write('FREE ') file.write(nfree_exclude) file.write('\nREFI TYPE RESTrained\n') file.write('REFI RESI MLKF\n') if ref_anisoflag == 'no': file.write('REFI BREF ISOT METH CGMAT\n') else: file.write('REFI BREF ANISotropic METH CGMAT\n') if ref_bulksolvent == 'babinet': file.write('SCAL TYPE BULK LSSC ANIS\n') file.write('SOLVENT NO\n') if ref_bulksolvent == 'mask': file.write('SCAL TYPE SIMPLE LSSC ANIS\n') file.write('SOLVENT YES\n') if ref_bulksolvent == 'fixedbabinet': file.write('SCAL TYPE BULK LSSC ANIS FIXBulk SCBULk 0.78 BBULk 180.0 \n') file.write('SOLVENT NO\n') file.write('MAKE_RESTRAINTS HYDR N\n') file.write('MAKE_RESTRAINTS NEWLigand Noexit\n') file.write('MAKE_RESTRAINTS SS Y\n') file.write('MAKE_RESTRAINTS CISP Y\n') file.write('NCYC 0\n') file.write('MONI DIST 6.0\n') file.write('MONI ANGL 8.0\n') file.write('MONI TORSION 10.0\n') file.write('MONI PLANE 10.0\n') file.write('MONI VANderwaals 4.25\n') file.write('MONI CHIRAL 8.0\n') file.write('MONI BFACTOR 99.0\n') file.write('USECWD\n') file.write('PNAME NOID\n') file.write('DNAME output\n') file.write('END\n') file.write('end-refmac5\n') file.close() os.system('chmod +x runrefmac5.sh') os.system('./runrefmac5.sh') fileexists = os.path.exists('output.refmac') if fileexists == 0: print '\nREFMAC5 
calculation failed - check refmac.out\n' print 'The usual problem is atom names inconsistent with the PDB residue code\n' sys.exit(1) fileexists = os.path.exists('temp_ref.pdb') if fileexists == 0: print '\nREFMAC5 calculation failed - check refmac.out\n' print 'The usual problem is atom names inconsistent with the PDB residue code\n' sys.exit(1) fileexists = os.path.exists('runrefmac5.sh') if fileexists != 0: os.remove('runrefmac5.sh') fileexists = os.path.exists('temp_ref.mtz') if fileexists != 0: os.remove('temp_ref.mtz') fileexists = os.path.exists('temp.lib') if fileexists != 0: os.remove('temp.lib') # Parse the required deposition information from output.refmac # Note: this file already conveniently uses mmCIF tags.. file = open('output.refmac','r') allLines = file.readlines() file.close() for eachLine in allLines: reflineList = eachLine.split() if eachLine.find('_refine.ls_R_factor_R_all') > -1: ref_rall = reflineList[1] if eachLine.find('_refine.ls_R_factor_R_free') > -1: ref_rfree = reflineList[1] if eachLine.find('_refine.ls_R_factor_R_work') > -1: ref_rwork = reflineList[1] if eachLine.find('_refine.ls_d_res_low') > -1: ref_dlow = reflineList[1] if eachLine.find('_refine.ls_d_res_high') > -1: ref_dhigh = reflineList[1] if eachLine.find('_refine.ls_R_factor_R_all') > -1: ref_rall = reflineList[1] ref_robs = reflineList[1] if eachLine.find('_refine.ls_number_reflns_R_free') > -1: ref_numfree = reflineList[1] if eachLine.find('_refine.ls_number_reflns_R_work') > -1: ref_numwork = reflineList[1] if eachLine.find('_refine.ls_number_reflns_obs') > -1: ref_numobs = reflineList[1] ref_numall = reflineList[1] if eachLine.find('_refine.B_iso_mean') > -1: ref_bmean = reflineList[1] if eachLine.find('_refine.aniso_B[1][1]') > -1: ref_b11 = reflineList[1] if eachLine.find('_refine.aniso_B[2][2]') > -1: ref_b22 = reflineList[1] if eachLine.find('_refine.aniso_B[3][3]') > -1: ref_b33 = reflineList[1] if eachLine.find('_refine.aniso_B[1][2]') > -1: ref_b12 = reflineList[1] if eachLine.find('_refine.aniso_B[1][3]') > -1: ref_b13 = reflineList[1] if eachLine.find('_refine.aniso_B[2][3]') > -1: ref_b23 = reflineList[1] if eachLine.find('_cell.length_a') > -1: acell = reflineList[1] if eachLine.find('_cell.length_b') > -1: bcell = reflineList[1] if eachLine.find('_cell.length_c') > -1: ccell = reflineList[1] if eachLine.find('_cell.angle_alpha') > -1: alpha = reflineList[1] if eachLine.find('_cell.angle_beta') > -1: beta = reflineList[1] if eachLine.find('_cell.angle_gamma') > -1: gamma = reflineList[1] if eachLine.find('_refine.solvent_vdw_probe_radii') > -1: ref_solvent_vdw_probe_radii = reflineList[1] if eachLine.find('_refine.solvent_ion_probe_radii') > -1: ref_solvent_ion_probe_radii = reflineList[1] if eachLine.find('_refine.solvent_shrinkage_radii') > -1: ref_solvent_shrinkage_radii = reflineList[1] if eachLine.find('_refine.solvent_model_param_ksol') > -1: ref_ksolv = reflineList[1] if eachLine.find('_refine.solvent_model_param_bsol') > -1: ref_bsolv = reflineList[1] # CCP4 4.2 tags if eachLine.find('Bond distances: refined atoms') > -1: ref_dbond = eachLine[53:59] ref_dbond = ref_dbond.strip() if eachLine.find('Bond angles : refined atom') > -1: ref_dangle = eachLine[53:59] ref_dangle = ref_dangle.strip() if eachLine.find('Torsion angles, period 1. 
refined') > -1: ref_dtorsion = eachLine[53:59] ref_dtorsion = ref_dtorsion.strip() if eachLine.find('Chiral centres: refined atoms') > -1: ref_dchiral = eachLine[53:59] ref_dchiral = ref_dchiral.strip() if eachLine.find('Planar groups: refined atoms') > -1: ref_dplane = eachLine[53:59] ref_dplane = ref_dplane.strip() if eachLine.find('M. chain bond B values') > -1: ref_bmbond = eachLine[53:59] ref_bmbond = ref_bmbond.strip() if eachLine.find('M. chain angle B values: refined atom') > -1: ref_bmangle = eachLine[53:59] ref_bmangle = ref_bmangle.strip() if eachLine.find('S. chain bond B values: refined atoms') > -1: ref_bsbond = eachLine[53:59] ref_bsbond = ref_bsbond.strip() if eachLine.find('S. chain angle B values: refined atoms') > -1: ref_bsangle = eachLine[53:59] ref_bsangle = ref_bsangle.strip() # or CCP4 5.0.2 CIF-like tags if eachLine.find('r_bond_refined_d') > -1: ref_dbond = reflineList[2] if eachLine.find('r_angle_refined_deg') > -1: ref_dangle = reflineList[2] if eachLine.find('r_dihedral_angle_1_deg') > -1: ref_dtorsion = reflineList[2] if eachLine.find('r_chiral_restr') > -1: ref_dchiral = reflineList[2] if eachLine.find('r_gen_planes_refined') > -1: ref_dplane = reflineList[2] if eachLine.find('r_mcbond_it') > -1: ref_bmbond = reflineList[2] if eachLine.find('r_mcangle_it') > -1: ref_bmangle = reflineList[2] if eachLine.find('r_scbond_it') > -1: ref_bsbond = reflineList[2] if eachLine.find('r_scangle_it') > -1: ref_bsangle = reflineList[2] # Fix for cubic space groups if ref_b11 == '?': ref_b11 = '0.00' if ref_b22 == '?': ref_b22 = '0.00' if ref_b33 == '?': ref_b33 = '0.00' if ref_b12 == '?': ref_b12 = '0.00' if ref_b13 == '?': ref_b13 = '0.00' if ref_b23 == '?': ref_b23 = '0.00' # Check for incorrect use of cross-validation flag (Rwork differs from Rfree by less than 1%) if ref_rfree != '?' and ref_rwork != '?': float_ref_rfree = float(ref_rfree) float_ref_rwork = float(ref_rwork) float_rdif = float_ref_rfree - float_ref_rwork if float_rdif < 0.01: print '\nRwork and Rfree are extremely close !' print 'The default assignment of Rfree flags (0) by script parameter nfree_exclude' print 'needs to be changed\n' sys.exit(1) ########################################################### # Compute refinement data completeness versus theoretical # ########################################################### float_num_refs_theoretical = '?' 
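# Completeness here is taken as 100 * (reflections used in refinement) /
# (theoretical unique reflections reported by CCP4 UNIQUE for the same
# cell, symmetry and resolution limit). As an illustration with made-up
# numbers, 24500 observed of 25000 theoretical gives 98.0 percent.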
filename = 'rununique.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/unique HKLOUT temp_unique.mtz << end_unique > unique.out\n') file.write('TITLE unique_data\n') file.write('LABOUT F=XFUNI SIGF=XSIGFUNI\n') file.write('RESOLUTION ') file.write(ref_dhigh) file.write('\nSYMM ') file.write(spgno) file.write('\nCELL ') file.write(acell) file.write(' ') file.write(bcell) file.write(' ') file.write(ccell) file.write(' ') file.write(alpha) file.write(' ') file.write(beta) file.write(' ') file.write(gamma) file.write('\nEND\n') file.write('end_unique\n') file.close() os.system('chmod +x rununique.sh') os.system('./rununique.sh') fileexists = os.path.exists('unique.out') if fileexists == 0: print '\nrefinement data completeness calculation failed' sys.exit(1) else: file = open('unique.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('reflections within resolution limits') > -1: dataList = eachLine.split() num_refs_theoretical = dataList[0] float_num_refs_theoretical = float(num_refs_theoretical) if float_num_refs_theoretical != '?': float_data_refinement = float(ref_numall) float_data_percentobs = 100.0 * float_data_refinement/float_num_refs_theoretical ref_percent = round(float_data_percentobs,1) ref_percent = str(ref_percent) os.remove('unique.out') os.remove('rununique.sh') fileexists = os.path.exists('temp_unique.mtz') if fileexists != 0: os.remove('temp_unique.mtz') ##################################################################### # Check refmac.out for severe point errors in covalent/VDW geometry # ##################################################################### # Parsing here is more fragile as it uses a log file rather than harvesting file file = open('refmac.out','r') allLines = file.readlines() file.close() for eachLine in allLines: # Babinet bulk solvent parameters (if used) since not in data harvesting as of CCP4 5.0.2 if eachLine.find('Babinet"s bulk solvent:') > -1: ref_ksolv = eachLine[33:40] ref_ksolv = ref_ksolv.strip() ref_bsolv = eachLine[47:54] ref_bsolv = ref_bsolv.strip() # Parsing section limits if eachLine.find('****') > -1: bond_list = 'no' angle_list = 'no' torsion_list = 'no' contact_list = 'no' if eachLine.find('----') > -1: bond_list = 'no' angle_list = 'no' torsion_list = 'no' contact_list = 'no' # Flag logging on finding final iteration number if eachLine.find('Refinement cycles') > -1: iterations = eachLine[30:40] iterations = iterations.strip() if iterations == '0': iteration_final = 'yes' if eachLine.find('CGMAT cycle number') > -1: if eachLine.find(iterations) > -1: iteration_final = 'yes' # get abnormal bond list if bond_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = chain.strip() if chain != '': resnumber = eachLine[1:5] resname = eachLine[6:9] aList_bonds_chain.append(chain) aList_bonds_resno.append(resnumber) aList_bonds_resname.append(resname) if eachLine.find('Bond distance deviations ') > -1: bond_list = 'yes' # get abnormal bond angle list if angle_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = chain.strip() if chain != '': resnumber = eachLine[1:5] resname = eachLine[6:9] aList_angles_chain.append(chain) aList_angles_resno.append(resnumber) aList_angles_resname.append(resname) if eachLine.find('Bond angle deviations ') > -1: angle_list = 'yes' # get abnormal contacts list if contact_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = 
chain.strip() if chain != '': resnumber = eachLine[1:5] resnumber = resnumber.strip() resname = eachLine[6:9] chain2 = eachLine[18:19] chain2 = chain2.strip() resnumber2 = eachLine[19:24] resnumber2 = resnumber2.strip() resname2 = eachLine[20:23] disorder1 = eachLine[14:15] disorder2 = eachLine[32:33] disorder1 = disorder1.strip() disorder2 = disorder2.strip() # Skip intra-residue interactions if chain != chain2 or resnumber != resnumber2: if disorder1 == '.' and disorder2 == '.': aList_contacts_chain.append(chain) aList_contacts_resno.append(resnumber) aList_contacts_resname.append(resname) aList_contacts_chain.append(chain2) aList_contacts_resno.append(resnumber2) aList_contacts_resname.append(resname2) if eachLine.find('VDW deviations ') > -1: contact_list = 'yes' chiral_list = 'no' # get severe chiral center violations if chiral_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = chain.strip() if chain != '': resnumber = eachLine[1:5] resname = eachLine[6:9] aList_chiral_chain.append(chain) aList_chiral_resno.append(resnumber) aList_chiral_resname.append(resname) if eachLine.find('Chiral volume deviations') > -1: chiral_list = 'yes' # get CIS peptide list if eachLine.find('CIS peptide bond is found') > -1: cispep_list = 'yes' if cispep_list == 'yes' and iteration_final == 'yes': chain = eachLine[15:16] chain = chain.strip() if chain != '': resnumber = eachLine[40:44] resname = eachLine[45:48] aList_cis_chain.append(chain) aList_cis_resno.append(resnumber) aList_cis_resname.append(resname) cispep_list = 'no' ############################################ # C-shell execution of CCP4/MATTHEWS_COEF # ############################################ print 'Running solvent volume calculations' # Note that this uses the FASTA sequence, which should be the protein in the crystal sample num_res_in_crystal_au = sequence_length * number_chains num_res_in_crystal_au = str(num_res_in_crystal_au) filename = 'runmatthews_coef.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/matthews_coef << end-matthews_coef > matthews_coef.out\n') file.write('CELL ') file.write(acell) file.write(' ') file.write(bcell) file.write(' ') file.write(ccell) file.write(' ') file.write(alpha) file.write(' ') file.write(beta) file.write(' ') file.write(gamma) file.write(' ') file.write('\nSYMM ') file.write(spgno) file.write('\nNRES ') file.write(num_res_in_crystal_au) file.write('\nend-matthews_coef\n') file.close() os.system('chmod +x runmatthews_coef.sh') os.system('./runmatthews_coef.sh') fileexists = os.path.exists('matthews_coef.out') if fileexists == 0: print '\nMATTHEWS_COEF calculation failed to execute\n' sys.exit(1) file = open('matthews_coef.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('The Matthews Coefficient is') > -1: dataList = eachLine.split(':') matthews_coef = dataList[1] matthews_coef = matthews_coef.strip() if eachLine.find('Assuming protein density is') > -1: dataList = eachLine.split(':') solvent_percent = dataList[1] solvent_percent = solvent_percent.strip() os.remove('runmatthews_coef.sh') os.remove('matthews_coef.out') ##################### # Write everything # ##################### print 'Writing deposition file' file = open('pdbdeposit.cif', 'w') file.write('data_structure_1\n') file.write('#\n') file.write('##############################\n') file.write('# Release information etc #\n') file.write('##############################\n') file.write('#\n') 
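# mmCIF values written below are wrapped in single quotes via the 'quote'
# variable, and free-text fields (e.g. the contact address) are written
# between lines containing ';'. A typical line in pdbdeposit.cif therefore
# looks like (placeholder value):
#   _audit_author.name 'Surname, A.B.'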
file.write('_audit_author.name ') file.write(quote) file.write(audit_author_name) file.write(quote) file.write('\n#\n') file.write('_audit_contact_author.name ') file.write(quote) file.write(audit_contact_author_name) file.write(quote) file.write('\n_audit_contact_author.email ') file.write(quote) file.write(audit_contact_author_email) file.write(quote) file.write('\n_audit_contact_author.address\n') file.write('; ') file.write(audit_contact_author_address) file.write('\n;\n') file.write('_audit_contact_author.phone ') file.write(quote) file.write(audit_contact_author_phone) file.write(quote) file.write('\n_audit_contact_author.fax ') file.write(quote) file.write(audit_contact_author_fax) file.write(quote) file.write('\n#\n') file.write('_pdbx_database_status.dep_release_code_coordinates ') file.write(quote) file.write('HPUB') file.write(quote) file.write('\n_pdbx_database_status.dep_release_code_struct_fact ') file.write(quote) file.write('HPUB') file.write(quote) file.write('\n_pdbx_database_status.dep_release_code_sequence ') file.write(quote) file.write('REL') file.write(quote) file.write('\n#\n') if citation_title != '?': file.write('################################\n') file.write('# Citation and author #\n') file.write('################################\n') file.write('#\n') file.write('_citation.id primary\n') file.write('_citation.title ') file.write(quote) file.write(citation_title) file.write(quote) file.write('\n_citation.journal_abbrev ') file.write(quote) file.write(citation_journal_abbrev) file.write(quote) file.write('\n_citation.journal_volume ') file.write(quote) file.write(citation_journal_volume) file.write(quote) file.write('\n_citation.page_first ') file.write(citation_page_first) file.write('\n_citation.page_last ') file.write(citation_page_last) file.write('\n_citation.year ') file.write(citation_year) file.write('\n#\n') file.write('_citation_author.citation_id primary\n') file.write('_citation_author.name ') file.write(quote) file.write(citation_author_name) file.write(quote) file.write('\n#\n') if citation_journal_abbrev == 'unpublished' or citation_journal_abbrev == 'UNPUBLISHED': file.write('################################\n') file.write('# Citation and author #\n') file.write('################################\n') file.write('#\n') file.write('_citation.id primary\n') file.write('_citation.journal_abbrev unpublished') file.write('##############################################################\n') file.write('# #\n') file.write('# Extra data collection and processing information for PDB #\n') file.write('# #\n') file.write('##############################################################\n') file.write('#\n') file.write('_exptl.entry_id 1\n') file.write('_exptl.method ') file.write(quote) file.write('x-ray diffraction') file.write(quote) file.write('\n_exptl.crystals_number 1\n') file.write('#\n') file.write('_diffrn.id 1\n') file.write('_diffrn.ambient_temp ') file.write(data_collection_temp_K) file.write('\n') file.write('_diffrn_source.diffrn_id 1\n') if wavelengths != '1.54': file.write('_diffrn_source.source ') file.write(quote) file.write('Synchrotron') file.write(quote) file.write('\n_diffrn_source.type ') file.write(quote) file.write(beamline) file.write(quote) file.write('\n_diffrn_detector.diffrn_id 1\n') file.write('_diffrn_detector.pdbx_collection_date ') file.write(quote) file.write(data_collection_date) file.write(quote) file.write('\n_diffrn_detector.detector ') file.write(quote) file.write(detector_type) file.write(quote) 
    file.write('_diffrn_detector.type ' + quote + detector_maker + quote + '\n')
    file.write('#\n')
else:
    file.write('_diffrn_source.type ' + quote + 'Rotating anode' + quote + '\n')
    file.write('_diffrn_detector.diffrn_id 1\n')
    file.write('_diffrn_detector.pdbx_collection_date ' + quote + data_collection_date + quote + '\n')
    file.write('_diffrn_detector.detector ' + quote + detector_type + quote + '\n')
    file.write('_diffrn_detector.type ' + quote + detector_maker + quote + '\n')
    file.write('#\n')

file.write('_diffrn_radiation.diffrn_id 1\n')
file.write('_diffrn_radiation.wavelength_id 1\n')

if xray_method == 'MAD':
    file.write('_diffrn_radiation.pdbx_diffrn_protocol MAD\n')
else:
    file.write('_diffrn_radiation.pdbx_diffrn_protocol ' + quote + 'Single wavelength' + quote + '\n')

file.write('_diffrn_radiation.monochromator ' + quote + monochromator_type + quote + '\n')
file.write('_diffrn_radiation.pdbx_wavelength_list ' + quote + wavelengths + quote + '\n')
file.write('_diffrn_reflns.diffrn_id 1\n')
file.write('_diffrn_reflns.number ' + data_num_unmerged + '\n')
file.write('#\n')
file.write('_computing.entry_id 1\n')
file.write('_computing.data_collection ' + quote + computing_data_collection + quote + '\n')
file.write('_computing.data_reduction ' + quote + computing_data_reduction + quote + '\n')
file.write('_computing.structure_solution ' + quote + computing_structure_solution + quote + '\n')
file.write('_computing.molecular_graphics ' + quote + computing_molecular_graphics + quote + '\n')
file.write('_computing.structure_refinement ' + quote + computing_structure_refinement + quote + '\n')
file.write('#\n')
file.write('################################\n')
file.write('# Sequence information #\n')
file.write('################################\n')
file.write('#\n')

# Write protein sequence (entity 1)
file.write('_entity_poly.entity_id 1\n')
file.write('_entity_poly.pdbx_seq_one_letter_code\n')
file.write(';')

# Pad out sequence with spaces (no padding if the length is already a multiple of 60)
remainder = sequence_length%60
pad = 0
if remainder > 0:
    pad = 60 - remainder
count = 0
while count < pad:
    seqList.append(' ')
    count = count + 1

sequence_length = len(seqList)

# Print out in blocks of 60 amino acids
count = 0
while count < sequence_length:
    aa = seqList[count]
    file.write(aa)
    count = count + 1
    remainder = count%60
    if remainder == 0:
        file.write('\n')

file.write(';\n')

# Write the PDB chain-ids to which the sequence corresponds
file.write('_entity_poly.pdbx_strand_id ')
file.write(quote)
count = 0
while count < number_chains:
    chain_out = aList_chains[count]
    pr_chain_out = ' ' + chain_out + ' '
    file.write(pr_chain_out)
    if count != number_chains - 1:
        file.write(',')
    count = count + 1
file.write(quote)
file.write('\n')

# Entity annotation
file.write('#\n')
file.write('################################\n')
file.write('# Entity information #\n')
file.write('################################\n')
file.write('#\n')
file.write('loop_\n')
file.write('_entity.id\n')
file.write('_entity.pdbx_description\n')
file.write('_entity.type\n')
file.write('_entity.pdbx_ec\n')

# Protein
file.write('1')
file.write('\n; ')
file.write(protein_name)
file.write('\n;\n')
file.write(' polymer ')
file.write(protein_ec_number)
file.write('\n')

# Ligand list
number_entities = len(aList_hets)
entity_list_number = 1
count = 0
if number_entities > 0:
    while count < number_entities:
        entity_number = aList_hets_number[count]
        entity_name = aList_hets_names[count]
        file.write(entity_number)
        file.write('\n; ')
        file.write(entity_name)
        file.write('\n;\n')
        file.write(' non-polymer ?\n')
        count = count + 1

# Waters
if water_flag == 'yes':
    file.write(water_entity_list_number)
    file.write(' water water ?\n')

# Add the entity keyword ids as separate table for ADIT
file.write('#\n')
file.write('loop_\n')
file.write('_entity_keywords.entity_id\n')
file.write('1\n')

count = 0
if number_entities > 0:
    while count < number_entities:
        entity_number = aList_hets_number[count]
        file.write(entity_number)
        file.write('\n')
        count = count + 1

if water_flag == 'yes':
    file.write(water_entity_list_number)
    file.write('\n')

file.write('#\n')
file.write('##############################################################\n')
file.write('# Structure annotation #\n')
file.write('# note: _struct_keywords.pdbx_keywords maps to HEADER #\n')
file.write('# and should report function. Use broad enzyme #\n')
file.write('# classification or "Structural genomics, unknown function" #\n')
file.write('# _struct_keywords.text can be any words #\n')
file.write('##############################################################\n')
file.write('#\n')
file.write('_struct.entry_id 1\n')
file.write('_struct.title ' + quote + structure_title + quote + '\n')
file.write('#\n')
file.write('_struct_keywords.entry_id 1\n')
file.write('_struct_keywords.pdbx_keywords ' + quote + structure_class + quote + '\n')
file.write('_struct_keywords.text ' + quote + structure_keywords + quote + '\n')
file.write('#\n')
file.write('##############################################\n')
file.write('# Unique entity map #\n')
file.write('# note: points to _atom_site.label_asym_id #\n')
file.write('##############################################\n')
file.write('#\n')
file.write('loop_\n')
file.write('_struct_asym.id\n')
file.write('_struct_asym.entity_id\n')

# Protein
count = 0
if number_chains > 0:
    while count < number_chains:
        entity_list_number = '1'
        entity_list_asym = aList_chains[count]
        file.write(entity_list_asym)
        file.write(' ')
        file.write(entity_list_number)
        file.write('\n')
        count = count + 1

# Ligand list
count = 0
if number_entities > 0:
    while count < number_entities:
        entity_list_number = aList_hets_number[count]
        entity_list_asym = aList_hets_asym[count]
        file.write(entity_list_asym)
        file.write(entity_list_number)
        file.write('\n')
        count = count + 1

# Waters
if water_flag == 'yes':
    file.write('W ')
    file.write(water_entity_list_number)
    file.write('\n')

file.write('#\n')
file.write('##############################################\n')
file.write('# Asymmetric unit description #\n')
file.write('##############################################\n')
file.write('#\n')
file.write('_struct_biol.id 1\n')
file.write('_struct_biol.details ' + quote + biological_unit + quote + '\n')
file.write('#\n')
file.write('##############################################\n')
file.write('# Database sequence reference #\n')
file.write('##############################################\n')
file.write('#\n')
file.write('loop_\n')
file.write('_struct_ref.id\n')
file.write('_struct_ref.entity_id\n')
file.write('_struct_ref.db_name\n')
file.write('_struct_ref.db_code\n')
file.write('1 1 ' + quote + sequence_databasename + quote + ' ' + quote)
file.write(sequence_databasecode + quote + '\n')
file.write('#\n')
file.write('###########################\n')
file.write('# Source information #\n')
file.write('###########################\n')
file.write('#\n')
file.write('_entity_src_gen.entity_id 1\n')
file.write('_entity_src_gen.gene_src_common_name ' + quote + source_common_name + quote + '\n')
file.write('_entity_src_gen.pdbx_gene_src_scientific_name ' + quote + source_scientific_name + quote + '\n')
file.write('_entity_src_gen.pdbx_gene_src_gene ' + quote + source_gene_name + quote + '\n')
file.write('_entity_src_gen.host_org_common_name ' + quote + source_host_common_name + quote + '\n')
file.write('_entity_src_gen.pdbx_host_org_scientific_name ' + quote + source_host_scientific_name + quote + '\n')

# Write crystal data
file.write('#\n')
file.write('##########################\n')
file.write('# Crystal information #\n')
file.write('##########################\n')
file.write('#\n')
file.write('_cell.entry_id 1\n')
file.write('_cell.length_a ' + acell + '\n')
file.write('_cell.length_b ' + bcell + '\n')
file.write('_cell.length_c ' + ccell + '\n')
file.write('_cell.angle_alpha ' + alpha + '\n')
file.write('_cell.angle_beta ' + beta + '\n')
file.write('_cell.angle_gamma ' + gamma + '\n')
file.write('#\n')
file.write('_symmetry.entry_id 1\n')
file.write('_symmetry.Int_Tables_number ' + spgno + '\n')
file.write('_symmetry.space_group_name_H-M ' + quote + spgname + quote + '\n')
file.write('#\n')
file.write('_exptl_crystal.id 1\n')
file.write('_exptl_crystal.density_percent_sol ' + solvent_percent + '\n')
file.write('_exptl_crystal.density_Matthews ' + matthews_coef + '\n')
file.write('#\n')
file.write('_exptl_crystal_grow.crystal_id 1\n')
file.write('_exptl_crystal_grow.method ' + quote + exptl_crystal_grow_method + quote + '\n')
file.write('_exptl_crystal_grow.pH ' + exptl_crystal_grow_pH + '\n')
file.write('_exptl_crystal_grow.temp ' + exptl_crystal_grow_temp + '\n')
file.write('_exptl_crystal_grow.pdbx_details ' + quote + exptl_crystal_grow_components + quote + '\n')

# Write data collection data
file.write('#\n')
file.write('###########################\n')
file.write('# Data collection #\n')
file.write('###########################\n')
file.write('#\n')
file.write('# Overall processing statistics\n')
file.write('#\n')
file.write('_reflns.entry_id 1\n')
file.write('_reflns.number_all ' + data_num + '\n')
file.write('_reflns.number_obs ' + data_num + '\n')
file.write('_reflns.observed_criterion_sigma_F ' + truncate_default_f + '\n')
file.write('_reflns.observed_criterion_sigma_I ' + truncate_default_i + '\n')
file.write('_reflns.d_resolution_low ' + data_rlow + '\n')
file.write('_reflns.d_resolution_high ' + data_rhigh + '\n')
file.write('_reflns.percent_possible_obs ' + data_percentobs + '\n')
file.write('_reflns.pdbx_redundancy ' + data_redund + '\n')
file.write('_reflns.pdbx_Rmerge_I_obs ' + data_rmerge + '\n')
file.write('_reflns.pdbx_netI_over_av_sigmaI ' + data_ioversig + '\n')
file.write('#\n')
file.write('# Outer shell processing statistics\n')
file.write('#\n')
file.write('_reflns_shell.number_measured_all ' + datas_num + '\n')
file.write('_reflns_shell.number_measured_obs ')
file.write(datas_num + '\n')
file.write('_reflns_shell.d_res_low ' + datas_rlow + '\n')
file.write('_reflns_shell.d_res_high ' + datas_rhigh + '\n')
file.write('_reflns_shell.meanI_over_sigI_obs ' + datas_ioversig + '\n')
file.write('_reflns_shell.Rmerge_I_obs ' + datas_rmerge + '\n')
file.write('_reflns_shell.percent_possible_all ' + datas_percentobs + '\n')
file.write('_reflns_shell.pdbx_redundancy ' + datas_redund + '\n')

# Write refinement information
file.write('#\n')
file.write('###########################\n')
file.write('# Refinement information #\n')
file.write('###########################\n')
file.write('#\n')
file.write('_refine.entry_id 1\n')

if xray_method == 'MAD':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + 'MAD phasing' + quote)
if xray_method == 'SAD':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + 'SAD phasing' + quote)
if xray_method == 'IR':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + 'Molecular Replacement' + quote)
if xray_method != 'MAD' and xray_method != 'SAD' and xray_method != 'IR':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + xray_method + quote)

file.write('\n#\n')
file.write('# Data selection\n')
file.write('#\n')
file.write('_refine.ls_d_res_low ' + ref_dlow + '\n')
file.write('_refine.ls_d_res_high ' + ref_dhigh + '\n')
file.write('#\n')
file.write('# Bulk solvent scattering model correction\n')
file.write('#\n')
file.write('_refine.solvent_model_details\n')

# This selection is not used in deposition
if ref_bulksolvent == 'babinet' or ref_bulksolvent == 'fixedbabinet':
    file.write('; Babinet bulk solvent correction\n')
    file.write(';\n')
    file.write('_refine.solvent_model_param_ksol ' + ref_ksolv + '\n')
    file.write('_refine.solvent_model_param_bsol ' + ref_bsolv + '\n')

if ref_bulksolvent == 'mask':
    file.write('; Mask bulk solvent correction\n')
    file.write(';\n')
    file.write('_refine.pdbx_solvent_vdw_probe_radii ' + ref_solvent_vdw_probe_radii + '\n')
    file.write('_refine.pdbx_solvent_ion_probe_radii ' + ref_solvent_ion_probe_radii + '\n')
    file.write('_refine.pdbx_solvent_shrinkage_radii ' + ref_solvent_shrinkage_radii + '\n')

file.write('#\n')
file.write('# Refinement scaling\n')
file.write('#\n')
file.write('_refine.aniso_B[1][1] ' + ref_b11 + '\n')
file.write('_refine.aniso_B[1][2] ' + ref_b12 + '\n')
file.write('_refine.aniso_B[1][3] ' + ref_b13 + '\n')
file.write('_refine.aniso_B[2][2] ' + ref_b22 + '\n')
file.write('_refine.aniso_B[2][3] ' + ref_b23 + '\n')
file.write('_refine.aniso_B[3][3] ' + ref_b33 + '\n')
file.write('#\n')
file.write('# Mean B-factor\n')
file.write('#\n')
file.write('_refine.B_iso_mean ' + ref_bmean + '\n')
file.write('#\n')
file.write('# B-factor refinement method\n')
file.write('#\n')

if ref_anisoflag == 'yes':
    file.write('_refine.pdbx_isotropic_thermal_model ' + quote + 'anisotropic' + quote)
else:
    file.write('_refine.pdbx_isotropic_thermal_model ' + quote + 'isotropic' + quote)

file.write('\n#\n')
file.write('# Overall R-factors\n')
file.write('#\n')
file.write('_refine.ls_number_reflns_all ' + ref_numall + '\n')
file.write('_refine.ls_number_reflns_obs ' + ref_numobs + '\n')
file.write('_refine.ls_number_reflns_R_free ' + ref_numfree + '\n')
file.write('_refine.ls_percent_reflns_obs ' + ref_percent + '\n')
file.write('_refine.ls_R_factor_all ' + ref_rall + '\n')
file.write('_refine.ls_R_factor_obs ' + ref_robs + '\n')
file.write('_refine.ls_R_factor_R_work ' + ref_rwork + '\n')
file.write('_refine.ls_R_factor_R_free ' + ref_rfree + '\n')
file.write('_refine.pdbx_ls_sigma_F 0.0\n')
file.write('_refine.pdbx_ls_cross_valid_method ' + quote + 'Free R-value' + quote + '\n')
file.write('_refine.pdbx_R_Free_selection_details ' + quote + 'random' + quote + '\n')
file.write('_refine.pdbx_stereochemistry_target_values ' + quote + 'Engh-Huber' + quote + '\n')
file.write('#\n')
file.write('# Stereochemical agreement\n')
file.write('#\n')
file.write('loop_\n')
file.write('_refine_ls_restr.type\n')
file.write('_refine_ls_restr.dev_ideal\n')
file.write('r_bond_d ' + ref_dbond + '\n')
file.write('r_angle_d ' + ref_dangle + '\n')
file.write('r_planar_tor ' + ref_dtorsion + '\n')
file.write('r_chiral_restr ' + ref_dchiral + '\n')
file.write('r_plane_restr ' + ref_dplane + '\n')
file.write('r_mcbond_it ' + ref_bmbond + '\n')
file.write('r_mcangle_it ' + ref_bmangle + '\n')
file.write('r_scbond_it ' + ref_bsbond + '\n')
file.write('r_scangle_it ' + ref_bsangle + '\n')
file.write('#\n')
file.write('# Atom counts\n')
file.write('#\n')
file.write('_refine_hist.cycle_id 1\n')
file.write('_refine_hist.d_res_high ' + ref_dhigh + '\n')
file.write('_refine_hist.d_res_low ' + ref_dlow + '\n')
file.write('_refine_hist.number_atoms_total ' + ref_natom + '\n')
file.write('_refine_hist.number_atoms_solvent ' + ref_nsolvent + '\n')
file.write('#\n')

# Add coordinates.
# Note that this uses the REFMAC o/p for better atom-typing
xyzfile = open('temp_ref.pdb', 'r')
allLines = xyzfile.readlines()
xyzfile.close()

os.remove('temp_use.pdb')
os.remove('temp_ref.pdb')

file.write('##############################\n')
file.write('# Coordinates #\n')
file.write('##############################\n')
file.write('# \n')
file.write('loop_ \n')
file.write('_atom_site.type_symbol \n')
file.write('_atom_site.label_atom_id \n')
file.write('_atom_site.label_comp_id \n')
file.write('_atom_site.auth_asym_id \n')
file.write('_atom_site.auth_seq_id \n')
file.write('_atom_site.label_seq_id \n')
file.write('_atom_site.label_alt_id \n')
file.write('_atom_site.Cartn_x \n')
file.write('_atom_site.Cartn_y \n')
file.write('_atom_site.Cartn_z \n')
file.write('_atom_site.occupancy \n')
file.write('_atom_site.B_iso_or_equiv \n')
file.write('_atom_site.footnote_id \n')
file.write('_atom_site.label_entity_id \n')
file.write('_atom_site.id \n')
file.write('_atom_site.label_asym_id \n')

for eachLine in allLines:
    tag = eachLine[0:6]
    tag = tag.strip()
    if tag == 'ATOM' or tag == 'HETATM':
        atom_serial = eachLine[6:11]
        atom_name = eachLine[12:16]
        atom_alt = eachLine[16:17]
        res_name = eachLine[17:20]
        chain_id = eachLine[21:22]
        res_number = eachLine[22:26]
        insert_code = eachLine[26:27]
        x_coord = eachLine[30:38]
        y_coord = eachLine[38:46]
        z_coord = eachLine[46:54]
        occ_value = eachLine[54:60]
        b_value = eachLine[60:66]
        element = eachLine[76:78]

        # strip or pad some records
        pr_res_name = res_name.strip()
        wr_atom_serial = ' ' + atom_serial + ' '
        wr_atom_name = ' ' + atom_name + ' '
        wr_atom_alt = ' ' + atom_alt + ' '
        wr_res_name = ' ' + res_name + ' '
        wr_chain_id = ' ' + chain_id + ' '
        wr_res_number = ' ' + res_number + ' '
        wr_insert_code = ' ' + insert_code + ' '
        wr_x_coord = ' ' + x_coord + ' '
        wr_y_coord = ' ' + y_coord + ' '
        wr_z_coord = ' ' + z_coord + ' '
        wr_occ_value = ' ' + occ_value + ' '
        wr_b_value = ' ' + b_value + ' '
        wr_element = ' ' + element + ' '

        # Patch null alternate records
        if atom_alt == ' ':
            wr_atom_alt = ' . '

        # Establish label_asym records
        # 1.protein
        if chain_id != 'W' and res_name != 'HOH':
            label_asym = ' ' + chain_id
            label_entity = '1'

        # 2.ligands
        if chain_id == 'W' and res_name != 'HOH':
            wr_chain_id = ' . '
            count = 0
            while count < number_entities:
                hetname = aList_hets[count]
                if hetname == res_name:
                    label_asym = aList_hets_asym[count]
                    label_entity = aList_hets_number[count]
                count = count + 1

        # 3.water
        if res_name == 'HOH':
            wr_chain_id = ' . '
            label_asym = ' W'
            label_entity = water_entity_list_number

        wr_label_asym = ' ' + label_asym
        wr_label_entity = ' ' + label_entity + ' '

        # Write CIF atom record
        file.write(' ')
        file.write(wr_element)
        file.write(wr_atom_name)
        file.write(wr_res_name)
        file.write(wr_chain_id)
        file.write(wr_res_number)
        file.write(wr_res_number)
        file.write(wr_atom_alt)
        file.write(wr_x_coord)
        file.write(wr_y_coord)
        file.write(wr_z_coord)
        file.write(wr_occ_value)
        file.write(wr_b_value)
        file.write(footnote)
        file.write(wr_label_entity)
        file.write(wr_atom_serial)
        file.write(label_asym)
        file.write('\n')

# Add list of anisotropic records of same type and order as PDB file
if ref_anisoflag == 'yes':
    file.write('################################\n')
    file.write('# Anisotropic B-factor records #\n')
    file.write('################################\n')
    file.write('loop_\n')
    file.write('_atom_site_anisotrop.id\n')
    file.write('_atom_site_anisotrop.type_symbol\n')
    file.write('_atom_site_anisotrop.U[1][1]\n')
    file.write('_atom_site_anisotrop.U[2][2]\n')
    file.write('_atom_site_anisotrop.U[3][3]\n')
    file.write('_atom_site_anisotrop.U[1][2]\n')
    file.write('_atom_site_anisotrop.U[1][3]\n')
    file.write('_atom_site_anisotrop.U[2][3]\n')

    for eachLine in allLines:
        tag = eachLine[0:6]
        tag = tag.strip()
        if tag == 'ANISOU':
            atom_serial = eachLine[6:11]
            element = eachLine[76:78]
            u11 = eachLine[28:35]
            u22 = eachLine[35:42]
            u33 = eachLine[42:49]
            u12 = eachLine[49:56]
            u13 = eachLine[56:63]
            u23 = eachLine[63:70]
            wr_atom_serial = ' ' + atom_serial + ' '
            wr_element = ' ' + element + ' '
            wr_u11 = ' ' + u11 + ' '
            wr_u22 = ' ' + u22 + ' '
            wr_u33 = ' ' + u33 + ' '
            wr_u12 = ' ' + u12 + ' '
            wr_u13 = ' ' + u13 + ' '
            wr_u23 = ' ' + u23 + ' '
            file.write(wr_atom_serial)
            file.write(wr_element)
            file.write(wr_u11)
            file.write(wr_u22)
            file.write(wr_u33)
            file.write(wr_u12)
            file.write(wr_u13)
            file.write(wr_u23)
            file.write('\n')

#
file.write('#\n')
file.write('###########\n')
file.write('# The End #\n')
file.write('###########\n')
file.write('#\n')

# Close the structure deposition file
file.close()

###################################################################
# Write simple CIF reflection file list from the refinement data  #
###################################################################

print 'Creating CIF reflection list'

filename = 'runmtz2various.sh'
file = open(filename, 'w')
file.write('#!/bin/csh -f\n')
file.write('source ' + xtalsetup + '\n')
file.write('$CCP4_BIN/mtz2various HKLIN ' + mtzfile + ' HKLOUT hkldeposit.cif << end-mtz2various > mtz2various.out\n')
file.write('LABIN FP=' + famp + ' SIGFP=' + sd + ' FREE=' + freer + '\n')
file.write('OUTPUT CIF data_1\n')
file.write('FREEVAL ' + nfree_exclude + '\n')
file.write('MISS\n')
file.write('SCALE 10\n')
file.write('MONITOR 1000\n')
file.write('END\n')
file.write('end-mtz2various\n')
file.close()

os.system('chmod +x runmtz2various.sh')
os.system('./runmtz2various.sh')

fileexists = os.path.exists('hkldeposit.cif')
if fileexists == 0:
    print 'MTZ2VARIOUS run to generate CIF reflection file appears to have failed\n'
    sys.exit(1)

os.remove('mtz2various.out')
os.remove('runmtz2various.sh')

#####################
# Report statistics #
#####################

print '\nRefinement Summary'
print '==================='
print 'Resolution  :',ref_dhigh
print 'R(working)  :',ref_rwork
print 'R(free)     :',ref_rfree
print 'RMSD(bonds) :',ref_dbond
print 'RMSD(angles):',ref_dangle

# Flag geometric problems in bonds
length_bonds = len(aList_bonds_resno)
if length_bonds > 0:
    print '\nThe following residues have severely strained covalent bond lengths:'
    count = 0
    while count < length_bonds:
        chain = aList_bonds_chain[count]
        aa = aList_bonds_resno[count]
        resname = aList_bonds_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag geometric problems in angles
length_angles = len(aList_angles_resno)
if length_angles > 0:
    print '\nThe following residues have severely strained covalent bond angles:'
    count = 0
    while count < length_angles:
        chain = aList_angles_chain[count]
        aa = aList_angles_resno[count]
        resname = aList_angles_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag short VDW contacts
length_vdw = len(aList_contacts_resno)
if length_vdw > 0:
    print '\nThe following residues have extremely short VDW contacts:'
    count = 0
    while count < length_vdw:
        chain = aList_contacts_chain[count]
        aa = aList_contacts_resno[count]
        resname = aList_contacts_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag abnormal chiral centers
length_chiral = len(aList_chiral_resno)
if length_chiral > 0:
    print '\nThe following residues have severely strained chiral centers:'
    count = 0
    while count < length_chiral:
        chain = aList_chiral_chain[count]
        aa = aList_chiral_resno[count]
        resname = aList_chiral_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag CIS residues
length_cis = len(aList_cis_resno)
if length_cis > 0:
    print '\nThe following residues are in CIS conformation:'
    count = 0
    while count < length_cis:
        chain = aList_cis_chain[count]
        aa = aList_cis_resno[count]
        resname = aList_cis_resname[count]
        print chain,aa,resname
        count = count + 1

#
if length_bonds > 0 or length_angles > 0 or length_chiral > 0:
    print '\nSevere stereochemical abnormalities are virtually impossible and usually indicate model errors.'
    print 'See file refmac.out for more information.'

# QC notes from SGX work
fl_ref_dhigh = float(ref_dhigh)
rfree_max = -0.02 * fl_ref_dhigh * fl_ref_dhigh + 0.13 * fl_ref_dhigh + 0.11
rdif_max = -0.01 * fl_ref_dhigh * fl_ref_dhigh + 0.065 * fl_ref_dhigh - 0.02
phipsi_core_min = 100 * ( -0.04 * fl_ref_dhigh + 0.96 )
badsidechains_max = 0.75 * fl_ref_dhigh + 0.75

pr_rfree_max = round(rfree_max,3)
pr_rdif_max = round(rdif_max,3)
pr_phipsi_core_min = round(phipsi_core_min,1)
pr_badsidechains_max = round(badsidechains_max,1)

print '\nSGX expectations for global structure quality metrics at',ref_dhigh,'A resolution'
print '==========================================================================='
print 'Max value for R(free)                                :',pr_rfree_max
print 'Max difference for R(free) - R(work)                 :',pr_rdif_max
print 'Min percentage amino acids in Ramachandran plot core :',pr_phipsi_core_min
print 'Max percentage side chains in abnormal conformations :',pr_badsidechains_max
print '\n1. The value for R-free should only exceed this value if there are'
print '   clear indications of problematic data (anisotropy, twinning).'
print '2. Residues in the Ramachandran plot core are those calculated'
print '   by PROCHECK or an equivalent phi-psi area in other tabulations.'
print '3. Abnormal side chains are those flagged by a chi-1 angle differing'
print '   by 45 degrees from the nearest rotamer as calculated by CCP4/ROTAMER.'

# Write final deposition notes
print '\nPDB Deposition Notes'
print '====================='
print 'Structure deposition file : pdbdeposit.cif'
print 'X-ray data deposition file: hkldeposit.cif'
print '\nThe mmCIF sequence/structure/entity mappings will only need adjustment'
print 'if there is more than one type of protein in the crystal.'
print '\nThe structure deposition file contains annotation data and coordinates.'
print 'This file may be deposited to the PDB through the RCSB/PDB ADIT interface'
print '(http://rcsb-deposit.rutgers.edu/adit/). From the ADIT session,'
print 'select file type "mmCIF" and upload the file. After deposition, use the'
print '"PREVIEW ENTRY" option to check the information uploaded to the RCSB/PDB.'
print 'Any corrections, or any missing or additional information, may be entered'
print 'through this interface.\n'
#
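
#####################################################################
# Optional final check (an illustrative sketch, not part of the     #
# original submission workflow): confirm that both deposition files #
# named above exist and are non-empty before uploading them to ADIT #
#####################################################################

for depositfile in ['pdbdeposit.cif','hkldeposit.cif']:
    fileexists = os.path.exists(depositfile)
    if fileexists == 0 or os.path.getsize(depositfile) == 0:
        print '\nWARNING:',depositfile,'is missing or empty'
#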