#!/usr/bin/env python ################################################################## # # # Deposit3D - a PDB structure deposition tool # # # # A command-line script to create a mmCIF deposition file for # # automated structure submissions to RCSB/PDB ADIT. # # # # Executing the script provides operating instructions. # # # # To configure this script a path needs to be provided to a CCP4 # # installation (cf the ccp4installation parameter). # # # # Disclaimer: This software is distributed 'as is' without # # warranty of any kind. SGX and the authors shall not be # # liable for any direct, consequential or other damages suffered # # by the user or any others resulting from use of this software # # # # Structural GenomiX April 2005 # # # ################################################################## # import sys import os import string # set path to CCP4 installation ccp4installation = '/biodata/software/linux/packages/ccp4-5.0.2' ##################################### # Annotation default initialization # ##################################### # Set the CCP4 default flag ('0') for the cross-validation set, the 'mask' bulk solvent correction and the TRUNCATE defaults for data reduction nfree_exclude = '0' ref_bulksolvent = 'mask' truncate_default_i = '-4.0' truncate_default_f = '0.0' # Null initializations audit_author_name = '?' audit_contact_author_name = '?' audit_contact_author_email = '?' audit_contact_author_address = '?' audit_contact_author_phone = '?' audit_contact_author_fax = '?' citation_title = '?' citation_journal_abbrev = '?' citation_journal_volume = '?' citation_page_first = '?' citation_page_last = '?' citation_year = '?' citation_author_name = '?' data_collection_temp_K = '?' data_collection_date = '?' wavelengths = '?' beamline = '?' detector_type = '?' detector_maker = '?' monochromator_type = '?' xray_method = '?' computing_data_collection = '?' computing_data_reduction = '?' computing_structure_solution = '?' computing_molecular_graphics = '?' computing_structure_refinement = '?' protein_name = '?' protein_ec_number = '?' structure_title = '?' structure_class = '?' structure_keywords = '?' biological_unit = '?' sequence_databasename = '?' sequence_databasecode = '?' source_common_name = '?' source_scientific_name = '?' source_gene_name = '?' source_host_common_name = '?' source_host_scientific_name = '?' acell = '?' bcell = '?' ccell = '?' alpha = '?' beta = '?' gamma = '?' spgno = '?' spgname = '?' matthews_coef = '?' solvent_percent = '?' exptl_crystal_grow_method = '?' exptl_crystal_grow_pH = '?' exptl_crystal_grow_temp = '?' exptl_crystal_grow_components = '?' data_num_unmerged = '?' data_num = '?' data_rlow = '?' data_rhigh = '?' data_percentobs = '?' data_redund = '?' data_rmerge = '?' data_ioversig = '?' datas_num = '?' datas_num_unmerged = '?' datas_rlow = '?' datas_rhigh = '?' datas_percentobs = '?' datas_redund = '?' datas_rmerge = '?' datas_ioversig = '?' ref_b11 = '?' ref_b12 = '?' ref_b13 = '?' ref_b23 = '?' ref_b22 = '?' ref_b33 = '?' ref_dlow = '?' ref_dhigh = '?' ref_bmean = '?' ref_numobs = '?' ref_numall = '?' ref_numfree = '?' ref_numwork = '?' ref_percent = '?' ref_rall = '?' ref_robs = '?' ref_rwork = '?' ref_rfree = '?' ref_dbond = '?' ref_dangle = '?' ref_dtorsion = '?' ref_dchiral = '?' ref_dplane = '?' ref_bmbond = '?' ref_bmangle = '?' ref_bsbond = '?' ref_bsangle = '?' ref_solvent_vdw_probe_radii = '?' ref_solvent_ion_probe_radii = '?' ref_solvent_shrinkage_radii = '?' ref_ksolv = '?' ref_bsolv = '?' ref_natom = '?' ref_nsolvent = '?' 
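# ----------------------------------------------------------------------
# Illustrative sketch only (nothing below calls it): the script checks
# for required files many times with the pattern
#   fileexists = os.path.exists(path)  /  if fileexists == 0: ... sys.exit(1)
# A small helper of the following form could express that pattern once.
# The name require_file is hypothetical and is not part of Deposit3D.
# ----------------------------------------------------------------------
def require_file(path, description):
    # Exit with a short message if a required input file is missing
    if not os.path.exists(path):
        print '\n' + description + ' was not found\n'
        sys.exit(1)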
##################################################### # Initialize operating defaults and structure data # ##################################################### xtalsetup = ccp4installation + '/include/ccp4.setup' entitylist = ccp4installation + '/lib/data/monomers/full_names.list' symmetrylib = ccp4installation + '/lib/data/symop.lib' quote = """'""" pwd = os.getcwd() input = 'none' pdbfile = 'none' mtzfile = 'none' seqfile = 'none' scalafile = 'none' scalepackfile = 'none' templatefile = 'none' reflineList = [] dataList = [] dataList_prev = [] mtzList = [] aList_chains = [] seqList = [] symList = [] aList_connect = [] readdata = 'no' read_mtzlabels = 'no' read_cell = 'no' acell_mtz = '?' bcell_mtz = '?' ccell_mtz = '?' alpha_mtz = '?' beta_mtz = '?' gamma_mtz = '?' found_c = 'no' found_n = 'no' cryst_flag = 'no' ref_anisoflag = 'no' water_flag = 'no' read_project_count = 0 atom_count = 0 solvent_count = 0 num_connect = 0 famp = '?' sd = '?' freer = '?' footnote = ' . ' # Het group lists aList_hets = [] aList_hets_names = [] aList_hets_nonPDB = [] aList_hets_number = [] aList_hets_asym = [] # refmac.out parsing aList_bonds_chain = [] aList_bonds_resno = [] aList_bonds_resname = [] aList_angles_chain = [] aList_angles_resno = [] aList_angles_resname = [] aList_contacts_chain = [] aList_contacts_resno = [] aList_contacts_resname = [] aList_chiral_chain = [] aList_chiral_resno = [] aList_chiral_resname = [] aList_cis_chain = [] aList_cis_resno = [] aList_cis_resname = [] bond_list = 'no' angle_list = 'no' contact_list = 'no' cispep_list = 'no' chiral_list = 'no' iteration_final = 'no' ################ # Check inputs # ################ fileexists = os.path.exists(ccp4installation) if fileexists == 0: print '\nThe CCP4 installation was not found\n' sys.exit(1) fileexists = os.path.exists(xtalsetup) if fileexists == 0: print '\nThe setup file for the CCP4 installation was not found\n' sys.exit(1) fileexists = os.path.exists(entitylist) if fileexists == 0: print '\nThe CCP4 entity list was not found\n' sys.exit(1) fileexists = os.path.exists(symmetrylib) if fileexists == 0: print '\nThe CCP4 symmetry library was not found\n' sys.exit(1) number_of_args = len(sys.argv) if number_of_args == 2: input = str(sys.argv[1]) if number_of_args < 4 and input != 'help': print '\nPlease provide the following files on the command-line (any order):' print 'Coordinates (*.pdb), Reflection data (*.mtz), FASTA sequence (*.faa)' print 'Optionally, you may provide a CCP4/SCALA log file (*.log) or a' print 'SCALEPACK file (*.sca). You may also provide deposition template file' print 'containing "non-electronic" user information (*.template).' print '\nFor each novel ligand (not in CCP4/REFMAC ligand libraries), there' print 'should be a local PDB file containing an idealized coordinate template' print 'using the ligand code as file root and ".ideal" as file extension.' print 'Example: there is a file "UNK.ideal" for a ligand designated "UNK".\n' print 'Note that each protein in the coordinate file should be identified by a separate' print 'chain-id and non-protein entities should be identified by chain-id "W".\n' print 'Enter "help" on the command-line for more information on these formats\n' sys.exit(1) if number_of_args == 2 and input == 'help': print '\nNotes on input file formats: \n' print '1. Each protein in the coordinate file should be identified by a separate' print 'chain-id (usually A,B,C,...). Non-protein entities should be identified' print 'by chain-id "W". 
The file should contain a CRYST1 record.\n' print '2. The reflection file should contain only one column of type F (structure' print 'factor amplitude), one column of type Q (standard deviation on structure ' print 'factor amplitude) and one column of type I (flags for working and test' print 'data for validation). The CCP4 convention with Rfree data flagged by "0"' print 'is applied. This may be changed via script default parameter "nfree_exclude".' print 'All data are used in the calculation, without cutoff on sd(F), i.e. Rall and' print 'Robs are synonymous.\n' print '3. The FASTA sequence file may contain blank lines and a title line identified' print 'by a ">" symbol.\n' print '4. The parsing of the SCALA log files requires that the file was produced by ' print 'CCP4 version 5.*\n' print '5. The parsing of SCALEPACK log files has only been lightly tested with' print 'HKL version 1.97.\n' sys.exit(1) # Banner print '\n___________________________________________________________________\n' print ' ** Deposit3D 1.0 **' print 'Author : John Badger (Structural GenomiX)' print 'Release: April 2005\n' print 'May be modified or redistributed but this banner may not be removed' print 'except by the author. Functional modifications should be cited here.' print '___________________________________________________________________\n' count = 1 while count < number_of_args: inputfile = str(sys.argv[count]) if inputfile.find('.pdb') > -1: pdbfile = inputfile if inputfile.find('.mtz') > -1: mtzfile = inputfile if inputfile.find('.faa') > -1: seqfile = inputfile if inputfile.find('.log') > -1: scalafile = inputfile if inputfile.find('.sca') > -1: scalepackfile = inputfile if inputfile.find('.template') > -1: templatefile = inputfile count = count + 1 # Check for PDB file if pdbfile == 'none': print 'No coordinate file (*.pdb) was found\n' sys.exit(1) else: fileexists = os.path.exists(pdbfile) if fileexists == 0: print '\nThe coordinate file was not found\n' sys.exit(1) # Check for MTZ file if mtzfile == 'none': print 'No data file (*.mtz) was found\n' sys.exit(1) else: fileexists = os.path.exists(mtzfile) if fileexists == 0: print '\nThe data file was not found\n' sys.exit(1) # Check for FASTA sequence file if seqfile == 'none': print 'No sequence file (*.faa) was found\n' sys.exit(1) else: fileexists = os.path.exists(seqfile) if fileexists == 0: print '\nThe sequence file was not found\n' sys.exit(1) # Check for SCALA log file if scalafile == 'none': if scalepackfile == 'none': print 'No merging (SCALA) file was found' else: fileexists = os.path.exists(scalafile) if fileexists == 0: print '\nThe SCALA log file was not found\n' sys.exit(1) computing_data_reduction = 'SCALA, TRUNCATE' # Check for SCALEPACK log file if scalepackfile == 'none': if scalafile == 'none': print 'No merging (SCALEPACK) file was found' else: fileexists = os.path.exists(scalepackfile) if fileexists == 0: print '\nThe SCALEPACK log file was not found\n' sys.exit(1) computing_data_reduction = 'SCALEPACK, TRUNCATE' # Check for deposition information file if templatefile == 'none': print 'No deposition information file (*.template) was found' else: fileexists = os.path.exists(templatefile) if fileexists == 0: print '\nThe deposition information file was not found\n' sys.exit(1) ###################################################### # Inspect coordinate data to obtain entity lists etc # ###################################################### # Read through the coordinate file file = open(pdbfile,'r') allLines = file.readlines() 
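# For reference, the parsing loop below reads fixed columns from each
# PDB record (standard PDB fixed-column format); the slices used are:
#   [0:6]   record tag (ATOM/HETATM/CRYST1/ANISOU/TER/END)
#   [13:16] atom name        [17:20] residue name
#   [21:22] chain-id         [22:26] residue number
#   [30:38] x   [38:46] y   [46:54] z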
file.close() for eachLine in allLines: tag = eachLine[0:6] tag = tag.strip() chain_id = eachLine[21:22] chain_id = chain_id.strip() res_name = eachLine[17:20] res_name = res_name.strip() atom_name = eachLine[13:16] atom_name = atom_name.strip() res_number = eachLine[22:26] x = eachLine[30:38] y = eachLine[38:46] z = eachLine[46:54] # Check for cell dimensions if tag == 'CRYST1': cryst_flag = 'yes' if tag == 'ATOM' or tag == 'HETATM': length_chain_id = len(chain_id) if length_chain_id == 0: print '\nThere are ATOM/HETATM record(s) without chain-ids.' print 'Each protein in the coordinate file should be identified by a separate' print 'chain-id (usually A,B,C,...). Non-protein entities should be identified' print 'by chain-id "W".\n' sys.exit(1) atom_count = atom_count + 1 # protein if chain_id != 'W': # Check if we already have this polypeptide repeat_chain = 'no' count = 0 count_chains = len(aList_chains) while count < count_chains: if chain_id == aList_chains[count]: repeat_chain = 'yes' count = count + 1 # If a new chain, add to the list if repeat_chain == 'no': aList_chains.append(chain_id) # Non-protein atom count if chain_id == 'W': solvent_count = solvent_count + 1 # ligands if chain_id == 'W' and res_name != 'HOH': # Check if we already have this entity repeat = 'no' count = 0 count_hets = len(aList_hets) while count < count_hets: if res_name == aList_hets[count]: repeat = 'yes' count = count + 1 # If a new entity, add to the list if repeat == 'no': aList_hets.append(res_name) # Check for water if res_name == 'HOH': water_flag = 'yes' # Check for peptide links across non-consecutive residue nos if chain_id != 'W': if atom_name == 'C': store_chain_c = chain_id store_number_c = res_number store_name_c = atom_name store_res_c = res_name xc = float(x) yc = float(y) zc = float(z) found_c = 'yes' if atom_name == 'N': store_chain_n = chain_id store_number_n = res_number store_name_n = atom_name store_res_n = res_name xn = float(x) yn = float(y) zn = float(z) found_n = 'yes' if found_c == 'yes' and found_n == 'yes': store_number_c_int = int(store_number_c) store_number_n_int = int(store_number_n) seqno_diff = store_number_c_int - store_number_n_int seqno_diff = abs(seqno_diff) if seqno_diff > 1: dx = xn - xc dy = yn - yc dz = zn - zc dist = dx*dx + dy*dy + dz*dz # Add special TRANS link record if it seems to be one if dist < 3.0: num_connect = num_connect + 1 link_record = 'LINK C ' + store_res_c + ' ' + store_chain_c + store_number_c \ + ' N ' + store_res_n + ' ' + store_chain_n + store_number_n \ + ' TRANS' aList_connect.append(link_record) found_c = 'no' found_n = 'no' # Check for anisotropic refinement if tag == 'ANISOU': ref_anisoflag = 'yes' # Checks on PDB file integrity if cryst_flag == 'no': print '\nThe coordinate file must contain a CRYST1 record\n' sys.exit(1) number_chains = len(aList_chains) if number_chains == 0: print '\nEach protein molecule must be identified by chain-id (A,B,..)' print 'The water and ligand entities must be identified by chain-id W.\n' sys.exit(1) ############################ # Obtain the entity lists # ############################ fileexists = os.path.exists(entitylist) if fileexists != 0: file = open(entitylist,'r') allLines = file.readlines() file.close() else: print '\nList of PDB entities was not found\n' sys.exit(1) count = 0 count_hets = len(aList_hets) while count < count_hets: found = 'no' pdbentity = aList_hets[count] pdbentity = pdbentity.strip() for eachLine in allLines: tag = eachLine[0:4] tag = tag.strip() if tag == 'code': entitycode = 
eachLine[5:8] entitycode = entitycode.strip() if tag == 'name': entityname = eachLine[5:80] entityname = entityname.strip() if pdbentity == entitycode: found = 'yes' aList_hets_names.append(entityname) # Obtain user input of names for novel ligands and store if found == 'no': print '\nNovel ligand identified by code ',pdbentity novelligand = raw_input('Enter a name for this ligand: ') novelligand = novelligand.strip() aList_hets_names.append(novelligand) aList_hets_nonPDB.append(pdbentity) count = count + 1 # List PDB name assignments from the REFMAC5 list if count_hets > 0: print '\nList of PDB HETNAM assignments' print '==============================' count = 0 while count < count_hets: prentitycode = aList_hets[count] prentityname = aList_hets_names[count] print prentitycode,prentityname count = count + 1 # Establish lists of ligand entity codes and pointers number_entities = len(aList_hets) entity_list_number = 1 count = 0 if number_entities > 0: while count < number_entities: entity_list_number = entity_list_number + 1 pr_entity_list_number = str(entity_list_number) aList_hets_number.append(pr_entity_list_number) entity_code = aList_hets[count] entity_code_asym = entity_code + '_W ' aList_hets_asym.append(entity_code_asym) count = count + 1 if water_flag == 'yes': entity_list_number = entity_list_number + 1 water_entity_list_number = str(entity_list_number) # Check we have information to deal with novel entities number_nonPDB = len(aList_hets_nonPDB) if number_nonPDB > 0: count = 0 while count < number_nonPDB: entity = aList_hets_nonPDB[count] entity_file = entity + '.ideal' # Check file exists and does contain appropriate ATOM/HETATM records fileexists = os.path.exists(entity_file) if fileexists == 0: print 'Ideal coordinate file for the ligand was not found: ',entity_file,'\n' sys.exit(1) else: ligand_records = 'no' file = open(entity_file, 'r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find(entity) > -1: ligand_records = 'yes' if ligand_records == 'no': print '\nFile did not contain ',entity,' records\n' sys.exit(1) count = count + 1 # Obtain atom counts if atom_count == 0: print '\nNo atoms were found in this PDB file\n' sys.exit(1) ref_natom = str(atom_count) ref_nsolvent = str(solvent_count) ################################################### # Rewrite clean PDB now everything seems in order # ################################################### file = open(pdbfile,'r') allLines = file.readlines() file.close() file = open('temp_use.pdb','w') # Write any peptide LINK records if num_connect > 0: count = 0 while count < num_connect: link_record = aList_connect[count] file.write(link_record) file.write('\n') count = count + 1 # Write CRYST1, ATOM/HETATM, TER, END records for eachLine in allLines: tag = eachLine[0:6] tag = tag.strip() if tag == 'CRYST1': file.write(eachLine) if tag == 'ATOM' or tag == 'HETATM': file.write(eachLine) if tag == 'TER': file.write(eachLine) if tag == 'END': file.write(eachLine) file.close() ############################################## # Begin reporting main calculation processes # ############################################## print '\nProcess Summary' print '===============' ######################################################### # Analyse mtz file for label and spacegroup information # ######################################################### # C-shell execution of MTZDMP filename = 'runmtzdmp.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) 
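# The generated wrapper (runmtzdmp.sh) ends up looking roughly like:
#   #!/bin/csh -f
#   source /path/to/ccp4.setup
#   $CETC/mtzdmp input.mtz -s > mtzdmp.out
# (the paths shown are placeholders for the configured CCP4 setup file
# and the user-supplied MTZ file)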
file.write('\n$CETC/mtzdmp ') file.write(mtzfile) file.write(' -s > mtzdmp.out\n') file.close() os.system('chmod +x runmtzdmp.sh') os.system('./runmtzdmp.sh') file = open('mtzdmp.out', 'r') allLines = file.readlines() file.close() os.remove('mtzdmp.out') os.remove('runmtzdmp.sh') for eachLine in allLines: # Determine space group number if eachLine.find('* Space Group') > -1: mtzList = eachLine.split() spgno = mtzList[4] # Initialize to read mtz data table if eachLine.find('OVERALL FILE STATISTICS') > -1: read_mtzlabels = 'yes' if read_mtzlabels == 'yes': mtzList = eachLine.split() mtzList_length = len(mtzList) if mtzList_length == 12: # Determine amplitude label if mtzList[10] == 'F': famp = mtzList[11] # Determine standard deviation(amplitude) label if mtzList[10] == 'Q': sd = mtzList[11] # Determine Cross-validation label if mtzList[10] == 'I': freer = mtzList[11] # Try to parse wavelength (available from CCP4 5 mtzdumps) if read_project_count > 0: read_project_count = read_project_count + 1 if eachLine.find('Dataset ID, project/crystal/dataset names, cell dimensions, wavelength') > -1: read_project_count = read_project_count + 1 if read_project_count == 7: mtzList = eachLine.split() mtzList_length = len(mtzList) read_project_count = 0 if mtzList_length == 1: mtzwavelength = mtzList[0] if mtzwavelength != '0.00000' and wavelengths == '?': wavelengths = mtzwavelength # Determine cell if read_cell == 'yes': mtzList = eachLine.split() mtzList_length = len(mtzList) if mtzList_length == 6: acell_mtz = mtzList[0] bcell_mtz = mtzList[1] ccell_mtz = mtzList[2] alpha_mtz = mtzList[3] beta_mtz = mtzList[4] gamma_mtz = mtzList[5] read_cell = 'no' if eachLine.find('Cell Dimensions') > -1: read_cell = 'yes' # Check all items were found if famp == '?': print 'The MTZ file label for structure factor amplitude was not determined (type F)' sys.exit(1) else: print 'Using structure factor amplitude:',famp if sd == '?': print '\nThe MTZ file label for structure factor standard deviation was not determined (type Q)' sys.exit(1) else: print 'Using standard deviation on structure factor amplitude:',sd if freer == '?': print '\nThe MTZ file label for the freeR flag was not determined (type I)' sys.exit(1) else: print 'Using Free-R data set defined by flag:',freer # Set H-M space group name from space group number file = open(symmetrylib, 'r') allLines = file.readlines() file.close() for eachLine in allLines: symList = eachLine.split() length_symList = len(symList) if length_symList > 1: if symList[0] == spgno: symList = eachLine.split(quote) spgname = symList[1] ################################ # Parse FASTA sequence file # ################################ print 'Parsing FASTA file' # Load single character series into a large list title_count = 0 file = open(seqfile,'r') allLines = file.readlines() file.close() for eachLine in allLines: parse_line = 'yes' # skip title line tag = eachLine[0:1] if tag == '>': parse_line = 'no' title_count = title_count + 1 # Trap out multiple sequences deliminated by title if title_count > 1: parse_line = 'no' # skip blank lines line_length = len(eachLine) if line_length == 0: parse_line = 'no' # load if parse_line == 'yes': sequence_line = eachLine.strip() line_length = len(sequence_line) line_length = line_length + 1 count = 1 while count < line_length: j = count i = count - 1 aacode = sequence_line[i:j] seqList.append(aacode) count = count + 1 # Check that we have the sequence sequence_length = len(seqList) if sequence_length == 0: print '\nFASTA sequence extraction failed\n' 
sys.exit(1) ######################################## # Parse SCALA log file (if available) # ######################################## # This function requires that the SCALA log came from CCP4 5.0.2 since it parses from the new summary if scalafile != 'none': print 'Parsing SCALA log file' file = open(scalafile,'r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('Overall') > -1 and eachLine.find('OuterShell') > -1: readdata = 'yes' if readdata == 'yes': dataList = eachLine.split() if eachLine.find('Low resolution limit') > -1: data_rlow = dataList[3] datas_rlow = dataList[4] if eachLine.find('High resolution limit') > -1: data_rhigh = dataList[3] datas_rhigh = dataList[4] if eachLine.find('Rmerge') > -1: data_rmerge = dataList[1] datas_rmerge = dataList[2] if eachLine.find('Total number of observations') > -1: data_num_unmerged = dataList[4] datas_num_unmerged = dataList[5] if eachLine.find('Total number unique') > -1: data_num = dataList[3] datas_num = dataList[4] if eachLine.find('Mean(I)/sd(I)') > -1: data_ioversig = dataList[1] datas_ioversig = dataList[2] if eachLine.find('Completeness') > -1: data_percentobs = dataList[1] datas_percentobs = dataList[2] if eachLine.find('Multiplicity') > -1: data_redund = dataList[1] datas_redund = dataList[2] ############################################ # Parse SCALEPACK log file (if available) # ############################################ # Only lightly tested with DENZO/SCALEPACK 1.97 installation float_num_refs_theoretical = '?' if scalepackfile != 'none': print 'Parsing SCALEPACK log file' file = open(scalepackfile,'r') allLines = file.readlines() file.close() readredundancy = 'no' res_table = 'no' for eachLine in allLines: # Resolution, Rmerge, Av-I/error if eachLine.find(' All reflections') > -1: dataList = eachLine.split() # Rmerge data_rmerge = dataList[6] datas_rmerge = dataList_prev[6] # Av-I/error mean_i = dataList[2] mean_sigi = dataList[3] mean_i = float(mean_i) mean_sigi = float(mean_sigi) netIoveravsigmaI = mean_i/mean_sigi netIoveravsigmaI_out = round(netIoveravsigmaI,1) data_ioversig = str(netIoveravsigmaI_out) mean_i = dataList_prev[2] mean_sigi = dataList_prev[3] mean_i = float(mean_i) mean_sigi = float(mean_sigi) netIoveravsigmaI = mean_i/mean_sigi netIoveravsigmaI_out = round(netIoveravsigmaI,1) datas_ioversig = str(netIoveravsigmaI_out) # Resolution datas_rlow = dataList_prev[0] datas_rhigh = dataList_prev[1] data_rhigh = dataList_prev[1] # Low resolution shell limit if res_table == 'yes' and data_rlow == '?': dataList = eachLine.split() data_rlow = dataList[0] if eachLine.find('limit Angstrom I error stat. 
Chi**2 R-fac R-fac') > -1: res_table = 'yes' # Total reflection count if eachLine.find('All films') > -1: dataList = eachLine.split() data_num_unmerged = dataList[2] # Get average redundancy if readredundancy == 'yes' and eachLine.find('All hkl') > -1: dataList = eachLine.split() data_num = dataList[12] float_data_num = float(data_num) float_data_num_unmerged = float(data_num_unmerged) redundancy = float_data_num_unmerged/float_data_num redundancy_out = round(redundancy,2) data_redund = str(redundancy_out) datas_num = dataList_prev[12] readredundancy = 'no' if eachLine.find('Summary of observation redundancies by shells:') > -1 and data_num_unmerged != '?': readredundancy = 'yes' # Keep previous line dataList_prev = eachLine.split() # Compute overall data completeness versus theoretical filename = 'rununique.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/unique HKLOUT temp_unique.mtz << end_unique > unique.out\n') file.write('TITLE unique_data\n') file.write('LABOUT F=XFUNI SIGF=XSIGFUNI\n') file.write('RESOLUTION ') file.write(data_rhigh) file.write('\nSYMM ') file.write(spgno) file.write('\nCELL ') file.write(acell_mtz) file.write(' ') file.write(bcell_mtz) file.write(' ') file.write(ccell_mtz) file.write(' ') file.write(alpha_mtz) file.write(' ') file.write(beta_mtz) file.write(' ') file.write(gamma_mtz) file.write('\nEND\n') file.write('end_unique\n') file.close() os.system('chmod +x rununique.sh') os.system('./rununique.sh') fileexists = os.path.exists('unique.out') if fileexists == 0: print '\nSCALEPACK data completeness calculation failed' sys.exit(1) else: file = open('unique.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('reflections within resolution limits') > -1: dataList = eachLine.split() num_refs_theoretical = dataList[0] float_num_refs_theoretical = float(num_refs_theoretical) if float_num_refs_theoretical != '?': float_data_percentobs = 100.0 * float_data_num/float_num_refs_theoretical data_percentobs = round(float_data_percentobs,1) data_percentobs = str(data_percentobs) os.remove('unique.out') os.remove('rununique.sh') # Compute outer shell data completeness versus theoretical float_num_refs_theoretical = '?' filename = 'runmtzdmp.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/mtzdump HKLIN temp_unique.mtz << end-mtzdump > mtzdump.out\n') file.write('NREF 1\n') file.write('STATS NBIN 1 RESO ') file.write(datas_rlow) file.write(' ') file.write(datas_rhigh) file.write('\nEND\n') file.write('end-mtzdump\n') file.close() os.system('chmod +x runmtzdmp.sh') os.system('./runmtzdmp.sh') fileexists = os.path.exists('mtzdump.out') if fileexists == 0: print '\nSCALEPACK shell data completeness calculation failed' sys.exit(1) else: file = open('mtzdump.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('No. 
of reflections used in FILE STATISTICS') > -1: dataList = eachLine.split() num_refs_theoretical = dataList[7] float_num_refs_theoretical = float(num_refs_theoretical) if float_num_refs_theoretical != '?': float_datas_num = float(datas_num) float_datas_percentobs = 100.0 * float_datas_num/float_num_refs_theoretical datas_percentobs = round(float_datas_percentobs,1) datas_percentobs = str(datas_percentobs) os.remove('mtzdump.out') os.remove('runmtzdmp.sh') os.remove('temp_unique.mtz') ################################################### # Parse template information file (if available) # ################################################### if templatefile != 'none': print 'Parsing template file' file = open(templatefile,'r') allLines = file.readlines() file.close() for eachLine in allLines: dataList = eachLine.split(':') length = len(dataList) # Parse each item from the template file if length == 2: # Section 1 if eachLine.find('audit_author_name') > -1: audit_author_name = dataList[1] audit_author_name = audit_author_name.strip() if eachLine.find('audit_contact_author_name') > -1: audit_contact_author_name = dataList[1] audit_contact_author_name =audit_contact_author_name.strip() if eachLine.find('audit_contact_author_email') > -1: audit_contact_author_email = dataList[1] audit_contact_author_email = audit_contact_author_email.strip() if eachLine.find('audit_contact_author_address') > -1: audit_contact_author_address = dataList[1] audit_contact_author_address = audit_contact_author_address.strip() if eachLine.find('audit_contact_author_phone') > -1: audit_contact_author_phone = dataList[1] audit_contact_author_phone = audit_contact_author_phone.strip() if eachLine.find('audit_contact_author_fax') > -1: audit_contact_author_fax = dataList[1] audit_contact_author_fax = audit_contact_author_fax.strip() if eachLine.find('citation_title') > -1: citation_title = dataList[1] citation_title = citation_title.strip() if eachLine.find('citation_journal_abbrev') > -1: citation_journal_abbrev = dataList[1] citation_journal_abbrev = citation_journal_abbrev.strip() if eachLine.find('citation_journal_volume') > -1: citation_journal_volume = dataList[1] citation_journal_volume = citation_journal_volume.strip() if eachLine.find('citation_page_first') > -1: citation_page_first = dataList[1] citation_page_first = citation_page_first.strip() if eachLine.find('citation_page_last') > -1: citation_page_last = dataList[1] citation_page_last = citation_page_last.strip() if eachLine.find('citation_year') > -1: citation_year = dataList[1] citation_year = citation_year.strip() if eachLine.find('citation_author_name') > -1: citation_author_name = dataList[1] citation_author_name = citation_author_name.strip() # Section 2 if eachLine.find('data_collection_temp_K') > -1: data_collection_temp_K = dataList[1] data_collection_temp_K = data_collection_temp_K.strip() if eachLine.find('wavelengths') > -1: wavelengths = dataList[1] wavelengths = wavelengths.strip() if eachLine.find('data_collection_date') > -1: data_collection_date = dataList[1] data_collection_date = data_collection_date.strip() if eachLine.find('beamline') > -1: beamline = dataList[1] beamline = beamline.strip() if eachLine.find('detector_type') > -1: detector_type = dataList[1] detector_type = detector_type.strip() if eachLine.find('detector_maker') > -1: detector_maker = dataList[1] detector_maker = detector_maker.strip() if eachLine.find('monochromator_type') > -1: monochromator_type = dataList[1] monochromator_type = monochromator_type.strip() if 
eachLine.find('xray_method') > -1: xray_method = dataList[1] xray_method = xray_method.strip() if eachLine.find('computing_data_collection') > -1: computing_data_collection = dataList[1] computing_data_collection = computing_data_collection.strip() if eachLine.find('computing_data_reduction') > -1: computing_data_reduction = dataList[1] computing_data_reduction = computing_data_reduction.strip() if eachLine.find('computing_structure_solution') > -1: computing_structure_solution = dataList[1] computing_structure_solution = computing_structure_solution.strip() if eachLine.find('computing_molecular_graphics') > -1: computing_molecular_graphics = dataList[1] computing_molecular_graphics = computing_molecular_graphics.strip() if eachLine.find('computing_structure_refinement') > -1: computing_structure_refinement = dataList[1] computing_structure_refinement = computing_structure_refinement.strip() # Section 3 if eachLine.find('protein_name') > -1: protein_name = dataList[1] protein_name = protein_name.strip() if eachLine.find('protein_ec_number') > -1: protein_ec_number = dataList[1] protein_ec_number = protein_ec_number.strip() if eachLine.find('structure_title') > -1: structure_title = dataList[1] structure_title = structure_title.strip() if eachLine.find('structure_class') > -1: structure_class = dataList[1] structure_class = structure_class.strip() if eachLine.find('structure_keywords') > -1: structure_keywords = dataList[1] structure_keywords = structure_keywords.strip() if eachLine.find('biological_unit') > -1: biological_unit = dataList[1] biological_unit = biological_unit.strip() if eachLine.find('sequence_databasename') > -1: sequence_databasename = dataList[1] sequence_databasename = sequence_databasename.strip() if eachLine.find('sequence_databasecode') > -1: sequence_databasecode = dataList[1] sequence_databasecode = sequence_databasecode.strip() if eachLine.find('source_common_name') > -1: source_common_name = dataList[1] source_common_name = source_common_name.strip() if eachLine.find('source_scientific_name') > -1: source_scientific_name = dataList[1] source_scientific_name = source_scientific_name.strip() if eachLine.find('source_gene_name') > -1: source_gene_name = dataList[1] source_gene_name = source_gene_name.strip() if eachLine.find('source_host_common_name') > -1: source_host_common_name = dataList[1] source_host_common_name = source_host_common_name.strip() if eachLine.find('source_host_scientific_name') > -1: source_host_scientific_name = dataList[1] source_host_scientific_name = source_host_scientific_name.strip() # Section 4 if eachLine.find('exptl_crystal_grow_method') > -1: exptl_crystal_grow_method = dataList[1] exptl_crystal_grow_method = exptl_crystal_grow_method.strip() if eachLine.find('exptl_crystal_grow_pH') > -1: exptl_crystal_grow_pH = dataList[1] exptl_crystal_grow_pH = exptl_crystal_grow_pH.strip() if eachLine.find('exptl_crystal_grow_temp') > -1: exptl_crystal_grow_temp = dataList[1] exptl_crystal_grow_temp = exptl_crystal_grow_temp.strip() if eachLine.find('exptl_crystal_grow_components') > -1: exptl_crystal_grow_components = dataList[1] exptl_crystal_grow_components = exptl_crystal_grow_components.strip() ############################################################################# # C-shell dictionary generation for REFMAC5 for minimal description ligands # ############################################################################# print 'Checking for PDB ligands defined only by minimal descriptions' # Generate temporary ligand name liganddir = pwd + 
'/temp.lib' fileexists = os.path.exists(liganddir) if fileexists != 0: os.remove(liganddir) # Eliminate non-PDB entities from coordinate file file = open('temp_use.pdb','r') allLines = file.readlines() file.close() temppdbfile = 'temp_pdb.pdb' file = open(temppdbfile, 'w') for eachLine in allLines: pdbentity = 'yes' tag = eachLine[0:6] tag = tag.strip() res_name = eachLine[17:20] res_name = res_name.strip() if tag == 'CRYST1': file.write(eachLine) if tag == 'TER': file.write(eachLine) if tag == 'END': file.write(eachLine) if tag == 'LINK': file.write(eachLine) if tag == 'ATOM' or tag == 'HETATM': # check versus non-PDB entity list if number_nonPDB > 0: count = 0 while count < number_nonPDB: entity = aList_hets_nonPDB[count] if entity == res_name: pdbentity = 'no' count = count + 1 if pdbentity == 'yes': file.write(eachLine) file.write('END\n') file.close() # REFMAC5 run to generate ligands filename = 'runrefmac5.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/refmac5 XYZIN ') file.write(temppdbfile) file.write(' XYZOUT temp_lig.pdb LIB_OUT ') file.write(liganddir) file.write(' << end-lig > temp_lig.out \n') file.write('MODE NEWEntry\n') file.write('MAKE_RESTRAINTS CHECK None\n') file.write('MAKE_RESTRAINTS EXIT Yes\n') file.write('END\n') file.write('end-lig\n') file.close() os.system('chmod +x runrefmac5.sh') os.system('./runrefmac5.sh') fileexists = os.path.exists('temp_lig.pdb') if fileexists == 0: print 'REFMAC5 dictionary calculation for PDB entities failed - check "temp_lig.out"' print 'The usual problem is atom names inconsistent with the PDB ligand code\n' sys.exit(1) else: os.remove('temp_lig.pdb') os.remove('temp_lig.out') os.remove('runrefmac5.sh') os.remove(temppdbfile) ################################################################### # C-shell dictionary generation with LIBCHECK for non-PDB ligands # ################################################################### if number_nonPDB > 0: print 'Building dictionaries for novel ligands from templates' count = 0 while count < number_nonPDB: entity = aList_hets_nonPDB[count] pdbtemplate = entity + '.ideal' # Establish and execute CCP4/LIBCHECK to build the library filename = 'runlibcheck.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/libcheck XYZIN << end-libcheck > libcheck.out \n') file.write('N\n') file.write('_COOR Y\n') file.write('_FILE_PDB ') file.write(pdbtemplate) file.write('\nMON ') file.write(entity) file.write('\n \n') file.write('\nend-libcheck') file.close() os.system('chmod +x runlibcheck.sh') os.system('./runlibcheck.sh') os.remove('runlibcheck.sh') fileexists = os.path.exists('libcheck.lib') if fileexists == 0: print 'The output library file from LIBCHECK was not created' print 'This entity may already be in the standard dictionaries' sys.exit(1) os.remove('libcheck.out') entity_libcheck_pdb = 'libcheck_' + entity + '.pdb' entity_libcheck_ps = 'libcheck_' + entity + '.ps' entity_libcheck_cif = 'libcheck_' + entity + '.cif' fileexists = os.path.exists(entity_libcheck_pdb) if fileexists != 0: os.remove(entity_libcheck_pdb) fileexists = os.path.exists(entity_libcheck_ps) if fileexists != 0: os.remove(entity_libcheck_ps) fileexists = os.path.exists(entity_libcheck_cif) if fileexists != 0: os.remove(entity_libcheck_cif) # Post-process to remove plane definitions and variable torsions # from library file libcheck.lib as they are not fully reliable # and 
use of these in refinements may vary write_flag = 'yes' file = open('libcheck.lib','r') allLines = file.readlines() file.close() os.remove('libcheck.lib') entitylib = entity + '.lib' file = open(entitylib, 'w') for eachLine in allLines: # Stop once plane records are encountered if eachLine.find('_chem_comp_plane_atom.comp_id') > -1: write_flag = 'no' # Skip variable torsions if write_flag == 'yes': if eachLine.find('var_') == -1: file.write(eachLine) # Close out more neatly file.write('# ------------------------------------------------------\n') file.write('# ------------------------------------------------------\n') file.write('# ------------------------------------------------------\n') file.close() # Combine novel entity library with any special PDB libraries fileexists = os.path.exists(liganddir) if fileexists != 0: filename = 'runlibcheck.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/libcheck << end-libcheck > libcheck.out \n') file.write('N\n') file.write('_FILE_L ') file.write(entitylib) file.write('\n_FILE_L2 ') file.write(liganddir) file.write('\nend-libcheck') file.close() os.system('chmod +x runlibcheck.sh') os.system('./runlibcheck.sh') os.remove('runlibcheck.sh') os.remove('libcheck.out') os.remove(entitylib) os.rename('libcheck.lib',liganddir) else: os.rename(entitylib,liganddir) count = count + 1 ##################################################### # C-shell R-factor and stereochemistry calculation # ##################################################### print 'Running R-factor and stereochemistry calculations' filename = 'runrefmac5.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/refmac5 HKLIN ') file.write(mtzfile) file.write(' XYZIN temp_use.pdb HKLOUT temp_ref.mtz XYZOUT temp_ref.pdb ') fileexists = os.path.exists(liganddir) if fileexists != 0: file.write('LIBIN ') file.write(liganddir) file.write(' << end-refmac5 > refmac.out\n') file.write('LABIN FP=') file.write(famp) file.write(' SIGFP=') file.write(sd) file.write(' FREE=') file.write(freer) file.write('\nLABOUT FC=FC PHIC=PHIC DELFWT=DELFWT PHDELWT=PHDELWT FWT=FWT PHWT=PHWT FOM=FOM\n') file.write('FREE ') file.write(nfree_exclude) file.write('\nREFI TYPE RESTrained\n') file.write('REFI RESI MLKF\n') if ref_anisoflag == 'no': file.write('REFI BREF ISOT METH CGMAT\n') else: file.write('REFI BREF ANISotropic METH CGMAT\n') if ref_bulksolvent == 'babinet': file.write('SCAL TYPE BULK LSSC ANIS\n') file.write('SOLVENT NO\n') if ref_bulksolvent == 'mask': file.write('SCAL TYPE SIMPLE LSSC ANIS\n') file.write('SOLVENT YES\n') if ref_bulksolvent == 'fixedbabinet': file.write('SCAL TYPE BULK LSSC ANIS FIXBulk SCBULk 0.78 BBULk 180.0 \n') file.write('SOLVENT NO\n') file.write('MAKE_RESTRAINTS HYDR N\n') file.write('MAKE_RESTRAINTS NEWLigand Noexit\n') file.write('MAKE_RESTRAINTS SS Y\n') file.write('MAKE_RESTRAINTS CISP Y\n') file.write('NCYC 0\n') file.write('MONI DIST 6.0\n') file.write('MONI ANGL 8.0\n') file.write('MONI TORSION 10.0\n') file.write('MONI PLANE 10.0\n') file.write('MONI VANderwaals 4.25\n') file.write('MONI CHIRAL 8.0\n') file.write('MONI BFACTOR 99.0\n') file.write('USECWD\n') file.write('PNAME NOID\n') file.write('DNAME output\n') file.write('END\n') file.write('end-refmac5\n') file.close() os.system('chmod +x runrefmac5.sh') os.system('./runrefmac5.sh') fileexists = os.path.exists('output.refmac') if fileexists == 0: print '\nREFMAC5 
calculation failed - check refmac.out\n' print 'The usual problem is atom names inconsistent with the PDB residue code\n' sys.exit(1) fileexists = os.path.exists('temp_ref.pdb') if fileexists == 0: print '\nREFMAC5 calculation failed - check refmac.out\n' print 'The usual problem is atom names inconsistent with the PDB residue code\n' sys.exit(1) fileexists = os.path.exists('runrefmac5.sh') if fileexists != 0: os.remove('runrefmac5.sh') fileexists = os.path.exists('temp_ref.mtz') if fileexists != 0: os.remove('temp_ref.mtz') fileexists = os.path.exists('temp.lib') if fileexists != 0: os.remove('temp.lib') # Parse the required deposition information from output.refmac # Note: this file already conveniently uses mmCIF tags.. file = open('output.refmac','r') allLines = file.readlines() file.close() for eachLine in allLines: reflineList = eachLine.split() if eachLine.find('_refine.ls_R_factor_R_all') > -1: ref_rall = reflineList[1] if eachLine.find('_refine.ls_R_factor_R_free') > -1: ref_rfree = reflineList[1] if eachLine.find('_refine.ls_R_factor_R_work') > -1: ref_rwork = reflineList[1] if eachLine.find('_refine.ls_d_res_low') > -1: ref_dlow = reflineList[1] if eachLine.find('_refine.ls_d_res_high') > -1: ref_dhigh = reflineList[1] if eachLine.find('_refine.ls_R_factor_R_all') > -1: ref_rall = reflineList[1] ref_robs = reflineList[1] if eachLine.find('_refine.ls_number_reflns_R_free') > -1: ref_numfree = reflineList[1] if eachLine.find('_refine.ls_number_reflns_R_work') > -1: ref_numwork = reflineList[1] if eachLine.find('_refine.ls_number_reflns_obs') > -1: ref_numobs = reflineList[1] ref_numall = reflineList[1] if eachLine.find('_refine.B_iso_mean') > -1: ref_bmean = reflineList[1] if eachLine.find('_refine.aniso_B[1][1]') > -1: ref_b11 = reflineList[1] if eachLine.find('_refine.aniso_B[2][2]') > -1: ref_b22 = reflineList[1] if eachLine.find('_refine.aniso_B[3][3]') > -1: ref_b33 = reflineList[1] if eachLine.find('_refine.aniso_B[1][2]') > -1: ref_b12 = reflineList[1] if eachLine.find('_refine.aniso_B[1][3]') > -1: ref_b13 = reflineList[1] if eachLine.find('_refine.aniso_B[2][3]') > -1: ref_b23 = reflineList[1] if eachLine.find('_cell.length_a') > -1: acell = reflineList[1] if eachLine.find('_cell.length_b') > -1: bcell = reflineList[1] if eachLine.find('_cell.length_c') > -1: ccell = reflineList[1] if eachLine.find('_cell.angle_alpha') > -1: alpha = reflineList[1] if eachLine.find('_cell.angle_beta') > -1: beta = reflineList[1] if eachLine.find('_cell.angle_gamma') > -1: gamma = reflineList[1] if eachLine.find('_refine.solvent_vdw_probe_radii') > -1: ref_solvent_vdw_probe_radii = reflineList[1] if eachLine.find('_refine.solvent_ion_probe_radii') > -1: ref_solvent_ion_probe_radii = reflineList[1] if eachLine.find('_refine.solvent_shrinkage_radii') > -1: ref_solvent_shrinkage_radii = reflineList[1] if eachLine.find('_refine.solvent_model_param_ksol') > -1: ref_ksolv = reflineList[1] if eachLine.find('_refine.solvent_model_param_bsol') > -1: ref_bsolv = reflineList[1] # CCP4 4.2 tags if eachLine.find('Bond distances: refined atoms') > -1: ref_dbond = eachLine[53:59] ref_dbond = ref_dbond.strip() if eachLine.find('Bond angles : refined atom') > -1: ref_dangle = eachLine[53:59] ref_dangle = ref_dangle.strip() if eachLine.find('Torsion angles, period 1. 
refined') > -1: ref_dtorsion = eachLine[53:59] ref_dtorsion = ref_dtorsion.strip() if eachLine.find('Chiral centres: refined atoms') > -1: ref_dchiral = eachLine[53:59] ref_dchiral = ref_dchiral.strip() if eachLine.find('Planar groups: refined atoms') > -1: ref_dplane = eachLine[53:59] ref_dplane = ref_dplane.strip() if eachLine.find('M. chain bond B values') > -1: ref_bmbond = eachLine[53:59] ref_bmbond = ref_bmbond.strip() if eachLine.find('M. chain angle B values: refined atom') > -1: ref_bmangle = eachLine[53:59] ref_bmangle = ref_bmangle.strip() if eachLine.find('S. chain bond B values: refined atoms') > -1: ref_bsbond = eachLine[53:59] ref_bsbond = ref_bsbond.strip() if eachLine.find('S. chain angle B values: refined atoms') > -1: ref_bsangle = eachLine[53:59] ref_bsangle = ref_bsangle.strip() # or CCP4 5.0.2 CIF-like tags if eachLine.find('r_bond_refined_d') > -1: ref_dbond = reflineList[2] if eachLine.find('r_angle_refined_deg') > -1: ref_dangle = reflineList[2] if eachLine.find('r_dihedral_angle_1_deg') > -1: ref_dtorsion = reflineList[2] if eachLine.find('r_chiral_restr') > -1: ref_dchiral = reflineList[2] if eachLine.find('r_gen_planes_refined') > -1: ref_dplane = reflineList[2] if eachLine.find('r_mcbond_it') > -1: ref_bmbond = reflineList[2] if eachLine.find('r_mcangle_it') > -1: ref_bmangle = reflineList[2] if eachLine.find('r_scbond_it') > -1: ref_bsbond = reflineList[2] if eachLine.find('r_scangle_it') > -1: ref_bsangle = reflineList[2] # Fix for cubic space groups if ref_b11 == '?': ref_b11 = '0.00' if ref_b22 == '?': ref_b22 = '0.00' if ref_b33 == '?': ref_b33 = '0.00' if ref_b12 == '?': ref_b12 = '0.00' if ref_b13 == '?': ref_b13 = '0.00' if ref_b23 == '?': ref_b23 = '0.00' # Check for incorrect use of cross-validation flag (Rwork differs from Rfree by less than 1%) if ref_rfree != '?' and ref_rwork != '?': float_ref_rfree = float(ref_rfree) float_ref_rwork = float(ref_rwork) float_rdif = float_ref_rfree - float_ref_rwork if float_rdif < 0.01: print '\nRwork and Rfree are extremely close !' print 'The default assignment of Rfree flags (0) by script parameter nfree_exclude' print 'needs to be changed\n' sys.exit(1) ########################################################### # Compute refinement data completeness versus theoretical # ########################################################### float_num_refs_theoretical = '?' 
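# Completeness here is taken as 100 * (reflections used in refinement) /
# (theoretical unique reflections reported by CCP4 UNIQUE for the same
# cell, symmetry and resolution limit). As an illustration with made-up
# numbers, 24500 observed of 25000 theoretical gives 98.0 percent.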
filename = 'rununique.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/unique HKLOUT temp_unique.mtz << end_unique > unique.out\n') file.write('TITLE unique_data\n') file.write('LABOUT F=XFUNI SIGF=XSIGFUNI\n') file.write('RESOLUTION ') file.write(ref_dhigh) file.write('\nSYMM ') file.write(spgno) file.write('\nCELL ') file.write(acell) file.write(' ') file.write(bcell) file.write(' ') file.write(ccell) file.write(' ') file.write(alpha) file.write(' ') file.write(beta) file.write(' ') file.write(gamma) file.write('\nEND\n') file.write('end_unique\n') file.close() os.system('chmod +x rununique.sh') os.system('./rununique.sh') fileexists = os.path.exists('unique.out') if fileexists == 0: print '\nrefinement data completeness calculation failed' sys.exit(1) else: file = open('unique.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('reflections within resolution limits') > -1: dataList = eachLine.split() num_refs_theoretical = dataList[0] float_num_refs_theoretical = float(num_refs_theoretical) if float_num_refs_theoretical != '?': float_data_refinement = float(ref_numall) float_data_percentobs = 100.0 * float_data_refinement/float_num_refs_theoretical ref_percent = round(float_data_percentobs,1) ref_percent = str(ref_percent) os.remove('unique.out') os.remove('rununique.sh') fileexists = os.path.exists('temp_unique.mtz') if fileexists != 0: os.remove('temp_unique.mtz') ##################################################################### # Check refmac.out for severe point errors in covalent/VDW geometry # ##################################################################### # Parsing here is more fragile as it uses a log file rather than harvesting file file = open('refmac.out','r') allLines = file.readlines() file.close() for eachLine in allLines: # Babinet bulk solvent parameters (if used) since not in data harvesting as of CCP4 5.0.2 if eachLine.find('Babinet"s bulk solvent:') > -1: ref_ksolv = eachLine[33:40] ref_ksolv = ref_ksolv.strip() ref_bsolv = eachLine[47:54] ref_bsolv = ref_bsolv.strip() # Parsing section limits if eachLine.find('****') > -1: bond_list = 'no' angle_list = 'no' torsion_list = 'no' contact_list = 'no' if eachLine.find('----') > -1: bond_list = 'no' angle_list = 'no' torsion_list = 'no' contact_list = 'no' # Flag logging on finding final iteration number if eachLine.find('Refinement cycles') > -1: iterations = eachLine[30:40] iterations = iterations.strip() if iterations == '0': iteration_final = 'yes' if eachLine.find('CGMAT cycle number') > -1: if eachLine.find(iterations) > -1: iteration_final = 'yes' # get abnormal bond list if bond_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = chain.strip() if chain != '': resnumber = eachLine[1:5] resname = eachLine[6:9] aList_bonds_chain.append(chain) aList_bonds_resno.append(resnumber) aList_bonds_resname.append(resname) if eachLine.find('Bond distance deviations ') > -1: bond_list = 'yes' # get abnormal bond angle list if angle_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = chain.strip() if chain != '': resnumber = eachLine[1:5] resname = eachLine[6:9] aList_angles_chain.append(chain) aList_angles_resno.append(resnumber) aList_angles_resname.append(resname) if eachLine.find('Bond angle deviations ') > -1: angle_list = 'yes' # get abnormal contacts list if contact_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = 
chain.strip() if chain != '': resnumber = eachLine[1:5] resnumber = resnumber.strip() resname = eachLine[6:9] chain2 = eachLine[18:19] chain2 = chain2.strip() resnumber2 = eachLine[19:24] resnumber2 = resnumber2.strip() resname2 = eachLine[20:23] disorder1 = eachLine[14:15] disorder2 = eachLine[32:33] disorder1 = disorder1.strip() disorder2 = disorder2.strip() # Skip intra-residue interactions if chain != chain2 or resnumber != resnumber2: if disorder1 == '.' and disorder2 == '.': aList_contacts_chain.append(chain) aList_contacts_resno.append(resnumber) aList_contacts_resname.append(resname) aList_contacts_chain.append(chain2) aList_contacts_resno.append(resnumber2) aList_contacts_resname.append(resname2) if eachLine.find('VDW deviations ') > -1: contact_list = 'yes' chiral_list = 'no' # get severe chiral center violations if chiral_list == 'yes' and iteration_final == 'yes': chain = eachLine[0:1] chain = chain.strip() if chain != '': resnumber = eachLine[1:5] resname = eachLine[6:9] aList_chiral_chain.append(chain) aList_chiral_resno.append(resnumber) aList_chiral_resname.append(resname) if eachLine.find('Chiral volume deviations') > -1: chiral_list = 'yes' # get CIS peptide list if eachLine.find('CIS peptide bond is found') > -1: cispep_list = 'yes' if cispep_list == 'yes' and iteration_final == 'yes': chain = eachLine[15:16] chain = chain.strip() if chain != '': resnumber = eachLine[40:44] resname = eachLine[45:48] aList_cis_chain.append(chain) aList_cis_resno.append(resnumber) aList_cis_resname.append(resname) cispep_list = 'no' ############################################ # C-shell execution of CCP4/MATTHEWS_COEF # ############################################ print 'Running solvent volume calculations' # Note that this uses the FASTA sequence, which should be the protein in the crystal sample num_res_in_crystal_au = sequence_length * number_chains num_res_in_crystal_au = str(num_res_in_crystal_au) filename = 'runmatthews_coef.sh' file = open(filename, 'w') file.write('#!/bin/csh -f\n') file.write('source ') file.write(xtalsetup) file.write('\n$CCP4_BIN/matthews_coef << end-matthews_coef > matthews_coef.out\n') file.write('CELL ') file.write(acell) file.write(' ') file.write(bcell) file.write(' ') file.write(ccell) file.write(' ') file.write(alpha) file.write(' ') file.write(beta) file.write(' ') file.write(gamma) file.write(' ') file.write('\nSYMM ') file.write(spgno) file.write('\nNRES ') file.write(num_res_in_crystal_au) file.write('\nend-matthews_coef\n') file.close() os.system('chmod +x runmatthews_coef.sh') os.system('./runmatthews_coef.sh') fileexists = os.path.exists('matthews_coef.out') if fileexists == 0: print '\nMATTHEWS_COEF calculation failed to execute\n' sys.exit(1) file = open('matthews_coef.out','r') allLines = file.readlines() file.close() for eachLine in allLines: if eachLine.find('The Matthews Coefficient is') > -1: dataList = eachLine.split(':') matthews_coef = dataList[1] matthews_coef = matthews_coef.strip() if eachLine.find('Assuming protein density is') > -1: dataList = eachLine.split(':') solvent_percent = dataList[1] solvent_percent = solvent_percent.strip() os.remove('runmatthews_coef.sh') os.remove('matthews_coef.out') ##################### # Write everything # ##################### print 'Writing deposition file' file = open('pdbdeposit.cif', 'w') file.write('data_structure_1\n') file.write('#\n') file.write('##############################\n') file.write('# Release information etc #\n') file.write('##############################\n') file.write('#\n') 
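# mmCIF values written below are wrapped in single quotes via the 'quote'
# variable, and free-text fields (e.g. the contact address) are written
# between lines containing ';'. A typical line in pdbdeposit.cif therefore
# looks like (placeholder value):
#   _audit_author.name 'Surname, A.B.'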
file.write('_audit_author.name ') file.write(quote) file.write(audit_author_name) file.write(quote) file.write('\n#\n') file.write('_audit_contact_author.name ') file.write(quote) file.write(audit_contact_author_name) file.write(quote) file.write('\n_audit_contact_author.email ') file.write(quote) file.write(audit_contact_author_email) file.write(quote) file.write('\n_audit_contact_author.address\n') file.write('; ') file.write(audit_contact_author_address) file.write('\n;\n') file.write('_audit_contact_author.phone ') file.write(quote) file.write(audit_contact_author_phone) file.write(quote) file.write('\n_audit_contact_author.fax ') file.write(quote) file.write(audit_contact_author_fax) file.write(quote) file.write('\n#\n') file.write('_pdbx_database_status.dep_release_code_coordinates ') file.write(quote) file.write('HPUB') file.write(quote) file.write('\n_pdbx_database_status.dep_release_code_struct_fact ') file.write(quote) file.write('HPUB') file.write(quote) file.write('\n_pdbx_database_status.dep_release_code_sequence ') file.write(quote) file.write('REL') file.write(quote) file.write('\n#\n') if citation_title != '?': file.write('################################\n') file.write('# Citation and author #\n') file.write('################################\n') file.write('#\n') file.write('_citation.id primary\n') file.write('_citation.title ') file.write(quote) file.write(citation_title) file.write(quote) file.write('\n_citation.journal_abbrev ') file.write(quote) file.write(citation_journal_abbrev) file.write(quote) file.write('\n_citation.journal_volume ') file.write(quote) file.write(citation_journal_volume) file.write(quote) file.write('\n_citation.page_first ') file.write(citation_page_first) file.write('\n_citation.page_last ') file.write(citation_page_last) file.write('\n_citation.year ') file.write(citation_year) file.write('\n#\n') file.write('_citation_author.citation_id primary\n') file.write('_citation_author.name ') file.write(quote) file.write(citation_author_name) file.write(quote) file.write('\n#\n') if citation_journal_abbrev == 'unpublished' or citation_journal_abbrev == 'UNPUBLISHED': file.write('################################\n') file.write('# Citation and author #\n') file.write('################################\n') file.write('#\n') file.write('_citation.id primary\n') file.write('_citation.journal_abbrev unpublished') file.write('##############################################################\n') file.write('# #\n') file.write('# Extra data collection and processing information for PDB #\n') file.write('# #\n') file.write('##############################################################\n') file.write('#\n') file.write('_exptl.entry_id 1\n') file.write('_exptl.method ') file.write(quote) file.write('x-ray diffraction') file.write(quote) file.write('\n_exptl.crystals_number 1\n') file.write('#\n') file.write('_diffrn.id 1\n') file.write('_diffrn.ambient_temp ') file.write(data_collection_temp_K) file.write('\n') file.write('_diffrn_source.diffrn_id 1\n') if wavelengths != '1.54': file.write('_diffrn_source.source ') file.write(quote) file.write('Synchrotron') file.write(quote) file.write('\n_diffrn_source.type ') file.write(quote) file.write(beamline) file.write(quote) file.write('\n_diffrn_detector.diffrn_id 1\n') file.write('_diffrn_detector.pdbx_collection_date ') file.write(quote) file.write(data_collection_date) file.write(quote) file.write('\n_diffrn_detector.detector ') file.write(quote) file.write(detector_type) file.write(quote) 
    file.write('_diffrn_detector.type ' + quote + detector_maker + quote + '\n')
    file.write('#\n')
else:
    file.write('_diffrn_source.type ' + quote + 'Rotating anode' + quote + '\n')
    file.write('_diffrn_detector.diffrn_id 1\n')
    file.write('_diffrn_detector.pdbx_collection_date ' + quote + data_collection_date + quote + '\n')
    file.write('_diffrn_detector.detector ' + quote + detector_type + quote + '\n')
    file.write('_diffrn_detector.type ' + quote + detector_maker + quote + '\n')
    file.write('#\n')

file.write('_diffrn_radiation.diffrn_id 1\n')
file.write('_diffrn_radiation.wavelength_id 1\n')

if xray_method == 'MAD':
    file.write('_diffrn_radiation.pdbx_diffrn_protocol MAD\n')
else:
    file.write('_diffrn_radiation.pdbx_diffrn_protocol ' + quote + 'Single wavelength' + quote + '\n')

file.write('_diffrn_radiation.monochromator ' + quote + monochromator_type + quote + '\n')
file.write('_diffrn_radiation.pdbx_wavelength_list ' + quote + wavelengths + quote + '\n')
file.write('_diffrn_reflns.diffrn_id 1\n')
file.write('_diffrn_reflns.number ' + data_num_unmerged + '\n')
file.write('#\n')
file.write('_computing.entry_id 1\n')
file.write('_computing.data_collection ' + quote + computing_data_collection + quote + '\n')
file.write('_computing.data_reduction ' + quote + computing_data_reduction + quote + '\n')
file.write('_computing.structure_solution ' + quote + computing_structure_solution + quote + '\n')
file.write('_computing.molecular_graphics ' + quote + computing_molecular_graphics + quote + '\n')
file.write('_computing.structure_refinement ' + quote + computing_structure_refinement + quote + '\n')
file.write('#\n')
file.write('################################\n')
file.write('# Sequence information #\n')
file.write('################################\n')
file.write('#\n')

# Write protein sequence (entity 1)
file.write('_entity_poly.entity_id 1\n')
file.write('_entity_poly.pdbx_seq_one_letter_code\n')
file.write(';')

# Pad out sequence with spaces (no padding if the length is already a multiple of 60)
remainder = sequence_length%60
pad = 0
if remainder > 0:
    pad = 60 - remainder
count = 0
while count < pad:
    seqList.append(' ')
    count = count + 1

sequence_length = len(seqList)

# Print out in blocks of 60 amino acids
count = 0
while count < sequence_length:
    aa = seqList[count]
    file.write(aa)
    count = count + 1
    remainder = count%60
    if remainder == 0:
        file.write('\n')

file.write(';\n')

# Write the PDB chain-ids to which the sequence corresponds
file.write('_entity_poly.pdbx_strand_id ')
file.write(quote)
count = 0
while count < number_chains:
    chain_out = aList_chains[count]
    pr_chain_out = ' ' + chain_out + ' '
    file.write(pr_chain_out)
    if count != number_chains - 1:
        file.write(',')
    count = count + 1
file.write(quote)
file.write('\n')

# Entity annotation
file.write('#\n')
file.write('################################\n')
file.write('# Entity information #\n')
file.write('################################\n')
file.write('#\n')
file.write('loop_\n')
file.write('_entity.id\n')
file.write('_entity.pdbx_description\n')
file.write('_entity.type\n')
file.write('_entity.pdbx_ec\n')

# Protein
file.write('1')
file.write('\n; ')
file.write(protein_name)
file.write('\n;\n')
file.write(' polymer ')
file.write(protein_ec_number)
file.write('\n')

# Ligand list
number_entities = len(aList_hets)
entity_list_number = 1
count = 0
if number_entities > 0:
    while count < number_entities:
        entity_number = aList_hets_number[count]
        entity_name = aList_hets_names[count]
        file.write(entity_number)
        file.write('\n; ')
        file.write(entity_name)
        file.write('\n;\n')
        file.write(' non-polymer ?\n')
        count = count + 1

# Waters
if water_flag == 'yes':
    file.write(water_entity_list_number)
    file.write(' water water ?\n')

# Add the entity keyword ids as separate table for ADIT
file.write('#\n')
file.write('loop_\n')
file.write('_entity_keywords.entity_id\n')
file.write('1\n')

count = 0
if number_entities > 0:
    while count < number_entities:
        entity_number = aList_hets_number[count]
        file.write(entity_number)
        file.write('\n')
        count = count + 1

if water_flag == 'yes':
    file.write(water_entity_list_number)
    file.write('\n')

file.write('#\n')
file.write('##############################################################\n')
file.write('# Structure annotation #\n')
file.write('# note: _struct_keywords.pdbx_keywords maps to HEADER #\n')
file.write('# and should report function. Use broad enzyme #\n')
file.write('# classification or "Structural genomics, unknown function" #\n')
file.write('# _struct_keywords.text can be any words #\n')
file.write('##############################################################\n')
file.write('#\n')
file.write('_struct.entry_id 1\n')
file.write('_struct.title ' + quote + structure_title + quote + '\n')
file.write('#\n')
file.write('_struct_keywords.entry_id 1\n')
file.write('_struct_keywords.pdbx_keywords ' + quote + structure_class + quote + '\n')
file.write('_struct_keywords.text ' + quote + structure_keywords + quote + '\n')
file.write('#\n')
file.write('##############################################\n')
file.write('# Unique entity map #\n')
file.write('# note: points to _atom_site.label_asym_id #\n')
file.write('##############################################\n')
file.write('#\n')
file.write('loop_\n')
file.write('_struct_asym.id\n')
file.write('_struct_asym.entity_id\n')

# Protein
count = 0
if number_chains > 0:
    while count < number_chains:
        entity_list_number = '1'
        entity_list_asym = aList_chains[count]
        file.write(entity_list_asym)
        file.write(' ')
        file.write(entity_list_number)
        file.write('\n')
        count = count + 1

# Ligand list
count = 0
if number_entities > 0:
    while count < number_entities:
        entity_list_number = aList_hets_number[count]
        entity_list_asym = aList_hets_asym[count]
        file.write(entity_list_asym)
        file.write(entity_list_number)
        file.write('\n')
        count = count + 1

# Waters
if water_flag == 'yes':
    file.write('W ')
    file.write(water_entity_list_number)
    file.write('\n')

file.write('#\n')
file.write('##############################################\n')
file.write('# Asymmetric unit description #\n')
file.write('##############################################\n')
file.write('#\n')
file.write('_struct_biol.id 1\n')
file.write('_struct_biol.details ' + quote + biological_unit + quote + '\n')
file.write('#\n')
file.write('##############################################\n')
file.write('# Database sequence reference #\n')
file.write('##############################################\n')
file.write('#\n')
file.write('loop_\n')
file.write('_struct_ref.id\n')
file.write('_struct_ref.entity_id\n')
file.write('_struct_ref.db_name\n')
file.write('_struct_ref.db_code\n')
file.write('1 1 ' + quote + sequence_databasename + quote + ' ' + quote)
file.write(sequence_databasecode + quote + '\n')
file.write('#\n')
file.write('###########################\n')
file.write('# Source information #\n')
file.write('###########################\n')
file.write('#\n')
file.write('_entity_src_gen.entity_id 1\n')
file.write('_entity_src_gen.gene_src_common_name ' + quote + source_common_name + quote + '\n')
file.write('_entity_src_gen.pdbx_gene_src_scientific_name ' + quote + source_scientific_name + quote + '\n')
file.write('_entity_src_gen.pdbx_gene_src_gene ' + quote + source_gene_name + quote + '\n')
file.write('_entity_src_gen.host_org_common_name ' + quote + source_host_common_name + quote + '\n')
file.write('_entity_src_gen.pdbx_host_org_scientific_name ' + quote + source_host_scientific_name + quote + '\n')

# Write crystal data
file.write('#\n')
file.write('##########################\n')
file.write('# Crystal information #\n')
file.write('##########################\n')
file.write('#\n')
file.write('_cell.entry_id 1\n')
file.write('_cell.length_a ' + acell + '\n')
file.write('_cell.length_b ' + bcell + '\n')
file.write('_cell.length_c ' + ccell + '\n')
file.write('_cell.angle_alpha ' + alpha + '\n')
file.write('_cell.angle_beta ' + beta + '\n')
file.write('_cell.angle_gamma ' + gamma + '\n')
file.write('#\n')
file.write('_symmetry.entry_id 1\n')
file.write('_symmetry.Int_Tables_number ' + spgno + '\n')
file.write('_symmetry.space_group_name_H-M ' + quote + spgname + quote + '\n')
file.write('#\n')
file.write('_exptl_crystal.id 1\n')
file.write('_exptl_crystal.density_percent_sol ' + solvent_percent + '\n')
file.write('_exptl_crystal.density_Matthews ' + matthews_coef + '\n')
file.write('#\n')
file.write('_exptl_crystal_grow.crystal_id 1\n')
file.write('_exptl_crystal_grow.method ' + quote + exptl_crystal_grow_method + quote + '\n')
file.write('_exptl_crystal_grow.pH ' + exptl_crystal_grow_pH + '\n')
file.write('_exptl_crystal_grow.temp ' + exptl_crystal_grow_temp + '\n')
file.write('_exptl_crystal_grow.pdbx_details ' + quote + exptl_crystal_grow_components + quote + '\n')

# Write data collection data
file.write('#\n')
file.write('###########################\n')
file.write('# Data collection #\n')
file.write('###########################\n')
file.write('#\n')
file.write('# Overall processing statistics\n')
file.write('#\n')
file.write('_reflns.entry_id 1\n')
file.write('_reflns.number_all ' + data_num + '\n')
file.write('_reflns.number_obs ' + data_num + '\n')
file.write('_reflns.observed_criterion_sigma_F ' + truncate_default_f + '\n')
file.write('_reflns.observed_criterion_sigma_I ' + truncate_default_i + '\n')
file.write('_reflns.d_resolution_low ' + data_rlow + '\n')
file.write('_reflns.d_resolution_high ' + data_rhigh + '\n')
file.write('_reflns.percent_possible_obs ' + data_percentobs + '\n')
file.write('_reflns.pdbx_redundancy ' + data_redund + '\n')
file.write('_reflns.pdbx_Rmerge_I_obs ' + data_rmerge + '\n')
file.write('_reflns.pdbx_netI_over_av_sigmaI ' + data_ioversig + '\n')
file.write('#\n')
file.write('# Outer shell processing statistics\n')
file.write('#\n')
file.write('_reflns_shell.number_measured_all ' + datas_num + '\n')
file.write('_reflns_shell.number_measured_obs ')
file.write(datas_num + '\n')
file.write('_reflns_shell.d_res_low ' + datas_rlow + '\n')
file.write('_reflns_shell.d_res_high ' + datas_rhigh + '\n')
file.write('_reflns_shell.meanI_over_sigI_obs ' + datas_ioversig + '\n')
file.write('_reflns_shell.Rmerge_I_obs ' + datas_rmerge + '\n')
file.write('_reflns_shell.percent_possible_all ' + datas_percentobs + '\n')
file.write('_reflns_shell.pdbx_redundancy ' + datas_redund + '\n')

# Write refinement information
file.write('#\n')
file.write('###########################\n')
file.write('# Refinement information #\n')
file.write('###########################\n')
file.write('#\n')
file.write('_refine.entry_id 1\n')

if xray_method == 'MAD':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + 'MAD phasing' + quote)
if xray_method == 'SAD':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + 'SAD phasing' + quote)
if xray_method == 'IR':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + 'Molecular Replacement' + quote)
if xray_method != 'MAD' and xray_method != 'SAD' and xray_method != 'IR':
    file.write('_refine.pdbx_method_to_determine_struct ' + quote + xray_method + quote)

file.write('\n#\n')
file.write('# Data selection\n')
file.write('#\n')
file.write('_refine.ls_d_res_low ' + ref_dlow + '\n')
file.write('_refine.ls_d_res_high ' + ref_dhigh + '\n')
file.write('#\n')
file.write('# Bulk solvent scattering model correction\n')
file.write('#\n')
file.write('_refine.solvent_model_details\n')

# This selection is not used in deposition
if ref_bulksolvent == 'babinet' or ref_bulksolvent == 'fixedbabinet':
    file.write('; Babinet bulk solvent correction\n')
    file.write(';\n')
    file.write('_refine.solvent_model_param_ksol ' + ref_ksolv + '\n')
    file.write('_refine.solvent_model_param_bsol ' + ref_bsolv + '\n')

if ref_bulksolvent == 'mask':
    file.write('; Mask bulk solvent correction\n')
    file.write(';\n')
    file.write('_refine.pdbx_solvent_vdw_probe_radii ' + ref_solvent_vdw_probe_radii + '\n')
    file.write('_refine.pdbx_solvent_ion_probe_radii ' + ref_solvent_ion_probe_radii + '\n')
    file.write('_refine.pdbx_solvent_shrinkage_radii ' + ref_solvent_shrinkage_radii + '\n')

file.write('#\n')
file.write('# Refinement scaling\n')
file.write('#\n')
file.write('_refine.aniso_B[1][1] ' + ref_b11 + '\n')
file.write('_refine.aniso_B[1][2] ' + ref_b12 + '\n')
file.write('_refine.aniso_B[1][3] ' + ref_b13 + '\n')
file.write('_refine.aniso_B[2][2] ' + ref_b22 + '\n')
file.write('_refine.aniso_B[2][3] ' + ref_b23 + '\n')
file.write('_refine.aniso_B[3][3] ' + ref_b33 + '\n')
file.write('#\n')
file.write('# Mean B-factor\n')
file.write('#\n')
file.write('_refine.B_iso_mean ' + ref_bmean + '\n')
file.write('#\n')
file.write('# B-factor refinement method\n')
file.write('#\n')

if ref_anisoflag == 'yes':
    file.write('_refine.pdbx_isotropic_thermal_model ' + quote + 'anisotropic' + quote)
else:
    file.write('_refine.pdbx_isotropic_thermal_model ' + quote + 'isotropic' + quote)

file.write('\n#\n')
file.write('# Overall R-factors\n')
file.write('#\n')
file.write('_refine.ls_number_reflns_all ' + ref_numall + '\n')
file.write('_refine.ls_number_reflns_obs ' + ref_numobs + '\n')
file.write('_refine.ls_number_reflns_R_free ' + ref_numfree + '\n')
file.write('_refine.ls_percent_reflns_obs ' + ref_percent + '\n')
file.write('_refine.ls_R_factor_all ' + ref_rall + '\n')
file.write('_refine.ls_R_factor_obs ' + ref_robs + '\n')
file.write('_refine.ls_R_factor_R_work ' + ref_rwork + '\n')
file.write('_refine.ls_R_factor_R_free ' + ref_rfree + '\n')
file.write('_refine.pdbx_ls_sigma_F 0.0\n')
file.write('_refine.pdbx_ls_cross_valid_method ' + quote + 'Free R-value' + quote + '\n')
file.write('_refine.pdbx_R_Free_selection_details ' + quote + 'random' + quote + '\n')
file.write('_refine.pdbx_stereochemistry_target_values ' + quote + 'Engh-Huber' + quote + '\n')
file.write('#\n')
file.write('# Stereochemical agreement\n')
file.write('#\n')
file.write('loop_\n')
file.write('_refine_ls_restr.type\n')
file.write('_refine_ls_restr.dev_ideal\n')
file.write('r_bond_d ' + ref_dbond + '\n')
file.write('r_angle_d ' + ref_dangle + '\n')
file.write('r_planar_tor ' + ref_dtorsion + '\n')
file.write('r_chiral_restr ' + ref_dchiral + '\n')
file.write('r_plane_restr ' + ref_dplane + '\n')
file.write('r_mcbond_it ' + ref_bmbond + '\n')
file.write('r_mcangle_it ' + ref_bmangle + '\n')
file.write('r_scbond_it ' + ref_bsbond + '\n')
file.write('r_scangle_it ' + ref_bsangle + '\n')
file.write('#\n')
file.write('# Atom counts\n')
file.write('#\n')
file.write('_refine_hist.cycle_id 1\n')
file.write('_refine_hist.d_res_high ' + ref_dhigh + '\n')
file.write('_refine_hist.d_res_low ' + ref_dlow + '\n')
file.write('_refine_hist.number_atoms_total ' + ref_natom + '\n')
file.write('_refine_hist.number_atoms_solvent ' + ref_nsolvent + '\n')
file.write('#\n')

# Add coordinates.
# Note that this uses the REFMAC o/p for better atom-typing
xyzfile = open('temp_ref.pdb', 'r')
allLines = xyzfile.readlines()
xyzfile.close()

os.remove('temp_use.pdb')
os.remove('temp_ref.pdb')

file.write('##############################\n')
file.write('# Coordinates #\n')
file.write('##############################\n')
file.write('# \n')
file.write('loop_ \n')
file.write('_atom_site.type_symbol \n')
file.write('_atom_site.label_atom_id \n')
file.write('_atom_site.label_comp_id \n')
file.write('_atom_site.auth_asym_id \n')
file.write('_atom_site.auth_seq_id \n')
file.write('_atom_site.label_seq_id \n')
file.write('_atom_site.label_alt_id \n')
file.write('_atom_site.Cartn_x \n')
file.write('_atom_site.Cartn_y \n')
file.write('_atom_site.Cartn_z \n')
file.write('_atom_site.occupancy \n')
file.write('_atom_site.B_iso_or_equiv \n')
file.write('_atom_site.footnote_id \n')
file.write('_atom_site.label_entity_id \n')
file.write('_atom_site.id \n')
file.write('_atom_site.label_asym_id \n')

for eachLine in allLines:
    tag = eachLine[0:6]
    tag = tag.strip()
    if tag == 'ATOM' or tag == 'HETATM':
        atom_serial = eachLine[6:11]
        atom_name = eachLine[12:16]
        atom_alt = eachLine[16:17]
        res_name = eachLine[17:20]
        chain_id = eachLine[21:22]
        res_number = eachLine[22:26]
        insert_code = eachLine[26:27]
        x_coord = eachLine[30:38]
        y_coord = eachLine[38:46]
        z_coord = eachLine[46:54]
        occ_value = eachLine[54:60]
        b_value = eachLine[60:66]
        element = eachLine[76:78]

        # strip or pad some records
        pr_res_name = res_name.strip()
        wr_atom_serial = ' ' + atom_serial + ' '
        wr_atom_name = ' ' + atom_name + ' '
        wr_atom_alt = ' ' + atom_alt + ' '
        wr_res_name = ' ' + res_name + ' '
        wr_chain_id = ' ' + chain_id + ' '
        wr_res_number = ' ' + res_number + ' '
        wr_insert_code = ' ' + insert_code + ' '
        wr_x_coord = ' ' + x_coord + ' '
        wr_y_coord = ' ' + y_coord + ' '
        wr_z_coord = ' ' + z_coord + ' '
        wr_occ_value = ' ' + occ_value + ' '
        wr_b_value = ' ' + b_value + ' '
        wr_element = ' ' + element + ' '

        # Patch null alternate records
        if atom_alt == ' ':
            wr_atom_alt = ' . '

        # Establish label_asym records
        # 1.protein
        if chain_id != 'W' and res_name != 'HOH':
            label_asym = ' ' + chain_id
            label_entity = '1'

        # 2.ligands
        if chain_id == 'W' and res_name != 'HOH':
            wr_chain_id = ' . '
            count = 0
            while count < number_entities:
                hetname = aList_hets[count]
                if hetname == res_name:
                    label_asym = aList_hets_asym[count]
                    label_entity = aList_hets_number[count]
                count = count + 1

        # 3.water
        if res_name == 'HOH':
            wr_chain_id = ' . '
            label_asym = ' W'
            label_entity = water_entity_list_number

        wr_label_asym = ' ' + label_asym
        wr_label_entity = ' ' + label_entity + ' '

        # Write CIF atom record
        file.write(' ')
        file.write(wr_element)
        file.write(wr_atom_name)
        file.write(wr_res_name)
        file.write(wr_chain_id)
        file.write(wr_res_number)
        file.write(wr_res_number)
        file.write(wr_atom_alt)
        file.write(wr_x_coord)
        file.write(wr_y_coord)
        file.write(wr_z_coord)
        file.write(wr_occ_value)
        file.write(wr_b_value)
        file.write(footnote)
        file.write(wr_label_entity)
        file.write(wr_atom_serial)
        file.write(label_asym)
        file.write('\n')

# Add list of anisotropic records of same type and order as PDB file
if ref_anisoflag == 'yes':
    file.write('################################\n')
    file.write('# Anisotropic B-factor records #\n')
    file.write('################################\n')
    file.write('loop_\n')
    file.write('_atom_site_anisotrop.id\n')
    file.write('_atom_site_anisotrop.type_symbol\n')
    file.write('_atom_site_anisotrop.U[1][1]\n')
    file.write('_atom_site_anisotrop.U[2][2]\n')
    file.write('_atom_site_anisotrop.U[3][3]\n')
    file.write('_atom_site_anisotrop.U[1][2]\n')
    file.write('_atom_site_anisotrop.U[1][3]\n')
    file.write('_atom_site_anisotrop.U[2][3]\n')

    for eachLine in allLines:
        tag = eachLine[0:6]
        tag = tag.strip()
        if tag == 'ANISOU':
            atom_serial = eachLine[6:11]
            element = eachLine[76:78]
            u11 = eachLine[28:35]
            u22 = eachLine[35:42]
            u33 = eachLine[42:49]
            u12 = eachLine[49:56]
            u13 = eachLine[56:63]
            u23 = eachLine[63:70]
            wr_atom_serial = ' ' + atom_serial + ' '
            wr_element = ' ' + element + ' '
            wr_u11 = ' ' + u11 + ' '
            wr_u22 = ' ' + u22 + ' '
            wr_u33 = ' ' + u33 + ' '
            wr_u12 = ' ' + u12 + ' '
            wr_u13 = ' ' + u13 + ' '
            wr_u23 = ' ' + u23 + ' '
            file.write(wr_atom_serial)
            file.write(wr_element)
            file.write(wr_u11)
            file.write(wr_u22)
            file.write(wr_u33)
            file.write(wr_u12)
            file.write(wr_u13)
            file.write(wr_u23)
            file.write('\n')

#
file.write('#\n')
file.write('###########\n')
file.write('# The End #\n')
file.write('###########\n')
file.write('#\n')

# Close the structure deposition file
file.close()

###################################################################
# Write simple CIF reflection file list from the refinement data  #
###################################################################

print 'Creating CIF reflection list'

filename = 'runmtz2various.sh'
file = open(filename, 'w')
file.write('#!/bin/csh -f\n')
file.write('source ' + xtalsetup + '\n')
file.write('$CCP4_BIN/mtz2various HKLIN ' + mtzfile + ' HKLOUT hkldeposit.cif << end-mtz2various > mtz2various.out\n')
file.write('LABIN FP=' + famp + ' SIGFP=' + sd + ' FREE=' + freer + '\n')
file.write('OUTPUT CIF data_1\n')
file.write('FREEVAL ' + nfree_exclude + '\n')
file.write('MISS\n')
file.write('SCALE 10\n')
file.write('MONITOR 1000\n')
file.write('END\n')
file.write('end-mtz2various\n')
file.close()

os.system('chmod +x runmtz2various.sh')
os.system('./runmtz2various.sh')

fileexists = os.path.exists('hkldeposit.cif')
if fileexists == 0:
    print 'MTZ2VARIOUS run to generate CIF reflection file appears to have failed\n'
    sys.exit(1)

os.remove('mtz2various.out')
os.remove('runmtz2various.sh')

#####################
# Report statistics #
#####################

print '\nRefinement Summary'
print '==================='
print 'Resolution  :',ref_dhigh
print 'R(working)  :',ref_rwork
print 'R(free)     :',ref_rfree
print 'RMSD(bonds) :',ref_dbond
print 'RMSD(angles):',ref_dangle

# Flag geometric problems in bonds
length_bonds = len(aList_bonds_resno)
if length_bonds > 0:
    print '\nThe following residues have severely strained covalent bond lengths:'
    count = 0
    while count < length_bonds:
        chain = aList_bonds_chain[count]
        aa = aList_bonds_resno[count]
        resname = aList_bonds_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag geometric problems in angles
length_angles = len(aList_angles_resno)
if length_angles > 0:
    print '\nThe following residues have severely strained covalent bond angles:'
    count = 0
    while count < length_angles:
        chain = aList_angles_chain[count]
        aa = aList_angles_resno[count]
        resname = aList_angles_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag short VDW contacts
length_vdw = len(aList_contacts_resno)
if length_vdw > 0:
    print '\nThe following residues have extremely short VDW contacts:'
    count = 0
    while count < length_vdw:
        chain = aList_contacts_chain[count]
        aa = aList_contacts_resno[count]
        resname = aList_contacts_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag abnormal chiral centers
length_chiral = len(aList_chiral_resno)
if length_chiral > 0:
    print '\nThe following residues have severely strained chiral centers:'
    count = 0
    while count < length_chiral:
        chain = aList_chiral_chain[count]
        aa = aList_chiral_resno[count]
        resname = aList_chiral_resname[count]
        print chain,aa,resname
        count = count + 1

# Flag CIS residues
length_cis = len(aList_cis_resno)
if length_cis > 0:
    print '\nThe following residues are in CIS conformation:'
    count = 0
    while count < length_cis:
        chain = aList_cis_chain[count]
        aa = aList_cis_resno[count]
        resname = aList_cis_resname[count]
        print chain,aa,resname
        count = count + 1

#
if length_bonds > 0 or length_angles > 0 or length_chiral > 0:
    print '\nSevere stereochemical abnormalities are virtually impossible and usually indicate model errors.'
    print 'See file refmac.out for more information.'

# QC notes from SGX work
fl_ref_dhigh = float(ref_dhigh)
rfree_max = -0.02 * fl_ref_dhigh * fl_ref_dhigh + 0.13 * fl_ref_dhigh + 0.11
rdif_max = -0.01 * fl_ref_dhigh * fl_ref_dhigh + 0.065 * fl_ref_dhigh - 0.02
phipsi_core_min = 100 * ( -0.04 * fl_ref_dhigh + 0.96 )
badsidechains_max = 0.75 * fl_ref_dhigh + 0.75

pr_rfree_max = round(rfree_max,3)
pr_rdif_max = round(rdif_max,3)
pr_phipsi_core_min = round(phipsi_core_min,1)
pr_badsidechains_max = round(badsidechains_max,1)

print '\nSGX expectations for global structure quality metrics at',ref_dhigh,'A resolution'
print '==========================================================================='
print 'Max value for R(free)                                :',pr_rfree_max
print 'Max difference for R(free) - R(work)                 :',pr_rdif_max
print 'Min percentage amino acids in Ramachandran plot core :',pr_phipsi_core_min
print 'Max percentage side chains in abnormal conformations :',pr_badsidechains_max
print '\n1. The value for R-free should only exceed this value if there are'
print '   clear indications of problematic data (anisotropy, twinning).'
print '2. Residues in the Ramachandran plot core are those calculated'
print '   by PROCHECK or an equivalent phi-psi area in other tabulations.'
print '3. Abnormal side chains are those flagged by a chi-1 angle differing'
print '   by 45 degrees from the nearest rotamer as calculated by CCP4/ROTAMER.'

# Write final deposition notes
print '\nPDB Deposition Notes'
print '====================='
print 'Structure deposition file : pdbdeposit.cif'
print 'X-ray data deposition file: hkldeposit.cif'
print '\nThe mmCIF sequence/structure/entity mappings will only need adjustment'
print 'if there is more than one type of protein in the crystal.'
print '\nThe structure deposition file contains annotation data and coordinates.'
print 'This file may be deposited to the PDB through the RCSB/PDB ADIT interface'
print '(http://rcsb-deposit.rutgers.edu/adit/). From the ADIT session,'
print 'select file type "mmCIF" and upload the file. After deposition, use the'
print '"PREVIEW ENTRY" option to check the information uploaded to the RCSB/PDB.'
print 'Any corrections, or any missing or additional information, may be entered'
print 'through this interface.\n'
#
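
#####################################################################
# Optional final check (an illustrative sketch, not part of the     #
# original submission workflow): confirm that both deposition files #
# named above exist and are non-empty before uploading them to ADIT #
#####################################################################

for depositfile in ['pdbdeposit.cif','hkldeposit.cif']:
    fileexists = os.path.exists(depositfile)
    if fileexists == 0 or os.path.getsize(depositfile) == 0:
        print '\nWARNING:',depositfile,'is missing or empty'
#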