# Supplemental SFile4. This is the InterProScan configuration file used to annotate the Aplysia AplCal3.0 reference proteome by Kron et al. 2022.

data.directory=data
bin.directory=bin

##
## Temporary files and directory
##
# The text [UNIQUE], if present, will be replaced by a value unique to your running instance
# Temporary files used by the analyses will be placed in directories here:
temporary.file.directory.suffix=[UNIQUE]
temporary.file.directory=temp/${temporary.file.directory.suffix}

##
## H2 database
##
# The H2 database is copied by the standalone version of interproscan
i5.h2.database.original.location=work/template/interpro.zip
# LOCK_TIMEOUT: Sets the lock timeout (in milliseconds) for the current session
i5.database.connection.url=jdbc:h2:mem:interpro;LOCK_TIMEOUT=10000000

##
## binary paths
##
# Configure the version of Perl and Python to use when running member database Perl/Python binaries
perl.command=perl
python3.command=python

# Binary file locations
#rpsblast
binary.rpsblast.path=${bin.directory}/blast/ncbi-blast-2.10.1+/rpsblast
#rpsbproc
binary.rpsbproc.path=${bin.directory}/blast/ncbi-blast-2.10.1+/rpsbproc
#hmmer 3
binary.hmmer3.path=${bin.directory}/hmmer/hmmer3/3.1b1
binary.hmmer3.hmmscan.path=${bin.directory}/hmmer/hmmer3/3.1b1/hmmscan
binary.hmmer3.hmmsearch.path=${bin.directory}/hmmer/hmmer3/3.1b1/hmmsearch
binary.hmmer33.path=${bin.directory}/hmmer/hmmer3/3.3
binary.hmmer33.hmmscan.path=${bin.directory}/hmmer/hmmer3/3.3/hmmscan
binary.hmmer33.hmmsearch.path=${bin.directory}/hmmer/hmmer3/3.3/hmmsearch
#hmmer 2
binary.hmmer2.hmmsearch.path=${bin.directory}/hmmer/hmmer2/2.3.2/hmmsearch
binary.hmmer2.hmmpfam.path=${bin.directory}/hmmer/hmmer2/2.3.2/hmmpfam
binary.fingerprintscan.path=${bin.directory}/prints/fingerPRINTScan
binary.coils.path=${bin.directory}/ncoils/2.2.1/ncoils
# Note: The correct prosite binary distribution for your platform can be downloaded from: ftp://ftp.expasy.org/databases/prosite/ps_scan/
binary.prosite.psscan.pl.path=${bin.directory}/prosite/ps_scan.pl
binary.prosite.pfscan.path=${bin.directory}/prosite/pfscan
binary.prosite.pfsearch.path=${bin.directory}/prosite/pfsearch
binary.prosite.pfscanv3.path=${bin.directory}/prosite/pfscanV3
binary.prosite.pfsearchv3.path=${bin.directory}/prosite/pfsearchV3
binary.prosite.pfsearch.wrapperpath=${bin.directory}/prosite/pfsearch_wrapper.py
binary.runprosite.path=${bin.directory}/prosite/runprosite.py
#CATH-Gene3d
cath.resolve.hits.path=${bin.directory}/gene3d/4.3.0/cath-resolve-hits
assign.cath.superfamilies.path=${bin.directory}/gene3d/4.3.0/assign_cath_superfamilies.py
#panther
binary.pantherscore.path=${bin.directory}/panther/panther_score.py
binary.superfamily.1.75.ass3.pl.path=${bin.directory}/superfamily/1.75/ass3_single_threaded.pl
#PIRSF
binary.pirsf.pl.path=${bin.directory}/pirsf/3.10/pirsf.pl
binary.getorf.path=${bin.directory}/nucleotide/getorf
binary.esltranslate.path=${bin.directory}/nucleotide/esl-translate
#PIRSR
pirsr.postprocess.command=${bin.directory}/pirsr/pirsr_postprocess
#SFLD
sfld.postprocess.command=${bin.directory}/sfld/sfld_postprocess
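
# A note on the ${...} references used throughout this file: values are substituted from properties
# defined earlier, so with bin.directory=bin a setting such as
#   binary.hmmer3.hmmsearch.path=${bin.directory}/hmmer/hmmer3/3.1b1/hmmsearch
# points at bin/hmmer/hmmer3/3.1b1/hmmsearch, with relative paths typically resolved against the
# InterProScan installation directory. The expanded path shown here is an illustration only, not an
# additional setting.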
#signalp
# Note: SignalP binary not distributed with InterProScan 5, please install separately e.g. in ${bin.directory}/signalp/4.1/signalp
signalp_euk.signature.library.release=4.1
signalp_gram_positive.signature.library.release=4.1
signalp_gram_negative.signature.library.release=4.1
binary.signalp.path=${bin.directory}/signalp/4.1/signalp
signalp.perl.library.dir=${bin.directory}/signalp/4.1/lib
#TMHMM 2.0
# Note: TMHMM binary not distributed with InterProScan 5, please install separately e.g. in ${bin.directory}/tmhmm/2.0c/decodeanhmm
tmhmm.signature.library.release=2.0c
binary.tmhmm.path=${bin.directory}/tmhmm/2.0c/decodeanhmm
#PHOBIUS
# Note: Phobius binary not distributed with InterProScan 5, please install separately e.g. in ${bin.directory}/phobius/1.01/phobius.pl
phobius.signature.library.release=1.01
binary.phobius.pl.path=${bin.directory}/phobius/1.01/phobius.pl
#

##
## Member database model / data file locations (alphabetically sorted)
##
#CDD
cdd.signature.list.path=${data.directory}/cdd/3.18/data/cddid.tbl
cdd.library.path=${data.directory}/cdd/3.18/db/Cdd_NCBI
cdd.data.path=${data.directory}/cdd/3.18/data
#Gene3d 4.3.0
gene3d.hmm.path=${data.directory}/gene3d/4.3.0/gene3d_main.hmm
gene3d.model2sf_map.path=${data.directory}/gene3d/4.3.0/model_to_family_map.tsv
gene3d.discontinuous_regs.path=${data.directory}/gene3d/4.3.0/discontinuous_regs.pkl.py3
gene3d.hmmsearch.force=true
# HAMAP
hamap.profile.models.path=${data.directory}/hamap/2020_05/hamap.prf
hamap.profile.models.dir=${data.directory}/hamap/2020_05/profiles
hamap.hmm.path=${data.directory}/hamap/2020_05/hamap.hmm.lib
#MobiDB
binary.mobidb.path=${bin.directory}/mobidb/2.0/mobidb_lite.py
binary.mobidb.binx.path=${bin.directory}/mobidb/2.0/binx
#PANTHER
panther.temporary.file.directory=
panther.models.dir=${data.directory}/panther/15.0/
panther.hmm.path=${data.directory}/panther/15.0/panther.hmm
panther.names.tab=${data.directory}/panther/15.0/names.tab
# PFam
pfam-a.hmm.path=data/pfam/33.1/pfam_a.hmm
pfam-a.seed.path=data/pfam/33.1/pfam_a.seed
pfam-clans.path=data/pfam/33.1/pfam_clans
pfam-a.dat.path=data/pfam/33.1/pfam_a.dat
##
#PIRSF 3.10
pirsf.sfhmm.path=${data.directory}/pirsf/3.10/sf_hmm_all
pirsf.dat.path=${data.directory}/pirsf/3.10/pirsf.dat
#PIRSR
pirsr.hmm.path=${data.directory}/pirsr/2021_02/sr_hmm_all
pirsr.data.path=${data.directory}/pirsr/2021_02/
pirsr.rules.path=${data.directory}/pirsr/2021_02/sr_uru.json
# pirsr.sites.annotation.file.path=${data.directory}/pirsr/4/pirsr_sites.annot
# pirsr.hierarchy.file.path=${data.directory}/pirsr/4/pirsr_hierarchy_flat.txt
#PRINTS 42.0
prints.kdat.path.42.0=${data.directory}/prints/42.0/prints42_0.kdat
prints.pval.path.42.0=${data.directory}/prints/42.0/prints.pval
prints.hierarchy.path.42.0=${data.directory}/prints/42.0/FingerPRINTShierarchy.db
#ProDom 2006.1
prodom.ipr.path.2006.1=${data.directory}/prodom/2006.1/prodom.ipr
#Prosite
prosite.patterns.models.path=${data.directory}/prosite/2021_01/prosite_patterns.dat
prosite.profiles.models.path=${data.directory}/prosite/2021_01/prosite_profiles.dat
prosite.models.path=${data.directory}/prosite/2021_01/prosite.dat
#prosite.models.dir=${data.directory}/prosite/2021_01/prosite_models
prosite.profiles.models.dir=${data.directory}/prosite/2021_01/profile_models
prosite.patterns.models.dir=${data.directory}/prosite/2021_01/pattern_models
prosite.evaluator.models.path=${data.directory}/prosite/2021_01/evaluator.dat
prosite.profiles.skipflagged.profiles=${data.directory}/prosite/2021_01/skip_flagged_profles.txt
psscan.prositeprofiles.usepfsearch=true
#
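
# A note on psscan.prositeprofiles.usepfsearch=true above: with this flag enabled, the PROSITE
# Profiles scans are expected to run through the pfsearchV3 binary configured earlier
# (binary.prosite.pfsearchv3.path), using the pfsearchv3.*.prositeprofiles switches given in the
# cpu options section below, rather than through the older pfscan route.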
#SFLD
sfld.hmm.path=${data.directory}/sfld/4/sfld.hmm
sfld.sites.annotation.file.path=${data.directory}/sfld/4/sfld_sites.annot
sfld.hierarchy.file.path=${data.directory}/sfld/4/sfld_hierarchy_flat.txt
#smart 7.1
smart.hmm.path=${data.directory}/smart/7.1/smart.HMMs
smart.hmm.bin.path=${data.directory}/smart/7.1/smart.HMMs.bin
smart.overlapping.path=
smart.threshold.path=
#SuperFamily 1.75
superfamily.hmm.path.3.0=${data.directory}/superfamily/1.75/hmmlib_1.75
superfamily.self.hits.path.1.75=${data.directory}/superfamily/1.75/self_hits.tab
superfamily.cla.path.1.75=${data.directory}/superfamily/1.75/dir.cla.scop.txt_1.75
superfamily.model.tab.path.1.75=${data.directory}/superfamily/1.75/model.tab
superfamily.pdbj95d.path.1.75=${data.directory}/superfamily/1.75/pdbj95d
#tigrfam 15.0
tigrfam.hmm.path=${data.directory}/tigrfam/15.0/TIGRFAMs_HMM.LIB
#TMHMM 2.0
# Note: TMHMM model files not distributed with InterProScan 5, please install separately e.g. in data/tmhmm/2.0/TMHMM2.0.model
tmhmm.model.path=${data.directory}/tmhmm/2.0c/TMHMM2.0c.model

##
## cpu options for parallel processing
##
#hmmer cpu options for the different jobs
hmmer3.hmmsearch.cpu.switch.gene3d=--cpu 1
hmmer3.hmmsearch.cpu.switch.panther=--cpu 1
hmmer3.hmmsearch.cpu.switch.pfama=--cpu 1
hmmer3.hmmsearch.cpu.switch.pirsf=--cpu 1
hmmer3.hmmsearch.cpu.switch.sfld=--cpu 1
hmmer3.hmmsearch.cpu.switch.superfamily=--cpu 1
hmmer3.hmmsearch.cpu.switch.tigrfam=--cpu 1
hmmer3.hmmsearch.cpu.switch.hmmfilter=--cpu 1
hmmer2.hmmpfam.cpu.switch.smart=--cpu 1
#panther binary cpu options (for blastall and hmmsearch)
panther.binary.cpu.switch=-c 1
#pirsf binary cpu options (for hmmscan)
pirsf.pl.binary.cpu.switch=-cpu 1
pfsearchv3.binary.switches.prositeprofiles=-f -o 7
pfsearchv3.cpu.switch.prositeprofiles=-t 4

##
## max number of proteins per analysis batch
##
# These values control the maximum number of proteins put through
# an analysis in one go - different algorithms have different optimum values.
# Note that if you suffer from out of memory errors, reducing these values
# will almost certainly help, but may reduce the speed of analysis.
analysis.max.sequence.count.CDD=1000
analysis.max.sequence.count.GENE3D=4000
analysis.max.sequence.count.SFLD=32000
analysis.max.sequence.count.TMHMM=16000
analysis.max.sequence.count.PANTHER=500
analysis.max.sequence.count.SMART=500
analysis.max.sequence.count.TIGRFAM=4000
analysis.max.sequence.count.PRINTS=500
analysis.max.sequence.count.PROSITE_PROFILES=1000
analysis.max.sequence.count.PROSITE_PATTERNS=4000
analysis.max.sequence.count.PIRSF=4000
analysis.max.sequence.count.PRODOM=4000
analysis.max.sequence.count.SSF=2000
analysis.max.sequence.count.HAMAP=32000
analysis.max.sequence.count.PFAM=4000
analysis.max.sequence.count.COILS=32000
analysis.max.sequence.count.PHOBIUS=16000
# SignalP 4.1 binary only allows a maximum of 10,000 sequences
analysis.max.sequence.count.SIGNALP=8000
analysis.max.sequence.count.MOBIDB_LITE=1000

##
## General settings
##
kvstore.entrydb.path=work/kvs/idb
# If multiple hosts are sharing the same file system, a delay may be required to
# avoid stale NFS handles
# nfs.delay.milliseconds=0
kvstore.delay.milliseconds=200
# Instructs I5 to completely clean up after itself - leave set to true.
delete.temporary.directory.on.completion=false
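
# Worth noting for the setting above: because temporary.file.directory is temp/[UNIQUE], each run
# works in its own directory, and with delete.temporary.directory.on.completion=false those
# per-run directories (and the intermediate analysis files in them) are retained when the run
# finishes, so they have to be removed manually if disk space is a concern.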
##
## Broker TCP Connection
##
# A list of TCP ports that should not be used for messaging. (Apart from this, only ports > 1024 and < 65535 will be used.)
tcp.port.exclusion.list=3879,3878,3881,3882
tcp.port.prefered.list=1024-1200

##
## precalculated match lookup service
##
# By default, if the sequence already has matches available from the EBI, this service will look them
# up for you. Note - at present it will always return all the available matches, ignoring any -appl options
# set on the command line.
precalculated.match.lookup.service.url=https://www.ebi.ac.uk/interpro/match-lookup
#proxy set up
precalculated.match.lookup.service.proxy.host=
precalculated.match.lookup.service.proxy.port=3128
precalculated.match.protein.lookup.batch.size=100
precalculated.match.protein.insert.batch.size=500
precalculated.match.protein.insert.batch.size.nolookup=4000
#Exclude sites from output (residue level annotations)
exclude.sites.from.output=false

##
## getorf configuration for nucleic acid sequences
##
# the following are roughly the times getorf takes to find sequences of open reading frames (ORFs) in n nucleotide sequences
#number of sequences -> approx. time it takes in our tests
# 600000 -> 10 minutes
# 3600000 -> 1 hour
# 7200000 -> 2 hours
# 43200000 -> 12 hours
# JOB: jobLoadNucleicAcidSequence
getorf.minsize=75
# Set InterProScan to only process the N longest ORFs for each nucleotide sequence
binary.getorf.parser.filtersize=8

##
## Output format
##
# TRUE by default, which means all generated graphical output documents (only SVG at the moment) will be archived (using the Linux command tar).
# This simple switch allows you to switch the archive mode off (simply set it to FALSE).
archiveSVGOutput=true
#disable HTML output
disable.html.output=false
#
#max.concurrent.threads.for.prepare.output.step=
max.concurrent.threads.for.prepare.output.step=1

##
## Master/Stand alone embedded workers
##
# Set the number of embedded workers to the number of processors that you would like to employ
# on the machine you are using to run InterProScan.
#number of embedded workers a master process can have
number.of.embedded.workers=6
maxnumber.of.embedded.workers=8

##
## Distributed mode (Cluster mode)
##
#grid name
grid.name=lsf
#grid.name=other-cluster
#project name for this run - use user.digest
user.digest=i5GridRun
#grid jobs limit : number of jobs you are allowed to run on the cluster
grid.jobs.limit=1000
#time between each bjobs or qstat command to check the status of jobs on the cluster
grid.check.interval.seconds=120
#allow master interproscan to run binaries ()
master.can.run.binaries=true
#deal with unknown step states
recover.unknown.step.state=false
#Grid submission commands (e.g. LSF bsub or SGE qsub) for starting remote workers
#commands the master uses to start new remote workers
grid.master.submit.command=bsub -q general -M 8192
grid.master.submit.high.memory.command=bsub -q bigmem -M 8192
#commands a worker uses to start new remote workers
grid.worker.submit.command=bsub -q general -M 8192
grid.worker.submit.high.memory.command=bsub -q bigmem -M 8192
# command to start a new worker (new jvm)
worker.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
# This may be identical to the worker.command argument above, however you may choose to select
# a machine with a much larger available memory, for use when a StepExecution fails.
worker.high.memory.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
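
# A rough sketch of how the commands above combine in LSF cluster mode: to add a remote worker, the
# master wraps its worker.command in the grid submission command, so the effective submission looks
# approximately like
#   bsub -q general -M 8192 java -Xms32m -Xmx2048m -jar interproscan-5.jar <worker options>
# with the bigmem queue / worker.high.memory.command pair used instead when a failed StepExecution
# is retried with more memory. "<worker options>" stands in for the run-specific arguments that
# InterProScan appends itself.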
# Set the number of embedded workers to the number of processors that you would like to employ
# on the node machine on which the worker will run.
#number of embedded workers in a remote worker
worker.number.of.embedded.workers=4
worker.maxnumber.of.embedded.workers=4
# max number of connections to the master
master.maxconsumers=48
#number of connections to the worker
worker.maxconsumers=32
#throttled network?
grid.throttle=true
# max number of jobs a tier 1 worker is allowed on its queue
worker.maxunfinished.jobs=32
#network tier depth
max.tier.depth=1
# Active MQ JMS broker temporary data directory
jms.broker.temp.directory=activemq-data/localhost/tmp_storage
#verbose.log=true
#verbose.log.level=10
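
# For context, a sketch of how a configuration like this is picked up at run time (the FASTA file
# name below is hypothetical): InterProScan 5 reads these settings from interproscan.properties in
# its installation directory, so annotating the AplCal3.0 protein set would look something like
#   ./interproscan.sh -i aplcal3_proteins.fasta -f tsv,xml -goterms -iprlookup
# where -appl can optionally be used to restrict the run to a subset of the member databases
# configured above.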