# Supplemental SFile4. This is the InterProScan configuration file used to annotate the Aplysia AplCal3.0 reference proteome by Kron et al. 2022.

data.directory=data
bin.directory=bin

##
## Temporary files and directory
##
# The text [UNIQUE], if present, will be replaced by a value unique to your running instance
# Temporary files used by the analyses will be placed in directories here:
temporary.file.directory.suffix=[UNIQUE]
temporary.file.directory=temp/${temporary.file.directory.suffix}

##
## H2 database
##
# The H2 database is copied by the standalone version of interproscan
i5.h2.database.original.location=work/template/interpro.zip
# LOCK_TIMEOUT: Sets the lock timeout (in milliseconds) for the current session
i5.database.connection.url=jdbc:h2:mem:interpro;LOCK_TIMEOUT=10000000

##
## binary paths
##
# Configure the version of Perl and Python to use when running member database Perl/Python binaries
perl.command=perl
python3.command=python

# Binary file locations
#rpsblast
binary.rpsblast.path=${bin.directory}/blast/ncbi-blast-2.10.1+/rpsblast
#rpsbproc
binary.rpsbproc.path=${bin.directory}/blast/ncbi-blast-2.10.1+/rpsbproc
#hmmer 3
binary.hmmer3.path=${bin.directory}/hmmer/hmmer3/3.1b1
binary.hmmer3.hmmscan.path=${bin.directory}/hmmer/hmmer3/3.1b1/hmmscan
binary.hmmer3.hmmsearch.path=${bin.directory}/hmmer/hmmer3/3.1b1/hmmsearch
binary.hmmer33.path=${bin.directory}/hmmer/hmmer3/3.3
binary.hmmer33.hmmscan.path=${bin.directory}/hmmer/hmmer3/3.3/hmmscan
binary.hmmer33.hmmsearch.path=${bin.directory}/hmmer/hmmer3/3.3/hmmsearch
#hmmer 2
binary.hmmer2.hmmsearch.path=${bin.directory}/hmmer/hmmer2/2.3.2/hmmsearch
binary.hmmer2.hmmpfam.path=${bin.directory}/hmmer/hmmer2/2.3.2/hmmpfam
binary.fingerprintscan.path=${bin.directory}/prints/fingerPRINTScan
binary.coils.path=${bin.directory}/ncoils/2.2.1/ncoils
# Note: The correct prosite binary distribution for your platform can be downloaded from: ftp://ftp.expasy.org/databases/prosite/ps_scan/
binary.prosite.psscan.pl.path=${bin.directory}/prosite/ps_scan.pl
binary.prosite.pfscan.path=${bin.directory}/prosite/pfscan
binary.prosite.pfsearch.path=${bin.directory}/prosite/pfsearch
binary.prosite.pfscanv3.path=${bin.directory}/prosite/pfscanV3
binary.prosite.pfsearchv3.path=${bin.directory}/prosite/pfsearchV3
binary.prosite.pfsearch.wrapperpath=${bin.directory}/prosite/pfsearch_wrapper.py
binary.runprosite.path=${bin.directory}/prosite/runprosite.py
#CATH-Gene3d
cath.resolve.hits.path=${bin.directory}/gene3d/4.3.0/cath-resolve-hits
assign.cath.superfamilies.path=${bin.directory}/gene3d/4.3.0/assign_cath_superfamilies.py
#panther
binary.pantherscore.path=${bin.directory}/panther/panther_score.py
binary.superfamily.1.75.ass3.pl.path=${bin.directory}/superfamily/1.75/ass3_single_threaded.pl
#PIRSF
binary.pirsf.pl.path=${bin.directory}/pirsf/3.10/pirsf.pl
binary.getorf.path=${bin.directory}/nucleotide/getorf
binary.esltranslate.path=${bin.directory}/nucleotide/esl-translate
#PIRSR
pirsr.postprocess.command=${bin.directory}/pirsr/pirsr_postprocess
#SFLD
sfld.postprocess.command=${bin.directory}/sfld/sfld_postprocess
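
# A note on the ${...} references used throughout this file: values are substituted from properties
# defined earlier, so with bin.directory=bin a setting such as
#   binary.hmmer3.hmmsearch.path=${bin.directory}/hmmer/hmmer3/3.1b1/hmmsearch
# points at bin/hmmer/hmmer3/3.1b1/hmmsearch, with relative paths typically resolved against the
# InterProScan installation directory. The expanded path shown here is an illustration only, not an
# additional setting.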
#signalp
# Note: SignalP binary not distributed with InterProScan 5, please install separately e.g. in ${bin.directory}/signalp/4.1/signalp
signalp_euk.signature.library.release=4.1
signalp_gram_positive.signature.library.release=4.1
signalp_gram_negative.signature.library.release=4.1
binary.signalp.path=${bin.directory}/signalp/4.1/signalp
signalp.perl.library.dir=${bin.directory}/signalp/4.1/lib
#TMHMM 2.0
# Note: TMHMM binary not distributed with InterProScan 5, please install separately e.g. in ${bin.directory}/tmhmm/2.0c/decodeanhmm
tmhmm.signature.library.release=2.0c
binary.tmhmm.path=${bin.directory}/tmhmm/2.0c/decodeanhmm
#PHOBIUS
# Note: Phobius binary not distributed with InterProScan 5, please install separately e.g. in ${bin.directory}/phobius/1.01/phobius.pl
phobius.signature.library.release=1.01
binary.phobius.pl.path=${bin.directory}/phobius/1.01/phobius.pl
#

##
## Member database model / data file locations (alphabetically sorted)
##
#CDD
cdd.signature.list.path=${data.directory}/cdd/3.18/data/cddid.tbl
cdd.library.path=${data.directory}/cdd/3.18/db/Cdd_NCBI
cdd.data.path=${data.directory}/cdd/3.18/data
#Gene3d 4.3.0
gene3d.hmm.path=${data.directory}/gene3d/4.3.0/gene3d_main.hmm
gene3d.model2sf_map.path=${data.directory}/gene3d/4.3.0/model_to_family_map.tsv
gene3d.discontinuous_regs.path=${data.directory}/gene3d/4.3.0/discontinuous_regs.pkl.py3
gene3d.hmmsearch.force=true
# HAMAP
hamap.profile.models.path=${data.directory}/hamap/2020_05/hamap.prf
hamap.profile.models.dir=${data.directory}/hamap/2020_05/profiles
hamap.hmm.path=${data.directory}/hamap/2020_05/hamap.hmm.lib
#MobiDB
binary.mobidb.path=${bin.directory}/mobidb/2.0/mobidb_lite.py
binary.mobidb.binx.path=${bin.directory}/mobidb/2.0/binx
#PANTHER
panther.temporary.file.directory=
panther.models.dir=${data.directory}/panther/15.0/
panther.hmm.path=${data.directory}/panther/15.0/panther.hmm
panther.names.tab=${data.directory}/panther/15.0/names.tab
# PFam
pfam-a.hmm.path=data/pfam/33.1/pfam_a.hmm
pfam-a.seed.path=data/pfam/33.1/pfam_a.seed
pfam-clans.path=data/pfam/33.1/pfam_clans
pfam-a.dat.path=data/pfam/33.1/pfam_a.dat
##
#PIRSF 3.10
pirsf.sfhmm.path=${data.directory}/pirsf/3.10/sf_hmm_all
pirsf.dat.path=${data.directory}/pirsf/3.10/pirsf.dat
#PIRSR
pirsr.hmm.path=${data.directory}/pirsr/2021_02/sr_hmm_all
pirsr.data.path=${data.directory}/pirsr/2021_02/
pirsr.rules.path=${data.directory}/pirsr/2021_02/sr_uru.json
# pirsr.sites.annotation.file.path=${data.directory}/pirsr/4/pirsr_sites.annot
# pirsr.hierarchy.file.path=${data.directory}/pirsr/4/pirsr_hierarchy_flat.txt
#PRINTS 42.0
prints.kdat.path.42.0=${data.directory}/prints/42.0/prints42_0.kdat
prints.pval.path.42.0=${data.directory}/prints/42.0/prints.pval
prints.hierarchy.path.42.0=${data.directory}/prints/42.0/FingerPRINTShierarchy.db
#ProDom 2006.1
prodom.ipr.path.2006.1=${data.directory}/prodom/2006.1/prodom.ipr
#Prosite
prosite.patterns.models.path=${data.directory}/prosite/2021_01/prosite_patterns.dat
prosite.profiles.models.path=${data.directory}/prosite/2021_01/prosite_profiles.dat
prosite.models.path=${data.directory}/prosite/2021_01/prosite.dat
#prosite.models.dir=${data.directory}/prosite/2021_01/prosite_models
prosite.profiles.models.dir=${data.directory}/prosite/2021_01/profile_models
prosite.patterns.models.dir=${data.directory}/prosite/2021_01/pattern_models
prosite.evaluator.models.path=${data.directory}/prosite/2021_01/evaluator.dat
prosite.profiles.skipflagged.profiles=${data.directory}/prosite/2021_01/skip_flagged_profles.txt
psscan.prositeprofiles.usepfsearch=true
#
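
# A note on psscan.prositeprofiles.usepfsearch=true above: with this flag enabled, the PROSITE
# Profiles scans are expected to run through the pfsearchV3 binary configured earlier
# (binary.prosite.pfsearchv3.path), using the pfsearchv3.*.prositeprofiles switches given in the
# cpu options section below, rather than through the older pfscan route.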
#SFLD
sfld.hmm.path=${data.directory}/sfld/4/sfld.hmm
sfld.sites.annotation.file.path=${data.directory}/sfld/4/sfld_sites.annot
sfld.hierarchy.file.path=${data.directory}/sfld/4/sfld_hierarchy_flat.txt
#smart 7.1
smart.hmm.path=${data.directory}/smart/7.1/smart.HMMs
smart.hmm.bin.path=${data.directory}/smart/7.1/smart.HMMs.bin
smart.overlapping.path=
smart.threshold.path=
#SuperFamily 1.75
superfamily.hmm.path.3.0=${data.directory}/superfamily/1.75/hmmlib_1.75
superfamily.self.hits.path.1.75=${data.directory}/superfamily/1.75/self_hits.tab
superfamily.cla.path.1.75=${data.directory}/superfamily/1.75/dir.cla.scop.txt_1.75
superfamily.model.tab.path.1.75=${data.directory}/superfamily/1.75/model.tab
superfamily.pdbj95d.path.1.75=${data.directory}/superfamily/1.75/pdbj95d
#tigrfam 15.0
tigrfam.hmm.path=${data.directory}/tigrfam/15.0/TIGRFAMs_HMM.LIB
#TMHMM 2.0
# Note: TMHMM model files not distributed with InterProScan 5, please install separately e.g. in data/tmhmm/2.0/TMHMM2.0.model
tmhmm.model.path=${data.directory}/tmhmm/2.0c/TMHMM2.0c.model

##
## cpu options for parallel processing
##
#hmmer cpu options for the different jobs
hmmer3.hmmsearch.cpu.switch.gene3d=--cpu 1
hmmer3.hmmsearch.cpu.switch.panther=--cpu 1
hmmer3.hmmsearch.cpu.switch.pfama=--cpu 1
hmmer3.hmmsearch.cpu.switch.pirsf=--cpu 1
hmmer3.hmmsearch.cpu.switch.sfld=--cpu 1
hmmer3.hmmsearch.cpu.switch.superfamily=--cpu 1
hmmer3.hmmsearch.cpu.switch.tigrfam=--cpu 1
hmmer3.hmmsearch.cpu.switch.hmmfilter=--cpu 1
hmmer2.hmmpfam.cpu.switch.smart=--cpu 1
#panther binary cpu options (for blastall and hmmsearch)
panther.binary.cpu.switch=-c 1
#pirsf binary cpu options (for hmmscan)
pirsf.pl.binary.cpu.switch=-cpu 1
pfsearchv3.binary.switches.prositeprofiles=-f -o 7
pfsearchv3.cpu.switch.prositeprofiles=-t 4

##
## max number of proteins per analysis batch
##
# These values control the maximum number of proteins put through
# an analysis in one go - different algorithms have different optimum values.
# Note that if you suffer from out of memory errors, reducing these values
# will almost certainly help, but may reduce the speed of analysis.
analysis.max.sequence.count.CDD=1000
analysis.max.sequence.count.GENE3D=4000
analysis.max.sequence.count.SFLD=32000
analysis.max.sequence.count.TMHMM=16000
analysis.max.sequence.count.PANTHER=500
analysis.max.sequence.count.SMART=500
analysis.max.sequence.count.TIGRFAM=4000
analysis.max.sequence.count.PRINTS=500
analysis.max.sequence.count.PROSITE_PROFILES=1000
analysis.max.sequence.count.PROSITE_PATTERNS=4000
analysis.max.sequence.count.PIRSF=4000
analysis.max.sequence.count.PRODOM=4000
analysis.max.sequence.count.SSF=2000
analysis.max.sequence.count.HAMAP=32000
analysis.max.sequence.count.PFAM=4000
analysis.max.sequence.count.COILS=32000
analysis.max.sequence.count.PHOBIUS=16000
# SignalP 4.1 binary only allows a maximum of 10,000 sequences
analysis.max.sequence.count.SIGNALP=8000
analysis.max.sequence.count.MOBIDB_LITE=1000

##
## General settings
##
kvstore.entrydb.path=work/kvs/idb
# If multiple hosts are sharing the same file system, a delay may be required to
# avoid stale NFS handles
# nfs.delay.milliseconds=0
kvstore.delay.milliseconds=200
# Instructs I5 to completely clean up after itself - leave set to true.
delete.temporary.directory.on.completion=false
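
# Worth noting for the setting above: because temporary.file.directory is temp/[UNIQUE], each run
# works in its own directory, and with delete.temporary.directory.on.completion=false those
# per-run directories (and the intermediate analysis files in them) are retained when the run
# finishes, so they have to be removed manually if disk space is a concern.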
##
## Broker TCP Connection
##
# A list of TCP ports that should not be used for messaging. (Apart from this, only ports > 1024 and < 65535 will be used.)
tcp.port.exclusion.list=3879,3878,3881,3882
tcp.port.prefered.list=1024-1200

##
## precalculated match lookup service
##
# By default, if the sequence already has matches available from the EBI, this service will look them
# up for you. Note - at present it will always return all the available matches, ignoring any -appl options
# set on the command line.
precalculated.match.lookup.service.url=https://www.ebi.ac.uk/interpro/match-lookup
#proxy set up
precalculated.match.lookup.service.proxy.host=
precalculated.match.lookup.service.proxy.port=3128
precalculated.match.protein.lookup.batch.size=100
precalculated.match.protein.insert.batch.size=500
precalculated.match.protein.insert.batch.size.nolookup=4000
#Exclude sites from output (residue level annotations)
exclude.sites.from.output=false

##
## getorf configuration for nucleic acid sequences
##
# the following are roughly the times getorf takes to find sequences of open reading frames (ORFs) in n nucleotide sequences
#number of sequences -> approx. time it takes in our tests
# 600000 -> 10 minutes
# 3600000 -> 1 hour
# 7200000 -> 2 hours
# 43200000 -> 12 hours
# JOB: jobLoadNucleicAcidSequence
getorf.minsize=75
# Set InterProScan to only process the N longest ORFs for each nucleotide sequence
binary.getorf.parser.filtersize=8

##
## Output format
##
# TRUE by default, which means all generated graphical output documents (only SVG at the moment) will be archived (using the Linux command tar).
# This simple switch allows you to switch the archive mode off (simply set it to FALSE).
archiveSVGOutput=true
#disable HTML output
disable.html.output=false
#
#max.concurrent.threads.for.prepare.output.step=
max.concurrent.threads.for.prepare.output.step=1

##
## Master/Stand alone embedded workers
##
# Set the number of embedded workers to the number of processors that you would like to employ
# on the machine you are using to run InterProScan.
#number of embedded workers a master process can have
number.of.embedded.workers=6
maxnumber.of.embedded.workers=8

##
## Distributed mode (Cluster mode)
##
#grid name
grid.name=lsf
#grid.name=other-cluster
#project name for this run - use user.digest
user.digest=i5GridRun
#grid jobs limit : number of jobs you are allowed to run on the cluster
grid.jobs.limit=1000
#time between each bjobs or qstat command to check the status of jobs on the cluster
grid.check.interval.seconds=120
#allow master interproscan to run binaries ()
master.can.run.binaries=true
#deal with unknown step states
recover.unknown.step.state=false
#Grid submission commands (e.g. LSF bsub or SGE qsub) for starting remote workers
#commands the master uses to start new remote workers
grid.master.submit.command=bsub -q general -M 8192
grid.master.submit.high.memory.command=bsub -q bigmem -M 8192
#commands a worker uses to start new remote workers
grid.worker.submit.command=bsub -q general -M 8192
grid.worker.submit.high.memory.command=bsub -q bigmem -M 8192
# command to start a new worker (new jvm)
worker.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
# This may be identical to the worker.command argument above, however you may choose to select
# a machine with a much larger available memory, for use when a StepExecution fails.
worker.high.memory.command=java -Xms32m -Xmx2048m -jar interproscan-5.jar
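
# A rough sketch of how the commands above combine in LSF cluster mode: to add a remote worker, the
# master wraps its worker.command in the grid submission command, so the effective submission looks
# approximately like
#   bsub -q general -M 8192 java -Xms32m -Xmx2048m -jar interproscan-5.jar <worker options>
# with the bigmem queue / worker.high.memory.command pair used instead when a failed StepExecution
# is retried with more memory. "<worker options>" stands in for the run-specific arguments that
# InterProScan appends itself.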
# Set the number of embedded workers to the number of processors that you would like to employ
# on the node machine on which the worker will run.
#number of embedded workers in a remote worker
worker.number.of.embedded.workers=4
worker.maxnumber.of.embedded.workers=4
# max number of connections to the master
master.maxconsumers=48
#number of connections to the worker
worker.maxconsumers=32
#throttled network?
grid.throttle=true
# max number of jobs a tier 1 worker is allowed on its queue
worker.maxunfinished.jobs=32
#network tier depth
max.tier.depth=1
# Active MQ JMS broker temporary data directory
jms.broker.temp.directory=activemq-data/localhost/tmp_storage
#verbose.log=true
#verbose.log.level=10
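
# For context, a sketch of how a configuration like this is picked up at run time (the FASTA file
# name below is hypothetical): InterProScan 5 reads these settings from interproscan.properties in
# its installation directory, so annotating the AplCal3.0 protein set would look something like
#   ./interproscan.sh -i aplcal3_proteins.fasta -f tsv,xml -goterms -iprlookup
# where -appl can optionally be used to restrict the run to a subset of the member databases
# configured above.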