*************************************** * pknotsRG * * * * Canonical RNA Structures including * * pseudoknots in O(n^4) time and * * O(n^2) space. * * * * Copyright: Jens Reeder and * * Robert Giegerich, 2004 * *************************************** Note: This is a simplified version of the original pknotsRG. It folds RNA sequencences into its secondary structure including the class of canonical simple recursive pseudoknots. However, it is restricted to pseudoknots without bulge loops in their stems. For the full version visit us at http://bibiserv.techfak.uni-bielefeld.de/pknotsrg . > module Main where > import Array > import System(getArgs) > import Foldingspace > import RNACombinators > import Energy > import Algebras > import Utilities > main :: IO() > main = do > [arg1] <- getArgs > let input = arg1 > ((e,p):xs) = pknots input (energy *** pp) in > putStr (input++"\n" ++ p ++ "\t("++show ((fromIntegral e::Float) /100) ++")\n") The function pknots computes for a given input sequence and an evaluation algebra the optimal foldings. > pknots :: [Char] -> ( (RNAInput -> FS_Algebra Int a b) > ,(RNAInput -> FS_AlgebraExt Int a b)) -> [b] > pknots sequence algebra = axiom (q struct) where > > tabulated = table n > listed = table1 n > n = length sequence > axiom = axiom' n > inp = mk (rna sequence) > basepair (i,j) = basepair' (inp,(i,j)) > stackpair (i,j) = stackpair' (inp,(i,j)) > minloopsize m (i,j) = minloopsize' m (inp,(i,j)) > (alg, algext) = algebra > (sadd,cadd,is,sr,hl,bl,br,il, > ml, mldl, mldr, mldlr, dl, dr, dlr, edl, edr, edlr, > drem, cons, ul, pul, addss, ssadd, nil, h, h_l, h_s, h_i, h_p) = alg inp > (flush, nflush, co, cor, pk, pkmldl, pkmldr, pkmldlr, pkml, pk', > kndl, kndr, kndlr, frd, bkd, scale, unscale, > emptymid, midbase, middlro, middl, middr, middlr, midregion, pss)= algext inp > > struct = listed ( > sadd <<< base +~~ q struct ||| > cadd <<< edangle ~~~ q struct ||| > empty nil ... h_s) > where > edangle = edl <<< base +~~ closed ~~. loc ||| > edr <<< loc .~~ closed ~~+ base ||| > edlr <<< base +~~ closed ~~+ base ||| > drem <<< initstem ||| > kndr <<< knot ~~+ base ||| > kndl <<< base +~~ knot ||| > kndlr<<< base +~~ knot ~~+ base ||| > pk <<< knot ... h > where > initstem = is <<< loc .~~ closed ~~. loc > closed = tabulated ( > stack ||| (hairpin ||| leftB ||| rightB ||| iloop ||| multiloop) `with` stackpair ... h) > where > stack = (sr <<< base +~~ closed ~~+ base) `with` basepair > hairpin = (hl <<< base +~~ base ++~ (region `with` (minloopsize 3)) ~~+ base ~~+ base) > leftB = (bl <<< base +~~ base ++~ region ~~~ initstem ~~+ base ~~+ base) ... h > rightB = (br <<< base +~~ base ++~ initstem ~~~ region ~~+ base ~~+ base) ... h > iloop = (il <<< base +~~ base ++~ region !~~ closed ~~! region ~~+ base ~~+ base) ... h where > infixl 7 ~~!,!~~ > (~~!) = (~~<) 30 > (!~~) = (<~~) 32 > multiloop = (mldl <<< base +~~ base ++~ base +++ ml_components ~~+ base ~~+ base ||| > mldr <<< base +~~ base ++~ ml_components ~~+ base ~~+ base ~~+ base ||| > mldlr <<< base +~~ base ++~ base +++ ml_components ~~+ base ~~+ base ~~+ base ||| > ml <<< base +~~ base ++~ ml_components ~~+ base ~~+ base) ... h > where > ml_components = combine <<< block ~~~ comps > comps = tabulated ( > cons <<< block ~~~ comps ||| > block ||| > addss <<< block ~~~ region ... h_l) > > block = tabulated ( > ul <<< dangle ||| > ssadd <<< region ~~~ dangle ||| > pkmldl <<< base +~~ knot ||| > pkmldr <<< knot ~~+ base ||| > pkmldlr <<< base +~~ knot ~~+ base ||| > pkml <<< knot ... h_l) > where > dangle = dl <<< base +~~ initstem ~~. loc ||| > dr <<< loc .~~ initstem ~~+ base ||| > dlr <<< base +~~ initstem ~~+ base ||| > drem <<< initstem ... h > knot = tabulated ( pknot ... h_p) > where > pknot (i,j) = [pk' energy a u b v a' w b' | l <- [i+2 .. j-1], k <- [l+1 .. j-2], > (alphanrg, h) <- stacklen (i,k), > h >= 2, > (betanrg, betalen) <- stacklen (l,j), > let h' = if (betalen + h) > (k-l) > then k-l-h > else betalen, > h' >= 2, > a <- region (i , i+h ), > u <- front j (i+h+1, l ), > b <- region (l , l+h' ), > v <- middle (j-h') (i+h) (l+h', k-h ), > a'<- region (k-h , k ), > w <- back i (k , j-h'-2), > b'<- region (j-h' , j ), > (correctionterm, _) <- stacklen (l+h'-1,j-h'+1), > let energy = alphanrg + betanrg - correctionterm > ] The internal parts of a pseudoknot: > front j = co <<< front' ||| > frd j <<< front' ~~+ base ... h_l -- one base dangling of b,b' > front' = pul <<< emptystrand ||| > comps ... h_l > > middle k l = emptymid k l ||| > midbase k l ||| > middlro k l ||| > middl k <<< base +~~ mid ||| > middr l <<< mid ~~+ base ||| > middlr k l <<< base +~~ mid ~~+ base ||| > midregion <<< mid ... h_l > mid = pul <<< singlestrand ||| > comps ... h_l > > back i = co <<< back' ||| -- one base dangling of a,a' > bkd i <<< base +~~ back' ... h_l > back' = pul <<< emptystrand ||| > comps ... h_l > singlestrand = pss <<< region > emptystrand = pss <<< uregion stacklen tabulates the energy and the length of an optimal helix starting at position (i,j) > stacklen = tabulated( > (sum <<< base +~~ stacklen ~~+ base) `with` basepair ||| > (sumend <<< base +~~ (region `with` (minloopsize 3)) ~~+ base) `with` basepair ...hmin) > where sum lb (c,k) rb = (c + sr_energy inp (lb, rb), k+1) > sumend lb _ rb = (0,1) > hmin [] = [] > hmin xs = [minimum xs]