{-
   Scan a sequence looking for ORFs
-}

{-# OPTIONS -fglasgow-exts #-}
module Main where

import Bio.Sequence

import Control.Monad (when)
import System.IO
import System
import List (partition,isPrefixOf,sortBy)

usage = "korfu -[a|f|s] [file..]"

main :: IO ()
main = do
    (opts,args) <- return . partition (isPrefixOf "-") =<< getArgs
    when (null opts) (error usage)
    ss <- if null args then hReadFasta stdin 
          else return . concat =<< mapM readFasta args
    case opts of ["-a"] -> mapM_ (putStrLn . show_orfs) ss
                 ["-f"] -> hWriteFasta stdout (concatMap gen_fasta ss)
                 ["-s"] -> error "statistical output not implemented"

-- | Generate all orfs, highlight, and display indented
--   to match the nucleotide sequence
show_orfs :: Sequence -> String
show_orfs s = unlines $ [toStr $ seqheader s
                        ,toStr $ seqdata s]
              ++ map (show_orf id)      (rfs s)
              ++ map (show_orf reverse) (rrfs s)

show_orf :: (forall a. [a]->[a]) -> (Offset,[Amino]) -> String
show_orf rv (o,aas) = (concat . pad . rv . concatMap show') aas ++black++clear
  where pad x = pre:x
        pre  = replicate (fromIntegral o) ' '
        show' x | x == Met  = [black,green,show x,green,black]
                | x == STP  = [black,red,show x,red,black]
                | otherwise = [show x]
        black  = "\27[30m"
        red    = "\27[31m" 
        green  = "\27[32m"
        yellow = "\27[33m" 
        bold   = "\27[1m"
        clear  = "\27[0m"

-- | Output Fasta-formatted protein sequences for use in db searches etc.
gen_fasta :: Sequence -> [Sequence]
gen_fasta s = map (mkSeq '+') (rfs s) ++ map (mkSeq '-') (rrfs s)
   where mkSeq d (i,as) = 
           Seq (fromStr (toStr (seqlabel s)++d:show i)) (toIUPAC as) Nothing

-- | all ORFs ATG..STP
orfs :: Sequence -> [(Offset,[Amino])]
orfs s = map (\(i,f) -> (i,takeWhile (/=STP) f)) 
         . filter (\(_,f) -> not (null f) && head f==Met)
         $ (zip is . map (translate s) $ is)
         ++ (zip (map negate is) . map (translate $ revcompl s) $ is)
    where is = [0..seqlength s-3]

-- | Translate three RFs
rfs :: Sequence -> [(Offset,[Amino])]
rfs s = [(i,translate s i) | i <- [0..2]]
  
-- | Translate the three reverse RFs
rrfs :: Sequence -> [(Offset,[Amino])]
rrfs s = sort' [(rframe s i,translate (revcompl s) i) | i <- [0..2]]
    where sort' = sortBy (\ a b -> compare (fst a) (fst b))

rframe s off = (seqlength s `mod` 3 - off) `mod` 3

