module Main where

import qualified Data.ByteString.Lazy as BB
import qualified Data.ByteString.Lazy.Char8 as B
import Data.Char (toUpper)

import Bio.Sequence
import System.SimpleArgs

main = do
  (fs,qs) <- getArgs
  trimPolyA fs qs

trimPolyA fs qs = writeFastaQual (fs++".trim") (qs++".trim") =<< map doTrim `fmap` map castToNuc `fmap` readFastaQual fs qs

doTrim s = case findPolyA s of Just (x,y) -> slice (x,y) s
                               Nothing -> s

slice :: (Int,Int) -> Sequence Nuc -> Sequence Nuc
slice (x,y) (Seq h d (Just q)) = (Seq (B.concat [h,B.pack (" trim: "++show (y+1)++" poly-A: "++show (y-x+1))]) 
                                      (B.take (fromIntegral y) d) (Just $ B.take (fromIntegral y) q))

-- Ripped from Dephd
findPolyA :: Sequence Nuc -> Maybe (Int,Int)
findPolyA (Seq _ d mq) = 
      let qd = zip (B.unpack d) (maybe (repeat 15) BB.unpack mq)
          scores = map (\(c,q) -> if toUpper c=='A' then match q else mismatch q) qd
          match x' = let x = fromIntegral x' in log (4*(1-1/10**(x/10)))
          mismatch x' = let x = fromIntegral x' in log 4 - log 3 - x/10*log 10
          cumulative = scanl (\a b -> let r = a + b in max 0 r) 0
          (zi,mi,maxscore) = findmax $ cumulative scores
      in if maxscore > 12 then Just (zi+1,mi) else Nothing  -- arbitrary constant alert!

findmax :: [Double] -> (Int,Int,Double)
findmax = go 0 (0,0,0) . zip [0..]
    where go _ cm [] = cm
          go _ cm ((i,0):rest) = go i cm rest
          go last_z (cmz,cmi,cmx) ((i,x):rest) = if x > cmx then go last_z (last_z,i,x) rest 
                                                 else go last_z (cmz,cmi,cmx) rest

