{- Transculpt

   Simulate sequencing with flexible error modeling

   Should simulate different sequencing technologies,
   primer properties, vector sequences(?), etc.

   Implementation:

     1. use unfoldr, or:
     2. use a monad stack of StateT/Random

   Hmm.. input a sequence of char?

-}

module Main where

import Random

import Bio.Sequence
import Data.ByteString.Lazy.Char8 as B

import Id

-- | Program entry point.  Still a stub: running the program aborts with a
-- message saying so, rather than with an anonymous 'undefined'.
main :: IO ()
main = error "Transculpt: main is not implemented yet"

-- | A transcriber turns a 'Sequence' into its transcript, represented as a
-- plain string of characters.
type Transcriber = Sequence -> String

-- | A mutator rewrites a transcript, adding errors to it.
-- Open question: is this best implemented as an unfoldr carrying a state?
type Mutator = String -> String

-- | A single edit applied at the head of a transcript.
data Mut = Ins Char     -- ^ insert the given character in front of the head
         | Del          -- ^ drop the head character
         | Subst Char   -- ^ replace the head character with the given one
         | None         -- ^ leave the transcript untouched
         deriving (Eq, Show)

-- | Apply one edit at the front of a transcript.
--
-- On a non-empty transcript @x:xs@ the results are:
-- @Ins c@ gives @c:x:xs@, @Del@ gives @xs@, @Subst c@ gives @c:xs@ and
-- @None@ gives @x:xs@.
--
-- The function is total: on an empty transcript an insertion still adds its
-- character, while deletion and substitution have nothing to act on and
-- return the empty transcript.
applyMut :: Mut -> [Char] -> [Char]
applyMut (Ins c)   xs     = c : xs
applyMut Del       (_:xs) = xs
applyMut (Subst c) (_:xs) = c : xs
applyMut None      xs     = xs
applyMut _         []     = []   -- Del / Subst with nothing left to edit

-- | Mutator that introduces errors at uniform, position-independent rates.
--
-- The pair is @(subst, indel)@.  For every input character one random number
-- is drawn with @randomRs (0,1)@ and compared against the thresholds in
-- 'mkMut': below @subst@ the character is replaced by N; within the next
-- @indel/2@ an N is inserted in front of it; within the following @indel/2@
-- it is deleted; otherwise it is passed through unchanged.
-- NOTE(review): this presumes @subst + indel <= 1@; nothing checks it.
--
-- The output is produced lazily, one input character at a time, so the
-- input does not have to be finite.
uniform :: (Double,Double) -> StdGen -> Mutator
uniform (subst,indel) g = mutate (randomRs (0,1) g)
   where
     -- Translate a random draw into the edit it selects.
     mkMut p
        | p < subst           = Subst 'N'
        | p < subst + indel/2 = Ins 'N'
        | p < subst + indel   = Del
        | otherwise           = None
     -- Each character is edited on its own (as a singleton list) and the
     -- pieces are concatenated.  Explicit recursion is used on purpose:
     -- Data.ByteString.Lazy.Char8 is imported unqualified in this module,
     -- so unqualified 'map' / 'zipWith' would be ambiguous with Prelude's.
     mutate (p:ps) (x:xs) = applyMut (mkMut p) [x] ++ mutate ps xs
     mutate _      _      = []

-- Produce a transcript by unfolding over a sequence.
-- Relies on 'B.unfoldr :: (a -> Maybe (Char, a)) -> a -> ByteString':
-- the step function is 'idseq s end' and the initial state is 'id_init start'.
transcribe s (start,end) = B.unfoldr step seed
   where step = idseq s end
         seed = id_init start

-- Planned: pad a transcript to a desired length.
-- Meant to work like transcribe, but to output only N
-- (assuming perfect vector masking).
-- Not implemented yet; evaluating it aborts with a message naming this stub.
pad = error "Transculpt.pad: not implemented yet"

-- | Combine two unfoldr-style step functions into a single step function
-- over a paired state.
--
-- The generators are run one after the other: the first one is asked for a
-- character, and only when it answers 'Nothing' is the second one consulted.
-- The combined step ends (returns 'Nothing') when both do.
--
-- The state of the first generator is left untouched once it is exhausted,
-- so the first step function is expected to keep answering 'Nothing' for
-- that state.
combine :: (a -> Maybe (Char,a)) -> (b -> Maybe (Char,b)) 
           -> ((a,b) -> Maybe (Char,(a,b)))
combine f g (a,b) =
   case f a of
     Just (c,a') -> Just (c,(a',b))
     -- first generator is done: continue with the second, keeping 'a' as is
     Nothing     -> fmap (\(c,b') -> (c,(a,b'))) (g b)

-- state includes: rndgen and sequence?
