// F# program to recognize non-context free pattern: implements a FSM ////////// The Story: // DNA sequences are represented using the symbols A, C, G and T. As part // of a multi-million dollar research project, a group of geneticists // collected numerous DNA samples from tigers, lions, panthers and other // members of the feline family. After years of research they discovered, // to their shock, that all DNA samples contained a certain pattern: a // number of C's, followed by the same number of A's, followed by the same // number of T's. For example, small cats had sequences like CCCCAAAATTTT, // while large cats (lions and tigers) had DNA that looked more like // CCCCCCCCCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTT. // To confirm their incredible finding, they need a computer program to // automatically detect the presence of this pattern. To their disappointment, // they found that none of the available, off-the-shelf pattern detection // programs can help them. While our geneticists are well-trained in biology, // none of them are computer scientists, so they couldn't possibly know about // the **context free pumping lemma**. This theoretical result implies that // the pattern C^nA^nT^n cannot be described by a *context free grammar*. // That is why no generic pattern matching program can detect it (the user // cannot describe the pattern). They will have to write a specific program // to do the job. // Thinking of you as a well-educated computer science student, they have // turned to you for help. Whether you think their hypothesis is ludicrous // is beside the point. You have to help them by implementing a finite state // machine (a finite automaton with memory and actions). open System;; type state = Sc | Sa | St | Sstart | Saccept;; /// get input Console.Write("Enter DNA string: ");; let inp = Console.ReadLine();; let instring = inp + "." // input string, with . marking end // global counter variables: let mutable cn = 0;; let mutable an = 0;; let mutable tn = 0;; // state values: let mutable i = 0;; // input position (index of instring) let mutable pi = -1;; // position of match, -1 means no match let mutable cstate = Sstart;; // current state // function to reset all counters let reset () = cn <- 0; an <- 0; tn <- 0;; //// //// main function implements state machine transition let fsm = function | (Sstart,'C') -> (cn <- 1; cstate <- Sc; pi<- i) | (Sstart,_) -> () // do nothing | (Sc,'C') -> cn<- cn+1 | (Sc,'A') -> (an <- 1; cstate <- Sa) | (Sa,'A') when (an an<- an+1 | (Sa,'C') -> (cn <- 1; cstate <- Sc; pi<- i) | (Sa,'T') when (an<=cn) -> (tn<- 1; cstate <- St; pi<- pi+(cn-an)) | (St,'T') when (tn tn<- tn+1 | (St,_) when (tn=an) -> cstate <- Saccept // accept | (St,'C') -> (cn <- 1; cstate <- Sc; pi<- i) | _ -> (reset(); cstate <- Sstart; pi <- -1);; //// fsm /// for tracing let printstate () = Console.Write("current state: "); match cstate with | Sstart -> Console.Write("Sstart") | Sc -> Console.Write("Sc") | Sa -> Console.Write("Sa") | St -> Console.Write("St") | Saccept -> Console.Write("Saccept"); Console.WriteLine(", i:chr = "+string(i)+":"+string(instring.[i]));; /// process input string one char at a time: while (cstate <> Saccept) && (i< instring.Length) do printstate(); fsm(cstate,instring.[i]); i <- i+1;; // while //// Report result: match cstate with | Saccept -> printf "Pattern found starting at position %d, size %d\n" pi (an*3) | _ -> printf "Pattern not found\n";; //////////////////////////// // Alternate version that relies less on mutable variables: let fcm = function // finite configuration machine | ((Sstart,cn,an,tn),'C') -> (pi<- i; (Sc,1,an,tn)) | ((Sstart,cn,an,tn),_) -> (Sstart,cn,an,tn) | ((Sc,cn,an,tn),'C') -> (Sc,cn+1,an,tn) | ((Sc,cn,an,tn),'A') -> (Sa,cn,1,tn) | ((Sa,cn,an,tn),'A') when (an (Sa,cn,an+1,tn) | ((Sa,cn,an,tn),'C') -> (pi<- i; (Sc,1,0,0)) | ((Sa,cn,an,tn),'T') when (an (pi<- pi+(cn-an); (St,cn,an,1)) | ((St,cn,an,tn),'T') when (tn (St,cn,an,tn+1) | ((St,cn,an,tn),_) when (tn=an) -> (printfn "success at %d" pi; (Saccept,cn,an,tn)) | ((St,cn,an,tn),'C') -> (pi<- i; (Sc,1,0,0)) | _ -> (pi<- -1; (Sstart,0,0,0));; let mutable config = (Sstart,0,0,0);; // initial configuration i <- 0; pi <- -1; while (i< instring.Length) do config <- fcm(config,instring.[i]); i <- i+1;; Console.WriteLine(string(config));;