//Copyright (c) 2003, The Institute for Genomic Research (TIGR), Rockville,
// Maryland, U.S.A.  All rights reserved.

//  A. L. Delcher
//
//  Written:  24 April 97
//
//     File:  ~delcher/TIGR/context.h
//  Version:  1.01  31 Jul 97
//
//  Routines to implement the nonuniform context Markov models described in
//  the Ristad/Thomas paper.


#ifndef  CONTEXT_H_INCLUDED
#define  CONTEXT_H_INCLUDED


#include  "delcher.h"
#include "strarray.h"

const int  SIMPLE_MODEL_LEN = 6;
const double  MIN_LOG_PROB_FACTOR = -6.0;


void  Simple_Evaluate  (char X [], int T, int Model_Len,
                        String_Array & Delta0, String_Array & Delta1,
                        String_Array & Delta2, double & Prob_X)

//  Set  Prob_X  to the log of the probability of generating DNA string
//  X [1 .. T]  in the simple nonhomogeneous Markov model with
//  model length  Model_Len  and conditional probabilites Delta .

  {
   int  t, Len;

   Prob_X = 0.0;
   for  (t = 1;  t <= T;  t ++)
     {
      Len = Min (t, Model_Len);
      switch  (t % 3)
        {
         case  0 :
           Prob_X += log (Delta0 (X + t - Len + 1, Len));
           break;
         case  1 :
           Prob_X += log (Delta1 (X + t - Len + 1, Len));
           break;
         case  2 :
           Prob_X += log (Delta2 (X + t - Len + 1, Len));
           break;
        }
     }

   Prob_X = Max (Prob_X, MIN_LOG_PROB_FACTOR * T);

   return;
  }


void  My_Fast_Evaluate  (char X [], int T, int Model_Len,
                        String_Array & Delta0, String_Array & Delta1,
                        String_Array & Delta2, double & Prob_X)

//  Set  Prob_X  to the log of the probability of generating DNA string
//  X [1 .. T]  in the simple nonhomogeneous Markov model with
//  model length  Model_Len  and conditional probabilites Delta .
//  Same as  Simple_Evaluate  but keeps track of subscripts for
//  delta values to go faster and assumes  Delta  entries have previously
//  been converted to logs.

  {
   int  t, Base, Len, Power, Sub;

   Prob_X = 0.0;

   Base = Sub = 0;
   Power = 1;
   Len = Min (T, Model_Len);
   for  (t = 1;  t <= Len;  t ++)
     {
      Base += Power;
      Sub = Sub * ALPHABET_SIZE + Delta0 . Value (X [t]);
      switch  (t % 3)
        {
         case  0 :
           Prob_X += exp(Delta0 [Base + Sub]);
           break;
         case  1 :
           Prob_X += exp(Delta1 [Base + Sub]);
           break;
         case  2 :
           Prob_X += exp(Delta2 [Base + Sub]);
           break;
        }
      if  (t < Len)
          Power *= ALPHABET_SIZE;
     }
   
   for  ( ;  t <= T;  t ++)
     {
      Sub = (Sub % Power) * ALPHABET_SIZE + Delta0 . Value (X [t]);
      switch  (t % 3)
        {
         case  0 :
           Prob_X += exp(Delta0 [Base + Sub]);
           break;
         case  1 :
           Prob_X += exp(Delta1 [Base + Sub]);
           break;
         case  2 :
           Prob_X += exp(Delta2 [Base + Sub]);
           break;
        }
     }

   //Prob_X = Max (Prob_X, MIN_LOG_PROB_FACTOR * T);
   Prob_X=Prob_X/T;

   return;
  }


/*void  Fast_Evaluate  (char X [], int T, int Model_Len,
  String_Array & Delta0, String_Array & Delta1,
  String_Array & Delta2, double & Prob_X)
  
  //  Set  Prob_X  to the log of the probability of generating DNA string
  //  X [1 .. T]  in the simple nonhomogeneous Markov model with
  //  model length  Model_Len  and conditional probabilites Delta .
  //  Same as  Simple_Evaluate  but keeps track of subscripts for
  //  delta values to go faster and assumes  Delta  entries have previously
  //  been converted to logs.
  
  {
  int  t, Base, Len, Power, Sub;

  Prob_X = 0.0;

  Base = Sub = 0;
  Power = 1;
  Len = Min (T, Model_Len);
  for  (t = 1;  t <= Len;  t ++)
  {
  Base += Power;
  Sub = Sub * ALPHABET_SIZE + Delta0 . Value (X [t]);
  switch  (t % 3)
  {
  case  0 :
  Prob_X += Delta0 [Base + Sub];
  break;
  case  1 :
  Prob_X += Delta1 [Base + Sub];
  break;
  case  2 :
  Prob_X += Delta2 [Base + Sub];
  break;
  }
  if  (t < Len)
  Power *= ALPHABET_SIZE;
  }
   
  for  ( ;  t <= T;  t ++)
  {
  Sub = (Sub % Power) * ALPHABET_SIZE + Delta0 . Value (X [t]);
  switch  (t % 3)
  {
  case  0 :
  Prob_X += Delta0 [Base + Sub];
  break;
  case  1 :
  Prob_X += Delta1 [Base + Sub];
  break;
  case  2 :
  Prob_X += Delta2 [Base + Sub];
  break;
  }
  }
  
  Prob_X = Max (Prob_X, MIN_LOG_PROB_FACTOR * T);
  
  return;
  }*/


void  Easy_Eval  (char X [], int T, String_Array & Lambda0,
                  String_Array & Lambda1, String_Array & Lambda2,
                  String_Array & Delta0, String_Array & Delta1,
                  String_Array & Delta2, double & Prob_X)

//  Set  Prob_X  to the log of the probability of generating DNA string
//  X [1 .. T]  in the nonuniform context model with parameters
//  Delta  and  Lambda .

  {
   int  t, Len;

   Prob_X = 0.0;
   for  (t = 1;  t <= T;  t ++)
     {
      Len = Min (t, SIMPLE_MODEL_LEN);
      switch  (t % 3)
        {
         case  0 :
           if  (Len < SIMPLE_MODEL_LEN || Lambda0 (X + t - Len + 1, Len - 1) > 0.999)
               Prob_X += log (Delta0 (X + t - Len + 1, Len));
             else
               Prob_X += log (Delta0 (X + t - SIMPLE_MODEL_LEN + 2, SIMPLE_MODEL_LEN - 1));
           break;
         case  1 :
           if  (Len < SIMPLE_MODEL_LEN || Lambda1 (X + t - Len + 1, Len - 1) > 0.999)
               Prob_X += log (Delta1 (X + t - Len + 1, Len));
             else
               Prob_X += log (Delta1 (X + t - SIMPLE_MODEL_LEN + 2, SIMPLE_MODEL_LEN - 1));
           break;
         case  2 :
           if  (Len < SIMPLE_MODEL_LEN || Lambda2 (X + t - Len + 1, Len - 1) > 0.999)
               Prob_X += log (Delta2 (X + t - Len + 1, Len));
             else
               Prob_X += log (Delta2 (X + t - SIMPLE_MODEL_LEN + 2, SIMPLE_MODEL_LEN - 1));
           break;
        }
     }

   return;
  }


void  Evaluate  (char X [], int T, String_Array & Lambda0,
                 String_Array & Lambda1, String_Array & Lambda2,
                 String_Array & Delta0, String_Array & Delta1,
                 String_Array & Delta2, double & Prob_X,int Model_Len)

//  Set  Prob_X  to the log of the probability of generating DNA string
//  X [1 .. T]  in the nonuniform context model with parameters
//  Delta  and  Lambda .

  {
   double  L, P, PC;
   int  i, t, Max_Context_Len;

   Prob_X = 0.0;
   for  (t = 0;  t < T;  t ++)
     {
//      Max_Context_Len = Min (t, SIMPLE_MODEL_LEN - 1);
      Max_Context_Len = Min (t, Model_Len - 1);
      P = 0.0;
      PC = 1.0;
      for  (i = Max_Context_Len;  i > 0 && PC > 0.0;  i --)
        {
         switch  ((t + 1) % 3)
           {
            case  0 :
              L = Lambda0 (X + t - i + 1, i);
              P += PC * L * Delta0 (X + t - i + 1, i + 1);
              PC *= 1 - L;
              break;
            case  1 :
              L = Lambda1 (X + t - i + 1, i);
              P += PC * L * Delta1 (X + t - i + 1, i + 1);
              PC *= 1 - L;
              break;
            case  2 :
              L = Lambda2 (X + t - i + 1, i);
              P += PC * L * Delta2 (X + t - i + 1, i + 1);
              PC *= 1 - L;
              break;
           }
        }
      switch  ((t + 1) % 3)
        {
         case  0 :
           P += PC * Delta0 (X + t - i + 1, i + 1);
           break;
         case  1 :
           P += PC * Delta1 (X + t - i + 1, i + 1);
           break;
         case  2 :
           P += PC * Delta2 (X + t - i + 1, i + 1);
           break;
        }

      Prob_X += log (P);
     }

   Prob_X = Max (Prob_X, MIN_LOG_PROB_FACTOR * T);

   return;
  }


void  Forward  (char X [], int T, double * * Alpha,
                double * * Transition, double & Prob_X,
                String_Array & Lambda0, String_Array & Lambda1,
                String_Array & Lambda2, String_Array & Delta0,
                String_Array & Delta1, String_Array & Delta2,
                double * A_Scale,int Model_Len)

//  Set values of  Alpha [t] [i] ,  0 <= t <= T ,
//  0 <= i < MODEL_LEN ,  to be the probability
//  of generating  X [1 .. t] , then choosing a context of length  i ,
//  and then generating the next symbol  X [t + 1] .
//  Set  Transition [t] [i]  to the probability of choosing a
//  context of length  i  and generating the next symbol, given
//  the current history  X [1 .. t] .  Set  Prob_X  to the log of
//  the probability
//  of generating the entire string  X [1 .. T] .  A_Scale [1 .. T]
//  is set to the log of the scaling factor for the probability
//  values.

  {
   double  A, L, PC;
   int  i, t, Max_Context_Len;
   
   A = 0.0;
   for  (i = 0;  i <= T;  i ++)
     A_Scale [i] = 0.0;

   for  (t = 0;  t < T;  t ++)
     {
      PC = 1.0;
      Max_Context_Len = Min (t, Model_Len - 1);

      for  (i = Max_Context_Len;  i > 0;  i --)
        {
         switch  ((t + 1) % 3)
           {
            case  0 :
              L = Lambda0 (X + t - i + 1, i);
              Transition [t] [i] = PC * L * Delta0 (X + t - i + 1, i + 1);
              PC *= 1 - L;
              break;
            case  1 :
              L = Lambda1 (X + t - i + 1, i);
              Transition [t] [i] = PC * L * Delta1 (X + t - i + 1, i + 1);
              PC *= 1 - L;
              break;
            case  2 :
              L = Lambda2 (X + t - i + 1, i);
              Transition [t] [i] = PC * L * Delta2 (X + t - i + 1, i + 1);
              PC *= 1 - L;
              break;
           }
         Alpha [t] [i] = Transition [t] [i];
         A += Alpha [t] [i];
        }

      switch  ((t + 1) % 3)
        {
         case  0 :
           Transition [t] [0] = PC * Delta0 (X + t + 1, 1);
           break;
         case  1 :
           Transition [t] [0] = PC * Delta1 (X + t + 1, 1);
           break;
         case  2 :
           Transition [t] [0] = PC * Delta2 (X + t + 1, 1);
           break;
        }
      Alpha [t] [0] = Transition [t] [0];
      A += Alpha [t] [0];

      A_Scale [t + 1] = A_Scale [t] + log (A);
      A = 1.0;
     }

   Prob_X = A_Scale [T];

   return;
  }


void  Backward  (char X [], int T, double Beta [],
                 double * * Transition, double * B_Scale,int Model_Len)

//  Set values of  Beta [t] ,  0 <= t <= T  to the probability of
//  generating the remaining symbols  X [t + 1 .. T] given that
//  the current history is  X [1 .. t] .  Uses the values  Transition
//  computed in  Forward () .

  {
   double  B;
   int  i, t, Max_Context_Len;

   Beta [T] = 1.0;
   B_Scale [T] = 0.0;

   for  (t = T - 1;  t >= 0;  t --)
     {
      Beta [t] = 0.0;
      Max_Context_Len = Min (t, Model_Len - 1);

      B_Scale [t] = log (Beta [t + 1]) + B_Scale [t + 1];
      B = Beta [t + 1] * exp (B_Scale [t + 1] - B_Scale [t]);

      for  (i = Max_Context_Len;  i >= 0;  i --)
        Beta [t] += Transition [t] [i] * B;
     }

   return;
  }


void  Calc_Gamma  (char X [], int T, double * * Gamma,
                   String_Array & Lambda0, String_Array & Lambda1,
                   String_Array & Lambda2, String_Array & Delta0,
                   String_Array & Delta1, String_Array & Delta2,
                   double & Prob_X,int Model_Len)

//  Calculate the transition probabilities  Gamma [t] [i]  of
//  the model for the string  X [1 .. T] .  Use the probability
//  parameters in  Lambda  and  Delta.  Set  Prob_X  to the
//  probability of this string being generated in the model.

  {
   double  Check, * * Alpha, * * Transition, * Beta;
   double  * A_Scale, * B_Scale;
   int  i, t, Max_Context_Len;

   Alpha = (double * *) Safe_malloc ((1 + T) * sizeof (double *));
   Transition = (double * *) Safe_malloc ((1 + T) * sizeof (double *));
   for  (t = 0;  t <= T;  t ++)
     {
      Alpha [t] = (double *) Safe_malloc (Model_Len * sizeof (double));
      Transition [t] = (double *) Safe_malloc (Model_Len * sizeof (double));
     }
   Beta = (double *) Safe_malloc ((1 + T) * sizeof (double));
   A_Scale = (double *) Safe_malloc ((1 + T) * sizeof (double));
   B_Scale = (double *) Safe_malloc ((1 + T) * sizeof (double));

   Forward (X, T, Alpha, Transition, Prob_X, Lambda0, Lambda1, Lambda2,
                  Delta0, Delta1, Delta2, A_Scale,Model_Len);
   Backward (X, T, Beta, Transition, B_Scale,Model_Len);

   Check = 0.0;
   for  (t = 0;  t < T;  t ++)
     {
      Max_Context_Len = Min (t, Model_Len - 1);

      for  (i = Max_Context_Len;  i >= 0;  i --)
        {
         Gamma [t] [i] = Alpha [t] [i] * Beta [t + 1]
                               * exp (A_Scale [t] + B_Scale [t + 1] -  Prob_X);
         Check += Gamma [t] [i];
        }
     }

//   printf ("Check = %f  T = %d\n", Check, T);         // Should = T

   for  (t = 0;  t <= T;  t ++)
     {
      free (Alpha [t]);
      free (Transition [t]);
     }
   free (Alpha);
   free (Transition);
   free (Beta);
   free (A_Scale);
   free (B_Scale);

   return;
  }


void  Expectation_Step  (char X [], int T, double * * Gamma,
                         String_Array & L_Plus0, String_Array & L_Plus1,
                         String_Array & L_Plus2, String_Array & L_Minus0,
                         String_Array & L_Minus1, String_Array & L_Minus2,int Model_Len)

//  Set the values of  L_Plus  and  L_Minus  according to the transitions
//  in  Gamma  for the string  X [1 .. T] .

  {
   int  i, k, t, Max_Context_Len;

   for  (t = T - 1;  t >= 0;  t --)
     {
      Max_Context_Len = Min (t, Model_Len - 1);

      for  (i = Max_Context_Len;  i > 0;  i --)
        {
         switch  ((t + 1) % 3)
           {
            case  0 :
              L_Plus0 . Incr (X + t - i + 1, i, Gamma [t] [i]);
              break;
            case  1 :
              L_Plus1 . Incr (X + t - i + 1, i, Gamma [t] [i]);
              break;
            case  2 :
              L_Plus2 . Incr (X + t - i + 1, i, Gamma [t] [i]);
              break;
           }
         for  (k = i + 1;  k <= Max_Context_Len;  k ++)
           switch  ((t + 1) % 3)
             {
              case  0 :
                L_Minus0 . Incr (X + t - k + 1, k, Gamma [t] [i]);
                break;
              case  1 :
                L_Minus1 . Incr (X + t - k + 1, k, Gamma [t] [i]);
                break;
              case  2 :
                L_Minus2 . Incr (X + t - k + 1, k, Gamma [t] [i]);
                break;
             }
        }
     }

   return;
  }


#endif