# Copyright 2008 by Norbert Dojer. All rights reserved. # Adapted by Bartek Wilczynski. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Approximate calculation of appropriate thresholds for motif finding """ import math,random class ScoreDistribution: """ Class representing approximate score distribution for a given motif. Utilizes a dynamic programming approch to calculate the distribution of scores with a predefined precision. Provides a number of methods for calculating thresholds for motif occurences. """ def __init__(self,motif,precision=10**3): self.min_score=min(0.0,motif.min_score()) self.interval=max(0.0,motif.max_score())-self.min_score self.n_points=precision*motif.length self.step=self.interval/(self.n_points-1) self.mo_density=[0.0]*self.n_points self.mo_density[-self._index_diff(self.min_score)]=1.0 self.bg_density=[0.0]*self.n_points self.bg_density[-self._index_diff(self.min_score)]=1.0 self.ic=motif.ic() for lo,mo in zip(motif.log_odds(),motif.pwm()): self.modify(lo,mo,motif.background) def _index_diff(self,x,y=0.0): return int((x-y+0.5*self.step)//self.step) def _add(self,i,j): return max(0,min(self.n_points-1,i+j)) def modify(self,scores,mo_probs,bg_probs): mo_new=[0.0]*self.n_points bg_new=[0.0]*self.n_points for k, v in scores.items(): d=self._index_diff(v) for i in range(self.n_points): mo_new[self._add(i,d)]+=self.mo_density[i]*mo_probs[k] bg_new[self._add(i,d)]+=self.bg_density[i]*bg_probs[k] self.mo_density=mo_new self.bg_density=bg_new def threshold_fpr(self,fpr): """ Approximate the log-odds threshold which makes the type I error (false positive rate). """ i=self.n_points prob=0.0 while prob