/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.bio.molbio;
import java.io.Serializable;
import org.biojava.bio.BioError;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.MotifTools;
import org.biojava.bio.symbol.SymbolList;
/**
 * {@code RestrictionEnzyme} represents a restriction enzyme
 * according to the REBASE standard. The cut positions are indicated
 * relative to the 5' end of the recognition site and occur downstream
 * of the given residue. Note that some enzymes cut in more than one
 * position and that cut positions may occur outside the recognition
 * site.
 *
 * <p>Instances are intended to be immutable: the accessors which
 * return cut positions hand out copies of the internal arrays.
 * Equality and hashing are both based on the enzyme name only.</p>
 *
 * @author Keith James
 * @since 1.3
 */
public class RestrictionEnzyme implements Serializable
{
    /**
     * {@code CUT_SIMPLE} a cut type where the enzyme cuts in one
     * position relative to the recognition site. This covers the vast
     * majority of cases.
     */
    public static final int CUT_SIMPLE = 0;

    /**
     * {@code CUT_COMPOUND} a cut type where the enzyme cuts in
     * two positions relative to the recognition site.
     */
    public static final int CUT_COMPOUND = 1;

    /**
     * {@code OVERHANG_5PRIME} the sticky end type created by
     * enzymes which leave a 5' overhang.
     */
    public static final int OVERHANG_5PRIME = 0;

    /**
     * {@code OVERHANG_3PRIME} the sticky end type created by
     * enzymes which leave a 3' overhang.
     */
    public static final int OVERHANG_3PRIME = 1;

    /**
     * {@code BLUNT} the end type created by enzymes which leave
     * a blunt end.
     */
    public static final int BLUNT = 2;

    protected String name;
    protected SymbolList site;
    protected int cutType;
    protected int [] dsCutPositions;
    protected int [] usCutPositions;
    protected String forwardRegex;
    protected String reverseRegex;

    // Human-readable description, built once by the constructor and
    // returned by toString().
    private String summary;

    /**
     * Creates a new {@code RestrictionEnzyme} which cuts within
     * or downstream of the recognition site. The cut position indices
     * are <strong>always</strong> in the same coordinate space as the
     * recognition site. {@code RestrictionEnzyme}s are immutable.
     *
     * @param name a {@code String} such as EcoRI.
     * @param site a {@code SymbolList} recognition site.
     * @param dsForward an {@code int} index in the forward
     * strand (the strand conventionally written 5'-3') of the
     * recognition site at which the cut occurs. The cut occurs
     * between this base and the following one.
     * @param dsReverse an {@code int} index in the reverse
     * strand (the strand conventionally written 3'-5') of the
     * recognition site at which the cut occurs. The cut occurs
     * between this base and the following one.
     *
     * @exception IllegalAlphabetException if the site is not a
     * {@code SymbolList} over the DNA alphabet.
     */
    public RestrictionEnzyme(String name, SymbolList site,
                             int dsForward, int dsReverse)
        throws IllegalAlphabetException
    {
        this(name, site,
             null,
             new int [] { dsForward, dsReverse });
        cutType = CUT_SIMPLE;
    }

    /**
     * Creates a new {@code RestrictionEnzyme} of the unusual
     * type which cuts both upstream and downstream of its recognition
     * site. The cut position indices are <strong>always</strong> in
     * the same coordinate space as the recognition site.
     *
     * @param name a {@code String} such as Bsp24I.
     * @param site a {@code SymbolList} recognition site.
     * @param usForward an {@code int} index in the forward
     * strand (the strand conventionally written 5'-3') upstream of
     * the recognition site at which the cut occurs. The cut occurs
     * between this base and the following one.
     * @param usReverse an {@code int} index in the reverse
     * strand (the strand conventionally written 3'-5') upstream of
     * the recognition site at which the cut occurs. The cut occurs
     * between this base and the following one.
     * @param dsForward an {@code int} index in the forward
     * strand (the strand conventionally written 5'-3') downstream of
     * the recognition site at which the cut occurs. The cut occurs
     * between this base and the following one.
     * @param dsReverse an {@code int} index in the reverse
     * strand (the strand conventionally written 3'-5') downstream of
     * the recognition site at which the cut occurs. The cut occurs
     * between this base and the following one.
     *
     * @exception IllegalAlphabetException if the site is not a
     * {@code SymbolList} over the DNA alphabet.
     */
    public RestrictionEnzyme(String name, SymbolList site,
                             int usForward, int usReverse,
                             int dsForward, int dsReverse)
        throws IllegalAlphabetException
    {
        this(name, site,
             new int [] { usForward, usReverse },
             new int [] { dsForward, dsReverse });
        cutType = CUT_COMPOUND;
    }

    /**
     * Creates a new {@code RestrictionEnzyme}. This is the common
     * constructor to which the public constructors delegate; it
     * stores the arguments, derives the forward and reverse regular
     * expressions from the site and builds the summary string. It
     * does not set the cut type, which the public constructors assign
     * afterwards.
     *
     * @param name a {@code String} name.
     * @param site a {@code SymbolList} site.
     * @param usCutPositions an {@code int []} array of optional
     * upstream indices (forward at index 0, reverse at index 1), or
     * {@code null} if there is no upstream cut.
     * @param dsCutPositions an {@code int []} array of
     * downstream indices (forward at index 0, reverse at index 1).
     *
     * @exception IllegalAlphabetException if the site is not a
     * {@code SymbolList} over the DNA alphabet.
     */
    private RestrictionEnzyme(String name, SymbolList site,
                              int [] usCutPositions,
                              int [] dsCutPositions)
        throws IllegalAlphabetException
    {
        if (site.getAlphabet() != DNATools.getDNA())
        {
            throw new IllegalAlphabetException("RestrictionEnzyme site can only be a DNA SymbolList."
                                               + " A SymbolList using the "
                                               + site.getAlphabet().getName()
                                               + " was supplied" );
        }

        this.name = name;
        this.site = site;
        this.usCutPositions = usCutPositions;
        this.dsCutPositions = dsCutPositions;

        forwardRegex = MotifTools.createRegex(site);

        try
        {
            reverseRegex =
                MotifTools.createRegex(DNATools.reverseComplement(site));
        }
        catch (IllegalAlphabetException iae)
        {
            // The alphabet was checked above, so this is treated as an
            // internal error rather than a caller error.
            throw new BioError("RestrictionEnzyme site was not composed of a complementable Alphabet", iae);
        }

        // Summary layout: "<name> [(usFwd/usRev) ]<SITE> (dsFwd/dsRev)"
        StringBuffer sb = new StringBuffer();
        sb.append(name);
        sb.append(" ");

        if (usCutPositions != null)
        {
            sb.append("(");
            sb.append(usCutPositions[0]);
            sb.append("/");
            sb.append(usCutPositions[1]);
            sb.append(") ");
        }

        try
        {
            // SymbolList positions are addressed from 1 to length()
            for (int i = 1; i <= site.length(); i++)
            {
                sb.append(Character.toUpperCase(DNATools.dnaToken(site.symbolAt(i))));
            }
        }
        catch (IllegalSymbolException ise)
        {
            throw new BioError("RestrictionEnzyme site contained non-DNA Symbol", ise);
        }

        sb.append(" (");
        sb.append(dsCutPositions[0]);
        sb.append("/");
        sb.append(dsCutPositions[1]);
        sb.append(")");

        summary = sb.toString();
    }

    /**
     * {@code getName} returns the enzyme name.
     *
     * @return a {@code String}.
     */
    public String getName()
    {
        return name;
    }

    /**
     * {@code getRecognitionSite} returns the forward strand of
     * the recognition site.
     *
     * @return a {@code SymbolList}.
     */
    public SymbolList getRecognitionSite()
    {
        return site;
    }

    /**
     * {@code getForwardRegex} returns a regular expression which
     * matches the forward strand of the recognition site.
     *
     * @return a {@code String}.
     */
    public String getForwardRegex()
    {
        return forwardRegex;
    }

    /**
     * {@code getReverseRegex} returns a regular expression which
     * matches the reverse strand of the recognition site.
     *
     * @return a {@code String}.
     */
    public String getReverseRegex()
    {
        return reverseRegex;
    }

    /**
     * {@code isPalindromic} returns true if the recognition site
     * is palindromic, i.e. the forward and reverse regular
     * expressions are identical.
     *
     * @return a {@code boolean}.
     */
    public boolean isPalindromic()
    {
        return forwardRegex.equals(reverseRegex);
    }

    /**
     * {@code getCutType} returns the type of cut produced by the
     * enzyme. This will be one of either RestrictionEnzyme.CUT_SIMPLE
     * (where it cuts in one position relative to the recognition site
     * i.e. the vast majority of cases) or
     * RestrictionEnzyme.CUT_COMPOUND (where it cuts in two positions).
     *
     * @return an {@code int}.
     */
    public int getCutType()
    {
        return cutType;
    }

    /**
     * {@code getDownstreamCut} returns the cut site within or
     * downstream of the recognition site.
     *
     * @return an {@code int []} array with the position in the
     * 5'- strand at index 0 and the 3'- strand at index 1. The array
     * is a copy; modifying it does not affect this enzyme.
     */
    public int [] getDownstreamCut()
    {
        return copyPositions(dsCutPositions);
    }

    /**
     * {@code getUpstreamCut} returns the cut site upstream of
     * the recognition site.
     *
     * @return an {@code int []} array with the position in the
     * 5'- strand at index 0 and the 3'- strand at index 1. The array
     * is a copy; modifying it does not affect this enzyme.
     *
     * @exception BioException if the enzyme does not cleave on both
     * sides of its recognition site.
     */
    public int [] getUpstreamCut() throws BioException
    {
        if (cutType == CUT_SIMPLE)
        {
            throw new BioException(name + " does not cut upstream of the recognition site");
        }

        return copyPositions(usCutPositions);
    }

    /**
     * {@code getDownstreamEndType} returns the double-stranded
     * end type produced by the primary (intra-site or downstream)
     * cut.
     *
     * @return an {@code int} equal to one of the constant fields
     * OVERHANG_5PRIME, OVERHANG_3PRIME or BLUNT.
     */
    public int getDownstreamEndType()
    {
        return endType(dsCutPositions[0], dsCutPositions[1]);
    }

    /**
     * {@code getUpstreamEndType} returns the double-stranded end
     * type produced by the secondary (upstream) cut.
     *
     * @return an {@code int} equal to one of the constant fields
     * OVERHANG_5PRIME, OVERHANG_3PRIME or BLUNT.
     *
     * @exception BioException if the enzyme does not cleave on both
     * sides of its recognition site.
     */
    public int getUpstreamEndType() throws BioException
    {
        if (cutType == CUT_SIMPLE)
        {
            throw new BioException(name + " does not cut upstream of the recognition site");
        }

        return endType(usCutPositions[0], usCutPositions[1]);
    }

    /**
     * Returns a hash code derived from the enzyme name only, so that
     * it is consistent with {@link #equals(Object)}, which also
     * compares names only.
     *
     * @return an {@code int} hash code.
     */
    public int hashCode()
    {
        return name.hashCode();
    }

    /**
     * Two enzymes are considered equal if they have the same name.
     *
     * @param o the {@code Object} to compare with.
     *
     * @return true if {@code o} is a {@code RestrictionEnzyme}
     * with the same name as this one.
     */
    public boolean equals(Object o)
    {
        if (this == o)
        {
            return true;
        }

        return (o instanceof RestrictionEnzyme)
            && name.equals(((RestrictionEnzyme) o).getName());
    }

    /**
     * Returns the summary built at construction: the name, the
     * optional upstream cut positions, the upper-case site and the
     * downstream cut positions.
     *
     * @return a {@code String}.
     */
    public String toString()
    {
        return summary;
    }

    /**
     * Classifies the end left by a pair of cut indices which share
     * the coordinate space of the recognition site. When the forward
     * strand is cut before the reverse strand (e.g. 1/5) the
     * protruding single strand carries a 5' end; when it is cut after
     * the reverse strand (e.g. 5/1) the protruding strand carries a
     * 3' end; equal indices give a blunt end.
     */
    private static int endType(int forward, int reverse)
    {
        if (forward < reverse)
        {
            return OVERHANG_5PRIME;
        }
        else if (forward > reverse)
        {
            return OVERHANG_3PRIME;
        }
        else
        {
            return BLUNT;
        }
    }

    /**
     * Returns a copy of the given cut positions so that callers
     * cannot modify the internal state; a null argument yields null.
     */
    private static int [] copyPositions(int [] positions)
    {
        return positions == null ? null : (int []) positions.clone();
    }
}