/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.molbio; import java.io.Serializable; import org.biojava.bio.BioError; import org.biojava.bio.BioException; import org.biojava.bio.seq.DNATools; import org.biojava.bio.symbol.IllegalAlphabetException; import org.biojava.bio.symbol.IllegalSymbolException; import org.biojava.bio.symbol.MotifTools; import org.biojava.bio.symbol.SymbolList; /** * RestrictionEnzyme represents a restriction enzyme * according to the REBASE standard. The cut positions are indicated * relative to the 5' end of the recognition site and occur downstream * of the given residue. Note that some enzmyes cut in more than one * position and that cut positions may occur outside the recognition * site. * * @author Keith James * @since 1.3 */ public class RestrictionEnzyme implements Serializable { /** * CUT_SIMPLE a cut type where the enzyme cuts in one * position relative to the recognition site. This covers the vast * majority of cases. */ public static final int CUT_SIMPLE = 0; /** * CUT_COMPOUND a cut type where the enzyme cuts in * two positions relative to the recognition site. */ public static final int CUT_COMPOUND = 1; /** * OVERHANG_5PRIME the sticky end type created by * enzymes which leave a 5' overhang. */ public static final int OVERHANG_5PRIME = 0; /** * OVERHANG_3PRIME the sticky end type created by * enzymes which leave a 3' overhang. */ public static final int OVERHANG_3PRIME = 1; /** * BLUNT the end type created by enzymes which leave * a blunt end. */ public static final int BLUNT = 2; protected String name; protected SymbolList site; protected int cutType; protected int [] dsCutPositions; protected int [] usCutPositions; protected String forwardRegex; protected String reverseRegex; private String summary; /** * Creates a new RestrictionEnzyme which cuts within * or downstream of the recognition site. The cut position indices * are always in the same coordinate space as the * recognition site. RestrictionEnzymes are * immutable. * * @param name a String such as EcoRI. * @param site a SymbolList recognition site. * @param dsForward an int index in the forward * strand (the strand conventionally written * 5'-3') of the recognition site at which the * cut occurs. The cut occurs between this base and the following * one. * @param dsReverse an int index in the reverse * strand (the strand conventionally written * 3'-5') of the recognition site at which the * cut occurs. The cut occurs between this base and the following * one. * * @exception IllegalAlphabetException if an error occurs. */ public RestrictionEnzyme(String name, SymbolList site, int dsForward, int dsReverse) throws IllegalAlphabetException { this(name, site, null, new int [] { dsForward, dsReverse }); cutType = CUT_SIMPLE; } /** * Creates a new RestrictionEnzyme of the unusual * type which cuts both upstream and downstream of its recognition * site. The cut position indices are always in * the same coordinate space as the recognition site. * * @param name a String such as Bsp24I. * @param site a SymbolList recognition site. * @param usForward an int index in the forward * strand (the strand conventionally written * 5'-3' upstream of the recognition site at * which the cut occurs. The cut occurs between this base and the * following one. * @param usReverse an int index in the reverse * strand (the strand conventionally written * 3'-5) upstream of the recognition site at * which the cut occurs. The cut occurs between this base and the * following one. * @param dsForward an int index in the forward * strand (the strand conventionally written * 5'-3') downstream of the recognition site at * which the cut occurs. The cut occurs between this base and the * following one. * @param dsReverse an int index in the reverse * strand (the strand conventionally written * 3'-5') downstream of the recognition site at * which the cut occurs. The cut occurs between this base and the * following one. * * @exception IllegalAlphabetException if an error occurs. */ public RestrictionEnzyme(String name, SymbolList site, int usForward, int usReverse, int dsForward, int dsReverse) throws IllegalAlphabetException { this(name, site, new int [] { usForward, usReverse }, new int [] { dsForward, dsReverse }); cutType = CUT_COMPOUND; } /** * Creates a new RestrictionEnzyme. * * @param name a String name. * @param site a SymbolList site. * @param usCutPositions an int [] array of optional * upstream indices. * @param dsCutPositions an int [] array of * downstream indices. * * @exception IllegalAlphabetException if an error occurs. */ private RestrictionEnzyme(String name, SymbolList site, int [] usCutPositions, int [] dsCutPositions) throws IllegalAlphabetException { if (site.getAlphabet() != DNATools.getDNA()) throw new IllegalAlphabetException("RestrictionEnzyme site can only be a DNA SymbolList." + " A SymbolList using the " + site.getAlphabet().getName() + " was supplied" ); this.name = name; this.site = site; this.usCutPositions = usCutPositions; this.dsCutPositions = dsCutPositions; forwardRegex = MotifTools.createRegex(site); try { reverseRegex = MotifTools.createRegex(DNATools.reverseComplement(site)); } catch (IllegalAlphabetException iae) { throw new BioError("RestrictionEnzyme site was not composed of a complementable Alphabet", iae); } StringBuffer sb = new StringBuffer(); sb.append(name); sb.append(" "); if (usCutPositions != null) { sb.append("("); sb.append(usCutPositions[0]); sb.append("/"); sb.append(usCutPositions[1]); sb.append(") "); } try { for (int i = 1; i <= site.length(); i++) sb.append(Character.toUpperCase(DNATools.dnaToken(site.symbolAt(i)))); } catch (IllegalSymbolException ise) { throw new BioError("RestrictionEnzyme site contained non-DNA Symbol", ise); } sb.append(" ("); sb.append(dsCutPositions[0]); sb.append("/"); sb.append(dsCutPositions[1]); sb.append(")"); summary = sb.substring(0); } /** * getName returns the enzyme name. * * @return a String. */ public String getName() { return name; } /** * getRecognitionSite returns the forward strand of * the recognition site. * * @return a SymbolList. */ public SymbolList getRecognitionSite() { return site; } /** * getForwardRegex returns a regular expression which * matches the forward strand of the recognition site. * * @return a String. */ public String getForwardRegex() { return forwardRegex; } /** * getReverseRegex returns a regular expression which * matches the reverse strand of the recognition site. * * @return a String. */ public String getReverseRegex() { return reverseRegex; } /** * isPalindromic returns true if the recognition site * is palindromic. * * @return a boolean. */ public boolean isPalindromic() { return forwardRegex.equals(reverseRegex); } /** * getCutType returns the type of cut produced by the * enzyme. This will be one of either RestrictionEnzyme.CUT_SIMPLE * (where it cuts in one position relative to the recognition site * i.e. the vast majority of cases) or * RestrictionEnzyme.CUT_COMPOUND (where it cuts in two positions). * * @return an int. */ public int getCutType() { return cutType; } /** * getDownstreamCut returns the cut site within or * downstream of the recognition site. * * @return an int [] array with the position in the * 5'- strand at index 0 and the 3'- strand at index 1. */ public int [] getDownstreamCut() { return dsCutPositions; } /** * getUpstreamCut returns the cut site upstream of * the recognition site. * * @return an int [] array with the position in the * 5'- strand at index 0 and the 3'- strand at index 1. * * @exception BioException if the enzyme does not cleave on both * sides of its recognition site. */ public int [] getUpstreamCut() throws BioException { if (cutType == CUT_SIMPLE) throw new BioException(name + " does not cut upstream of the recognition site"); return usCutPositions; } /** * getDownstreamEndType returns the double-stranded * end type produced by the primary (intra-site or downstream) * cut. * * @return an int equal to one of the constant fields * OVERHANG_5PRIME, OVERHANG_3PRIME or BLUNT. */ public int getDownstreamEndType() { if (dsCutPositions[0] > dsCutPositions[1]) return OVERHANG_5PRIME; else if (dsCutPositions[0] < dsCutPositions[1]) return OVERHANG_3PRIME; else return BLUNT; } /** * getUpstreamEndType returns the double-stranded end * type produced by the secondary (upstream) cut. * * @return an int equal to one of the constant fields * OVERHANG_5PRIME, OVERHANG_3PRIME or BLUNT. * * @exception BioException if the enzyme does not cleave on both * sides of its recognition site. */ public int getUpstreamEndType() throws BioException { if (cutType == CUT_SIMPLE) throw new BioException(name + " does not cut upstream of the recognition site"); if (usCutPositions[0] > usCutPositions[1]) return OVERHANG_5PRIME; else if (usCutPositions[0] < usCutPositions[1]) return OVERHANG_3PRIME; else return BLUNT; } public int hashCode() { return name.hashCode() ^ forwardRegex.hashCode(); } public boolean equals(Object o) { return (o instanceof RestrictionEnzyme) && name.equals(((RestrictionEnzyme) o).getName()); } public String toString() { return summary; } }