#!/usr/bin/perl -w # PURPOSE: puts the primers in 96 well format for ordering. # creates a file that can be emailed to # primers@invitrogen.com. Also creates a .fasta file of the primers. # # HOW TO USE IT: orderPrimerPairs.perl # # INPUT: uses the unsorted file of PRIMER_PAIR blocks, primers_unsorted.txt # transcripts_filename.txt, as created by amplifyTranscripts.perl # # OUTPUT: 1) a file for ordering # 2) a file of primer pairs, sorted by product size # 3) a fasta file of the primers, for your convenience for analysis $| = 1; $szUsage = "orderPrimerPairs.perl"; # (name of file with PRIMER_PAIR {} blocks); if ( $#ARGV != -1 ) { die "$szUsage"; } $szFileToEmail = "to_order.txt"; $szPrimersUnsorted = "primers_unsorted.txt"; $szPrimersSorted = "primers_sorted.txt"; open( filTranscriptsFilename, "transcripts_filename.txt" ) || die "couldn't open transcripts_filename.txt which is created by amplifyTranscripts.perl"; if ( !defined( $szTranscriptsFile = ) ) { die "transcripts_filename.txt is empty"; } chomp( $szTranscriptsFile ); # $szTranscriptsFile may or may not look like this: pcr.selections.030129 $szPhilDate = $szTranscriptsFile; if ( $szPhilDate =~ /\.([0-9][0-9][0-9][0-9][0-9][0-9])/ ) { # replace by whatever was matched $szPhilDate = $1; # if all is well, then $szPhilDate is the date in the form of YYMMDD if ( $szPhilDate !~ /^[0-9][0-9][0-9][0-9][0-9][0-9]$/ ) { die "something wrong: date is $szPhilDate from file $szTranscriptsFile"; } else { print "date is: $szPhilDate\n"; } } else { $szPhilDate = &szGetDate(); } $szFastaFileOfPrimers = "primers" . $szPhilDate . ".fasta"; # sort by product size &sortPrimerBlocks( $szPrimersUnsorted, $szPrimersSorted ); # now there are sorted blocks in primers_sorted.txt open( filPrimerPairs, "$szPrimersSorted" ) || die "couldn't open $szPrimersSorted"; $szPlatePrefix = $szPhilDate . "a_"; open( filFastaFile, ">$szFastaFileOfPrimers" ) || die "couldn't open $szFastaFileOfPrimers for write"; # with first increment, this will change to plate 1, well A 1 $nPlateNumber = -1; $nWellNumber = 12; $cWellLetter = 'H'; @aPrimersToOrder = (); while() { if ( /^PRIMER_PAIR/ ) { # looks like this #PRIMER PAIR { #Region: trans.3942866 Product size: 1791 #trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60 #trans.3942866.rev: TTATTTAACATCAAAATTTTAAATTATAAATCCC temp: 61 #} if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } if ( ! /^\S+:\s+\S+\s+temp:\s\d+$/ ) { die "line was not of the form trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60"; } &incrementWellAndMaybePrintEmptyWells; $szForwardPlateName = $szPlatePrefix . sprintf( "%02d", $nPlateNumber ); $szReversePlateName = $szPlatePrefix . sprintf( "%02d", $nPlateNumber + 1 ); @aWords = split; $szPrimer1Name = $aWords[0]; $szPrimer1Name =~ s/:$//; $szPrimer1Sequence = $aWords[1]; $szPrimer1Sequence =~ tr/a-z/A-Z/; # looks like: # A1 4381034f GGTGTGGCTCATGAACAATTTT 1 # A2 4381034r CACTTAATTGGAAATTCGTACCATAGA 1 # A3 1053885f AAATACTTTATCGAGCTCTACAAGGATG 1 # A4 1053885r TCCTTTTCGATCGATTCGTG 1 $szLineToOrder = ""; $szLineToOrder .= "$szForwardPlateName\t"; $szLineToOrder .= "$cWellLetter\t"; $szLineToOrder .= "$nWellNumber\t"; $szLineToOrder .= "$szPrimer1Name\t"; $szLineToOrder .= "\t"; $szLineToOrder .= "$szPrimer1Sequence\t\t\n"; push( @aPrimersToOrder, $szLineToOrder ); print filFastaFile ">$szPrimer1Name $szPhilDate\n"; print filFastaFile "$szPrimer1Sequence\n"; if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } if ( ! /^\S+:\s+\S+\s+temp:\s\d+$/ ) { die "second line was not of the form trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60"; } # no--use same well but different plate number # &incrementWellAndMaybePrintEmptyWells; @aWords = split; $szPrimer2Name = $aWords[0]; $szPrimer2Name =~ s/:$//; $szPrimer2Sequence = $aWords[1]; $szPrimer2Sequence =~ tr/a-z/A-Z/; $szLineToOrder = ""; $szLineToOrder .= "$szReversePlateName\t"; $szLineToOrder .= "$cWellLetter\t"; $szLineToOrder .= "$nWellNumber\t"; $szLineToOrder .= "$szPrimer2Name\t"; $szLineToOrder .= "\t"; $szLineToOrder .= "$szPrimer2Sequence\t\t\n"; push( @aPrimersToOrder, $szLineToOrder ); print filFastaFile ">$szPrimer2Name $szPhilDate\n"; print filFastaFile "$szPrimer2Sequence\n"; } } # if the plate has not been filled up, fill the remaining spots # with "empty" lines while( $nWellNumber < 12 || $cWellLetter ne 'H' ) { &incrementWell; &emptyWells; } close( filPrimerPairs ); close( filFastaFile ); open( filFileToEmail, ">$szFileToEmail" ) || die "couldn't open $szFileToEmail for write"; print filFileToEmail "Plate\tRow\tColumn\tSequence Name\t5' Mod\tSequence (5'-3') (Blanks use period)\t3' Mod\tComment\n"; @aPrimersToOrder = sort byPlateThenLetterThenNumber @aPrimersToOrder; foreach $szLine ( @aPrimersToOrder ) { print filFileToEmail $szLine; } close( filFileToEmail ); $szCurrentDirectory = `/bin/pwd`; chomp( $szCurrentDirectory ); print "attachments:\n"; print "$szCurrentDirectory/$szTranscriptsFile \"$szTranscriptsFile\"\, $szCurrentDirectory/primers_sorted.txt \"primers_sorted.txt\"\, $szCurrentDirectory/to_order.txt \"to_order.txt\"\n"; print "bge\@u.washington.edu,ambohigh\@u.washington.edu,phg\@u.washington.edu\n"; #----------------------------------------------------- # end of program #----------------------------------------------------- sub incrementWellAndMaybePrintEmptyWells { &incrementWell; if ( $cWellLetter eq 'H' && ( $nWellNumber == 12 ) ) { &emptyWells; &incrementWell; } } sub incrementWell { ++$cWellLetter; if ( $cWellLetter gt 'H' ) { $cWellLetter = 'A'; ++$nWellNumber; if ( $nWellNumber > 12 ) { $nWellNumber = 1; $nPlateNumber += 2; } } } sub emptyWells { # Amber said not to print empty wells. Just do not print # anything for this well. (100915) # $szLineToOrder = ""; # $szLineToOrder .= "$szForwardPlateName\t"; # $szLineToOrder .= "$cWellLetter\t"; # $szLineToOrder .= "$nWellNumber\t"; # $szLineToOrder .= "empty\t"; # $szLineToOrder .= "\t"; # $szLineToOrder .= ".\t\t\n"; # push( @aForwardPrimers, $szLineToOrder ); # $szLineToOrder = ""; # $szLineToOrder .= "$szReversePlateName\t"; # $szLineToOrder .= "$cWellLetter\t"; # $szLineToOrder .= "$nWellNumber\t"; # $szLineToOrder .= "empty\t"; # $szLineToOrder .= "\t"; # $szLineToOrder .= ".\t\t\n"; # push( @aReversePrimers, $szLineToOrder ); } sub byPlateThenLetterThenNumber { my @aWordsA = split(' ', $a ); my @aWordsB = split(' ', $b ); # looks like: # $szLineToOrder = ""; # $szLineToOrder .= "$szPlateName\t"; # $szLineToOrder .= "$cWellLetter\t"; # $szLineToOrder .= "$nWellNumber\t"; # $szLineToOrder .= "$szPrimer1Name\t"; # $szLineToOrder .= "\t"; # $szLineToOrder .= "$szPrimer1Sequence\t\t\n"; my $szPlateNameA = $aWordsA[0]; my $szPlateNameB = $aWordsB[0]; my $nPlateNumberA = $szPlateNameA; my $nPlateNumberB = $szPlateNameB; my $re = qr/^${szPhilDate}a_/; $nPlateNumberA =~ s/$re//; $nPlateNumberB =~ s/$re//; if ( $nPlateNumberA < $nPlateNumberB ) { return -1; } elsif( $nPlateNumberA > $nPlateNumberB ) { return 1; } else { my $cWellLetterA = $aWordsA[1]; my $cWellLetterB = $aWordsB[1]; if ( $cWellLetterA lt $cWellLetterB ) { return -1; } elsif ( $cWellLetterA gt $cWellLetterB ) { return 1; } else { my $nWellNumberA = $aWordsA[2]; my $nWellNumberB = $aWordsB[2]; return( $nWellNumberA <=> $nWellNumberB ); } } } sub sortPrimerBlocks { if ( $#_ != 1 ) { die "sortPrimerBlocks has wrong number of arguments"; } ( $szUnsorted, $szSorted ) = @_; open( filUnsorted, $szUnsorted ) || die "couldn't open $szUnsorted"; open( filSorted, ">$szSorted" ) || die "couldn't open $szSorted for write"; @aPrimers = (); while() { if ( /^PRIMER_PAIR/ ) { # looks like this #PRIMER_PAIR { #Region: trans.3942866 Product size: 1791 #trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60 #trans.3942866.rev: TTATTTAACATCAAAATTTTAAATTATAAATCCC temp: 61 #} $szBlock = $_; if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } $szBlock .= $_; if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } $szBlock .= $_; if ( ! /^\S+:\s+\S+\s+temp:\s\d+$/ ) { die "line was not of the form trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60 but instead was $_"; } if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } $szBlock .= $_; if ( ! /^\S+:\s+\S+\s+temp:\s\d+$/ ) { die "second line was not of the form trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60"; } if ( !defined( $_ = ) ) { die "prematured end of file after PRIMER_PAIR"; } $szBlock .= $_; push( @aPrimers, $szBlock ); } } close( filUnsorted ); @aSortedPrimers = sort { byProductSize() } @aPrimers; foreach $szPrimerBlock (@aSortedPrimers ) { print filSorted $szPrimerBlock; print filSorted "\n"; } close( filSorted ); } sub byProductSize { # print $a; # print $b; @aWordsA = split(' ', $a ); @aWordsB = split(' ', $b ); $nSizeA = $aWordsA[6]; $nSizeB = $aWordsB[6]; # print $nSizeA, " ", $nSizeB, "\n"; return( $nSizeA <=> $nSizeB ); } sub makeShorterPrimerBlocksFile { if ( $#_ != 1 ) { die "makeShorterPrimerBlocksFile should have 2 arguments"; } ( $szLongerFile, $szShorterFile ) = @_; open( filLonger, $szLongerFile ) || die "couldn't open $szLongerFile"; open( filShorter, ">$szShorterFile" ) || die "couldn't open $szShorterFile for write"; @aPrimers = (); while() { if ( /^PRIMER_PAIR/ ) { # looks like this #PRIMER PAIR { #Region: trans.3942866 Product size: 1791 #trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60 #trans.3942866.rev: TTATTTAACATCAAAATTTTAAATTATAAATCCC temp: 61 #} $szBlock = $_; if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } $szBlock .= $_; if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } $szBlock .= $_; if ( ! /^\S+:\s+\S+\s+temp:\s\d+$/ ) { die "line was not of the form trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60 but instead was $_"; } if ( !defined( $_ = ) ) { die "premature end of file after PRIMER_PAIR"; } $szBlock .= $_; if ( ! /^\S+:\s+\S+\s+temp:\s\d+$/ ) { die "second line was not of the form trans.3942866.fwd: GGGGTCAGGAGATTTCGG temp: 60"; } if ( !defined( $_ = ) ) { die "prematured end of file after PRIMER_PAIR"; } $szBlock .= $_; push( @aPrimers, $szBlock ); } } close( filLonger ); # how many of these can we order? The plates must have at least 88 # primers per plate. For Colleen, every other plate must have 2 empty # wells at the end. # note that $#aPrimers is the last subscript of a zero-based array # so must add one $nOriginalNumberOfPrimerPairs = $#aPrimers + 1; ( $nShorterNumberOfPrimerPairs, $nBeginningBatchLastSubscript, $nLastBatchFirstSubscript ) = &getShorterNumberOfPrimers( $nOriginalNumberOfPrimerPairs ); $nNumberInShorterList = 0; for( $nSubscript = 0; $nSubscript <= $#aPrimers; ++$nSubscript ) { if ( $nSubscript <= $nBeginningBatchLastSubscript || $nLastBatchFirstSubscript <= $nSubscript ) { $szPrimerBlock = $aPrimers[ $nSubscript ]; print filShorter "$szPrimerBlock\n\n"; ++$nNumberInShorterList; } } if ( $nNumberInShorterList != $nShorterNumberOfPrimerPairs ) { die "wanted $nShorterNumberOfPrimerPairs but only made a list of $nNumberInShorterList"; } close( filShorter ); } sub getShorterNumberOfPrimers { my $nOriginalNumberOfPrimerPairs = $_[0]; # each pair of plates can contain 95 primer pairs: 96 primers on # the first plate and 94 primers on the second my $nLeftOverPairs = $nOriginalNumberOfPrimerPairs % 95; my $nFullPairsOfPlates = ( $nOriginalNumberOfPrimerPairs - $nLeftOverPairs ) / 95; # how many of those left over can we use? my $nCanUseLeftOver; if ( $nLeftOverPairs < 44 ) { $nCanUseLeftOver = 0; } elsif ( 44 <= $nLeftOverPairs && $nLeftOverPairs < 48 ) { # case in which the last plate has at least 44 pairs on it $nCanUseLeftOver = $nLeftOverPairs; } elsif ( 48 <= $nLeftOverPairs && $nLeftOverPairs < ( 44 + 48 ) ) { # case in which the pairs fills one more plate completely (with # 2 empty wells for controls, according to Colleen on Feb 2003) # but the next plate doesn't have 44 pairs on it $nCanUseLeftOver = 47; } elsif ( ( 44 + 48 ) <= $nLeftOverPairs && $nLeftOverPairs <= 95 ) { $nCanUseLeftOver = $nLeftOverPairs; } else { die "impossible"; } my $nShorterNumberOfPrimerPairs = $nFullPairsOfPlates * 95 + $nCanUseLeftOver; # how shall we choose these pairs? I don't want to choose them # from the beginning or end since then there will be a bias in which # I am continually excluding the same ones. # So take half of these from the front and half from the end of the list. my $nNumberInBeginningBatch = int( $nShorterNumberOfPrimerPairs / 2 ); my $nNumberInLastBatch = $nShorterNumberOfPrimerPairs - $nNumberInBeginningBatch; my $nBeginningBatchLastSubscript = $nNumberInBeginningBatch - 1; # for example, if there are 50 primer pairs (so subscripts 0 to 49) # and the last batch has 1 primer pair in it, the last subscript # would be 50 - 1 = 49 my $nLastBatchFirstSubscript = $nOriginalNumberOfPrimerPairs - $nNumberInLastBatch; my $nLastSubscript = $nOriginalNumberOfPrimerPairs -1; return( $nShorterNumberOfPrimerPairs, $nBeginningBatchLastSubscript, $nLastBatchFirstSubscript ); } sub szGetDate { my $szDate; ($nSecond, $nMinute, $nHour, $nDayInMonth, $nMonth, $nYear, $wday, $yday, $isdst ) = localtime; undef $isdst; undef $wday; undef $yday; undef $nSecond; undef $nMinute; undef $nHour; if ( $nYear >= 100 ) { $nYear = $nYear % 100; } $szDate = sprintf( "%02d%02d%02d", $nYear, $nMonth + 1, $nDayInMonth ); return( $szDate ); }