#!/usr/bin/perl -w
#
# phredPhrap
#
# PURPOSE: do an assembly and prepare for editing using consed
#
# HOW IT WORKS:
# 1) It runs phred on all *new* reads (reads for which
# there is no phd file.
#
#
# 2) It runs determineReadTypes.perl so consed, autofinish, and phrap
# will understand your read naming convention
#
#
# 3) Then it runs crossmatch to screen them
# for vector.
#
# 4) Then it runs phd2fasta to create 2 fasta files:
# one containing read bases and one containing read quality. These
# are of the highest versions of each read (in case any editing has
# been done).
#
# 5) It runs phrap
#
# 6) It runs transferConsensusTags to transfer any consensus tags
# from the newest old ace file to the one phrap created in step 4
#
# 7) It runs tagRepeats.perl to tag any common repeats (such as ALU)
# that you want to have automatically tagged for the benefit of consed
# users. See README.txt "INSTALLING CONSED"
#
# This script is also called by Consed to run miniassemblies. In that
# case it uses the -include_phds option.
#
# HOW TO USE IT:
# Typically, you just type: phredPhrap Within the project, there
# are 3 directories: chromat_dir (with the chromats), phd_dir
# (with the phd files) and edit_dir (with the ace files and other
# files). You type "phredPhrap" from within edit_dir.
#
# WHAT YOU SHOULD EDIT IN THIS FILE:
# You should customize determineReadTypes.perl and then uncomment
# the lines below in which determineReadTypes.perl is called.
# Some operating systems (e.g., macosx) have nice somewhere other
# than /bin/nice. You can determine this by typing:
# which nice
# If it says /usr/bin/nice, then you will need to replace the line:
# $niceExe = "/bin/nice";
# with
# $niceExe = "/usr/bin/nice";
# or some other location, if it says so.
#
#
#
# For phred, contact bge@u.washington.edu
# For phrap and cross_match, contact phg@u.washington.edu
#
# phd2seqfasta and phd2qualfasta are superceded by phd2fasta
# phd2fasta comes with the consed package
# transferConsensusTags.perl comes with the consed package
# tagRepeats.perl comes with the consed package
#
# Rev: 030326 for Jim Sloan's suggested polyphred options
# Rev: 030415 to transfer consensus tags when running miniassemblies
# Rev: 080818 to allow miniassemblies to use reads from phdballs
$szVersion = "080818";
defined( $szConsedHome = $ENV{'CONSED_HOME'} ) ||
( $szConsedHome = "/usr/local/genome" );
if ( !-r $szConsedHome ) {
die "Could not find $szConsedHome for the root of the consed programs. Is the environment variable CONSED_HOME set correctly?";
}
$cross_matchExe = $szConsedHome . "/bin/cross_match";
$phredExe = $szConsedHome . "/bin/phred";
$phrapExe = $szConsedHome . "/bin/phrap";
$phd2fasta = $szConsedHome . "/bin/phd2fasta";
$transferConsensusTags = $szConsedHome . "/bin/transferConsensusTags.perl";
#$transferConsensusTags = $szConsedHome . "/bin_common/transferConsensusTags.perl";
$tagRepeats = $szConsedHome . "/bin/tagRepeats.perl";
#$tagRepeats = $szConsedHome . "/bin_common/tagRepeats.perl";
# the following line is important only if you are using polyphred
# for polymorphism detection
$polyPhredExe = $szConsedHome . "/bin/polyphred";
$determineReadTypes = $szConsedHome . "/bin/determineReadTypes.perl";
# change this to reflect wherever you put you fasta file of vector sequences
$szDefaultVectorFile = $szConsedHome . "/lib/screenLibs/vector.seq";
# if you have a specific vector file for the project, then use that:
# (Thanks to Steve Kenton, Oklahoma Universtiy)
if (-e "../vector.seq") {
$szDefaultVectorFile = "../vector.seq";
}
# change this to reflect wherever you put the phredpar.dat file
$szPhredParameterFile = $szConsedHome . "/lib/phredpar.dat";
#$szPhredParameterFile = "/usr/local/common/lib/PhredPar/phredpar.dat";
#$szPhredParameterFile = "/usr/local/etc/PhredPar/phredpar.dat";
# change the 0 to 1 if you are using polyphred for polymorphism detection
$bUsingPolyPhred = 0;
# excellent suggestion from Steve Kenton, University of Oklahoma:
# He suggests:
#"We recently started doing a few snips so we are now using polyphred
#occasionally. I initially copied and modified the phredPhrap script
#to have a polyphred enabled version, but I hate having two copies of
#stuff like that. Today I realized that we could just check the
#program name $0 and do different things based on the name whic h could
#be a link (at least for unix system). Any any case, the source to
#both is then identical which helps with the ever present
#syncronization problem. So I put in the following mod and made a
#symbolic link ln -s phredPhrap polyphredPhrap"
if ( $0 =~ /polyphred/ ) {
$bUsingPolyPhred = 1;
}
if ( $#ARGV >= 0 ) {
if ( $ARGV[0] eq "-V" || $ARGV[0] eq "-v" ) {
print "$szVersion\n";
exit( 1 );
}
}
if ( $bUsingPolyPhred ) {
$szPolyPhredOptions = "-dd ../poly_dir";
}
else {
$szPolyPhredOptions = "";
}
if (!-x $cross_matchExe ) {
die "could not execute or find $cross_matchExe";
}
if (!-x $phredExe ) {
die "could not execute or find $phredExe";
}
if (!-x $phrapExe ) {
die "could not execute or find $phrapExe";
}
if (!-x $phd2fasta ) {
die "could not execute or find $phd2fasta";
}
if (!-x $transferConsensusTags || !-r $transferConsensusTags ) {
die "could not execute, read or find $transferConsensusTags";
}
if (!-x $tagRepeats || !-r $tagRepeats ) {
die "could not execute, read or find $tagRepeats";
}
if (!-x $determineReadTypes || !-r $determineReadTypes ) {
die "could not execute, read, or find $determineReadTypes";
}
if (!-r $szPhredParameterFile ) {
die "could not read $szPhredParameterFile";
}
# Let's try to centralize the location of the vector sequence
$szVector = $ENV{"CROSS_MATCH_VECTOR"} || $szDefaultVectorFile;
if (! -e $szVector ) {
die "cannot find $szVector specifying the pathname of the vector sequences file--specify this in environment variable CROSS_MATCH_VECTOR\n";
}
$ENV{'PHRED_PARAMETER_FILE'} = $szPhredParameterFile;
# for taint/setuid (Need gzip/gunzip in path)
#$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin';
# do not change $chromatDirPath or $phdDirPath!
# consed expects the following directory structure:
# foo/phd_dir contains phd files
# foo/chromat_dir contains chromats
# foo/edit_dir contains ace file
# You can make these links, if you like.
$chromatDirPath = "../chromat_dir";
$phdDirPath = "../phd_dir";
$solexaDirPath = "../solexa_dir";
$sffDirPath = "../sff_dir";
$niceExe = "/bin/nice";
$mvExe = "/bin/mv";
$lsExe = "/bin/ls";
$pwdExe = "/bin/pwd";
$szUsage =
"Normal Usage: \n\
phredPhrap [phrap options]\n\
\n\
Special Usage:\n\
phredPhrap [phrap options]\n\
is used when you want the base name of the created files to be\n\
different than the name of the parent directory of the phd_dir directory\n\
\n\
phredPhrap -notags [phrap options]\n\
or\n\
phredPhrap -notags [phrap options]\n\
These are \*only\* to be used in the special case in which you want\n\
to lose any tags from previous assemblies.\n\
\n\
Unless you know what you are doing, always use the form:\n\
phredPhrap [phrap options]\
consed's miniassemblies feature will call it as follows:\n\
phredPhrap -miniassembly -fasta -phdballs -file_with_new_ace_file_name -ace_file_with_consensus_tags ";
# don't buffer STDOUT output
select(( select( STDOUT ), $| = 1 )[0]);
if ($#ARGV < -1) {
die "Invalid arguments @ARGV\n$szUsage\n";
}
# Figure out the basename for all created files.
# If the current directory is /blah1/blah2/blah3/edit_dir
# then the basename will be blah3
if ( $#ARGV == -1 ) {
$bProjectNameOnCommandLine = 0;
}
else {
if ( $ARGV[0] =~ /^-/ ) {
$bProjectNameOnCommandLine = 0;
}
else {
$bProjectNameOnCommandLine = 1;
}
}
if ( !$bProjectNameOnCommandLine ) {
$szCurrentDirectory = `$pwdExe`;
@aPathname = split( /\//, $szCurrentDirectory );
if ( $#aPathname < 2 ) {
die "Sorry--I can't figure out which project this is. Please specify the project on the command line $szUsage\n";
}
$szBaseName = $aPathname[ $#aPathname - 1 ];
}
else {
$szBaseName = $ARGV[0];
# untaint the name - it follows same rule as projects
unless ($szBaseName =~ /^([a-zA-Z0-9\-\.]+)$/) {
die "$szBaseName is not a valid project name\n";
}
$szBaseName = $1;
# I'm expecting $ARGV[0] to be the project name--not an option
if ( $szBaseName =~ /^-/ ) {
die "$szUsage";
}
shift( @ARGV );
}
print "Basename for all files: $szBaseName\n";
# Now see if the user wants to avoid transferring tags
# or if the user wants to exclude some reads
$szExcludeChromatsFile = "";
$szIncludePhdsFile = "";
$bTransferTags = 1;
$szFileWithNewAceFileName = "";
$szOldAceFileWithConsensusTags = "";
$bMiniassembly = 0;
$szPhdBallsFOF = "";
for( $nArg = $#ARGV; $nArg >= 0; --$nArg ) {
if ( $ARGV[ $nArg ] eq "-notags" ) {
print "You have specified -notags on the command line.\nThis is not recommened since it will mean that you will lose your consensus tags. We suggest you type this command again but without the -notags.\nAre you sure you want to include the -notags? (y/n) ";
$szYorN = ;
chomp( $szYorN );
if ( ($szYorN eq "y" ) || ( $szYorN eq "Y" ) || ($szYorN eq "yes" ) ||
($szYorN eq "YES" ) ) {
$bTransferTags = 0;
# remove the -notags keyword
splice( @ARGV, $nArg, 1 );
}
else {
exit(0);
}
}
elsif ( $ARGV[$nArg] eq "-exclude_chromats" ) {
die "-exclude_chromats must be followed by the name of the file containing the chromats to exclude from the assembly" unless ( $#ARGV >= ( $nArg + 1 ) );
$szExcludeChromatsFile = $ARGV[ $nArg + 1];
splice( @ARGV, $nArg, 2 );
}
elsif( $ARGV[$nArg] eq "-include_phds" ) {
die "-include_phds must be followed by the name of the file containing the full pathname of the phd files to be included in the assembly" unless ( $#ARGV >= ( $nArg + 1 ) );
$szIncludePhdsFile = $ARGV[ $nArg + 1];
splice( @ARGV, $nArg, 2 );
}
elsif( $ARGV[$nArg] eq "-file_with_new_ace_file_name" ) {
die "-file_with_new_ace_file_name must be followed by the name of the file in which phredPhrap is supposed to put the new ace file name"
unless ($#ARGV >= ( $nArg + 1 ) );
$szFileWithNewAceFileName = $ARGV[ $nArg + 1];
splice( @ARGV, $nArg, 2 );
}
elsif( $ARGV[$nArg] eq "-ace_file_with_consensus_tags" ) {
if ( $#ARGV <= $nArg ) {
die "-ace_file_with_consensus_tags must be followed by the name of the file from which to transfer existing consensus tags";
}
$szOldAceFileWithConsensusTags = $ARGV[ $nArg + 1];
splice( @ARGV, $nArg, 2 );
}
elsif( $ARGV[$nArg] eq "-miniassembly" ) {
$bMiniassembly = 1;
splice( @ARGV, $nArg, 1 );
}
elsif( $ARGV[$nArg] eq "-phdballs" ) {
if ( $#ARGV <= $nArg ) {
die "-phdballs must be followed by the name of the file containing the list of phdballs containing the reads to be phrap'd";
}
$szPhdBallsFOF = $ARGV[ $nArg + 1 ];
splice( @ARGV, $nArg, 2 );
}
}
if ( $bTransferTags == 0 && $szOldAceFileWithConsensusTags ne "" ) {
die "-ace_file_with_consensus_tags and -notags cannot both be specified";
}
if ( $szOldAceFileWithConsensusTags ne "" ) {
if ( ! -r $szOldAceFileWithConsensusTags ) {
die "could not open $szOldAceFileWithConsensusTags";
}
}
if ( $szExcludeChromatsFile ne "" ) {
if ( ! -r $szExcludeChromatsFile ) {
die "could not open $szExcludeChromatsFile";
}
}
if ( $szIncludePhdsFile ne "" ) {
if ( ! -r $szIncludePhdsFile ) {
die "could not open $szIncludePhdsFile";
}
}
if ( $szFileWithNewAceFileName ne "" ) {
open( filFileWithNewAceFileName, ">$szFileWithNewAceFileName" ) || die "Couldn't open $szFileWithNewAceFileName for output";
# just checking that we can open it.
close( filFileWithNewAceFileName );
}
if ( $szPhdBallsFOF ne "" ) {
open( filPhdBallsFOF, "$szPhdBallsFOF" ) || die "couldn't open $szPhdBallsFOF";
# just checking it can be opened
close( filPhdBallsFOF );
}
# setting @aPhrapOptions for passing to phrap
@aPhrapOptions = ();
if ( $#ARGV >= 0 ) {
@aPhrapOptions = @ARGV;
}
if ( $bTransferTags && $szOldAceFileWithConsensusTags eq "" ) {
# let's see if there is any old ace file from which we must transfer
# consensus tags
$szCommand = "$lsExe -t $szBaseName" . ".fasta.screen.ace" . '*' .
" 2>/dev/null | grep -v '.wrk\$'";
$szListOfOldAceFiles = `$szCommand`;
@aListOfOldAceFiles = split( ' ', $szListOfOldAceFiles );
my $bContinueLooking = 1;
do {
if ( $#aListOfOldAceFiles == -1 ) {
$bTransferTags = 0;
$bContinueLooking = 0;
}
else {
$szOldAceFileWithConsensusTags = shift @aListOfOldAceFiles;
$bIsAnAceFile = &bCheckIfThisIsAnAceFile( $szOldAceFileWithConsensusTags );
if ( $bIsAnAceFile ) {
$bTransferTags = 1;
$bContinueLooking = 0;
}
}
} while( $bContinueLooking );
}
if ( $bTransferTags ) {
print "Will transfer consensus tags from old ace file $szOldAceFileWithConsensusTags\n";
if ( ! -r $szOldAceFileWithConsensusTags ) {
print $szUsage;
die "Can't read $szOldAceFileWithConsensusTags in order to transfer consensus tags. Please make it readable or mv it out of the way so it isn't found\n";
}
}
# E. coli screen is just for reporting purposes. Commented out for
# users that don't have an E. coli database.
# do same for CROSS_MATCH_ECOLI
# chrisa 18-jan-96
#if ($ENV{"CROSS_MATCH_ECOLI"}) {
# $szEcoli = $ENV{"CROSS_MATCH_ECOLI"};
#}
#else {
# $szEcoli = "/bz1/screenLibs/ecoli.lib";
#}
#if (! -e $szEcoli ) {
# die "cannot find $szEcoli specifying the pathname of the E.Coli sequences file--specify this in environment variable CROSS_MATCH_ECOLI\n";
#}
if ( ( !(-e $chromatDirPath) ) &&
( !(-e $solexaDirPath) ) &&
( !(-e $sffDirPath ) ) ) {
die "there must be at least one of $chromatDirPath, $solexaDirPath, or $sffDirPath--otherwise, what are you trying to phrap?";
}
if (! -e $phdDirPath ) {
!system("mkdir $phdDirPath") || die "could not create subdirectory phd_dir $!";
}
# determine what the name of the newly created assembly is.
@aAceFiles = `$lsExe -t *.ace.* 2>/dev/null`;
$nHighestVersion = 0;
foreach $szFile (@aAceFiles ) {
chomp( $szFile );
if ( $szFile !~ /[.]wrk$/ ) {
$nPos = index( $szFile, ".ace." );
$nPos += 5;
if ($nPos >= length( $szFile ) ) {
# case in which the filename ends with .ace.
next;
}
else {
$szExtension = substr( $szFile, $nPos );
if ( $szExtension =~ /^[0-9]+$/ ) {
if ( $szExtension > $nHighestVersion ) {
$nHighestVersion = $szExtension;
}
}
}
}
}
$nNextHigherVersion = $nHighestVersion + 1;
$szAceFileToBeProduced =
"${szBaseName}.fasta.screen.ace.$nNextHigherVersion";
print "ace file to be created is $szAceFileToBeProduced\n";
# this is the name of the ace filename that will be produced by phrap
# It will then be renamed to $szAceFileOfNextHigherVersion
$szTempAceFilename = $szBaseName . ".fasta.screen.ace";
if (-e $szTempAceFilename ) {
die "$szTempAceFilename already exists. Phrap will create a temporary file of the same name. Thus you should delete or rename it and then run this script again.";
}
# if we are doing a miniassembly, consed will have already written out
# the reads to be re-phrapped. They are obviously already base-called,
# so no need to run phred. They already have vector masking so no
# need to do that. And they are already in fasta format so no need to
# run phd2fasta
if ( !$bMiniassembly ) {
# make a list of all the chromatigrams that haven't already been phred'd
# Do this in the following very efficient manner:
# Make a hash of the root name of the phd files (e.g.,
# the root of myRead.phd.1 would be 'myRead'
#
# For each chromat, check if it has a corresponding phd file.
# If not, then add it to the list of chromats to be phred'd
%aPhdFiles = ();
opendir( dirPhdDir, $phdDirPath ) || die "couldn't open directory $phdDirPath $!";
while( defined( $szPhdFile = readdir( dirPhdDir ) ) ) {
if ( index( $szPhdFile, ".phd." ) >= 0 ) {
( $szRoot = $szPhdFile ) =~ s/\.phd\..*$//;
$szRoot =~ s/^.*\///;
if ( ! exists $aPhdFiles{ $szRoot } ) {
$aPhdFiles{ $szRoot } = "";
}
}
}
closedir( dirPhdDir );
$szPHDFOF = $szBaseName . "NewChromats.fof";
if (-e $szPHDFOF ) {
unlink( $szPHDFOF ) || die "couldn't delete $szPHDFOF $!";
}
open( filPHDFOF, ">$szPHDFOF" ) || die "Couldn't open $szPHDFOF for output $!";
opendir( dirChromat, $chromatDirPath ) || die "Couldn't open $chromatDirPath $!";
$nFilesToPhred=0;
while( defined( $szChromatFile = readdir( dirChromat ) ) ) {
next if ($szChromatFile eq "." );
next if ($szChromatFile eq ".." );
$szChromatFile =~ s/\.gz$//; # no gzip suffix
$szChromatFile =~ s/\.Z$//; # or compress suffix
if ( ! exists $aPhdFiles{$szChromatFile} ) {
print( filPHDFOF "$chromatDirPath/$szChromatFile\n" );
++$nFilesToPhred;
}
}
close( filPHDFOF );
# use the list of all chromatigrams that haven't been phred'd to tell
# phred to phred them!
if ( $nFilesToPhred > 0 ) {
print "\n\n--------------------------------------------------------\n";
print "Now running phred on $nFilesToPhred files...\n";
print "--------------------------------------------------------\n\n\n";
!system("$niceExe $phredExe -if $szPHDFOF -pd $phdDirPath $szPolyPhredOptions") || die "some problem running phred $!";
}
else {
print "No need to run phred.\n";
}
# only uncomment the following 4 lines when you have customized
# determineReadTypes.perl
#print "\n\n--------------------------------------------------------\n";
#print "Now running determineReadTypes.perl...\n";
#print "--------------------------------------------------------\n\n\n";
#!system( "$determineReadTypes" ) || die "some problem running determineReadTypes.perl $!\n";
print "\n\n--------------------------------------------------------\n";
print "Now running phd2fasta...\n";
print "--------------------------------------------------------\n\n\n";
$szFastaFile = $szBaseName . ".fasta";
$szFastaQualFile = $szBaseName . ".fasta.qual";
$szPhd2FastaOptions = " -id $phdDirPath ";
if ( $szExcludeChromatsFile ne "" ) {
$szPhd2FastaOptions .= " -ix $szExcludeChromatsFile ";
}
if ( $szIncludePhdsFile ne "" ) {
$szPhd2FastaOptions = " -if $szIncludePhdsFile ";
}
$szCommand = "$phd2fasta $szPhd2FastaOptions -os $szFastaFile -oq $szFastaQualFile";
print "running: $szCommand\n";
!system("$szCommand" ) ||
die "some problem running phd2fasta $!";
$szScreenOut = $szBaseName . ".screen.out";
if (-e $szScreenOut ) {
unlink( $szScreenOut ) || die "couldn't delete $szScreenOut $!";
}
$szScreen = $szBaseName. ".fasta.screen";
if (-e $szScreen ) {
unlink( $szScreen ) || die "couldn't delete $szScreen $!";
}
print "\n\n--------------------------------------------------------\n";
print "Now running cross_match...\n";
print "--------------------------------------------------------\n\n\n";
!system( "$niceExe $cross_matchExe $szFastaFile $szVector -minmatch 12 -penalty -2 -minscore 20 -screen > $szScreenOut" ) || die "some problem running crossmatch $!";
#
# do a screen for e coli, creating report only
# chrisa 18-jan-96
#
# use sequence already screened for vector
# chrisa 31-july-96
#$szEcoliScreenOut = $szBaseName . ".screen-ecoli.out";
#system( "$niceExe $cross_matchExe $szScreen $szEcoli > $szEcoliScreenOut");
$szFastaQualScreenFile = $szBaseName . ".fasta.screen.qual";
!system( "$mvExe $szFastaQualFile $szFastaQualScreenFile" ) || die "some problem executing $mvExe $szFastaQualFile $szFastaQualScreenFile $!";
}
else {
# consed should have written this file for miniassemblies
$szScreen = $szBaseName. ".fasta.screen";
}
$szPhrapOut = $szBaseName . ".phrap.out";
if (-e $szPhrapOut ) {
unlink( $szPhrapOut ) || die "couldn't delete $szPhrapOut $!";
}
print "\n\n--------------------------------------------------------\n";
print "Now running phrap...\n";
print "--------------------------------------------------------\n\n\n";
!system( "$phrapExe $szScreen -new_ace -view @aPhrapOptions >$szPhrapOut" )
|| die "some problem running phrap $!";
rename( $szTempAceFilename, $szAceFileToBeProduced );
$szConsedWRKFile = $szAceFileToBeProduced . ".wrk";
print "deleting $szConsedWRKFile\n";
# This file must not be left around, or else consed will crash
# if the person attempts to apply the edits. Worse yet, consed
# may not crash, but will apply edits that the user didn't intend.
if (-e $szConsedWRKFile ) {
unlink( $szConsedWRKFile );
}
print "\n\n--------------------------------------------------------\n";
print "Now running tagRepeats.perl...\n";
print "--------------------------------------------------------\n\n\n";
!system( "$tagRepeats $szAceFileToBeProduced" )
|| die "some problem running $tagRepeats $!";
if ( $bTransferTags == 1 )
{
print "\n\n--------------------------------------------------------\n";
print "Now transferring consensus tags from $szOldAceFileWithConsensusTags to $szAceFileToBeProduced...\n";
print "--------------------------------------------------------\n\n\n";
!system( "$transferConsensusTags $szOldAceFileWithConsensusTags $szAceFileToBeProduced" ) ||
die "some problem transferring consensus tags: $transferConsensusTags $szOldAceFileWithConsensusTags $szAceFileToBeProduced $!\n";
}
else {
print "Not attempting to transfer consensus tags\n";
}
if ( $bUsingPolyPhred ) {
print "\n\n--------------------------------------------------------\n";
print "Now running polyphred for polymorphism detection...\n";
print "--------------------------------------------------------\n\n\n";
$szPolyPhredFile = $szBaseName . ".fasta.screen.polyphred.out";
!system( "$polyPhredExe -ace $szAceFileToBeProduced -tag polymorphism > $szPolyPhredFile" ) ||
die "some problem running $polyPhredExe $!";
}
if ( $szPhdBallsFOF ne "" ) {
open( filNewAce, ">>$szAceFileToBeProduced" ) ||
die "couldn't open $szAceFileToBeProduced for append";
open( filPhdBallsFOF, "$szPhdBallsFOF" ) || die "couldn't open $szPhdBallsFOF";
while( ) {
# looks like:
# WA{
# phdBall consed 080416:144002
# ../phdball_dir/phd.ball.1
# }
print filNewAce "\nWA{\n";
print filNewAce "phdBall consed " . &szGetDateForTag . "\n";
print filNewAce $_;
print filNewAce "}\n\n";
}
close( filPhdBallsFOF );
close( filNewAce );
}
if ( $szFileWithNewAceFileName ne "" ) {
open( filFileWithNewAceFileName, ">$szFileWithNewAceFileName" ) || die "Couldn't open $szFileWithNewAceFileName for output $!";
print( filFileWithNewAceFileName "$szAceFileToBeProduced\n" );
close( filFileWithNewAceFileName );
}
print "you may now run consed on $szAceFileToBeProduced\n";
exit(0);
# this checks that the file is an ace file by opening it and reading the
# first line. The first line should look like this:
# AS 27 2394
# There are many other file types that match the pattern
# .fasta.screen.ace.*
# such as djs74.fasta.screen.ace.21.990618.144707.customPrimers
# Thus we look in the file to check that it is a genuine ace file.
sub bCheckIfThisIsAnAceFile {
my $szFile = $_[0];
my $bSuccess = open( filAce, $szFile );
if (!$bSuccess ) {
print STDERR "couldn't open $szFile\n";
print STDERR "assuming this is not an ace file\n";
return( 0 );
}
my $szFirstLine;
if ( !defined( $szFirstLine = ) ) {
print STDERR "file $szFile was empty so assuming this is not an ace file\n";
close( filAce );
return( 0 );
}
if ( $szFirstLine =~ /^AS [0-9]+ [0-9]+/ ) {
close( filAce );
return( 1);
}
# check if perhaps old ace format (this won't be supported much longer)
chomp( $szFirstLine );
while( length( $szFirstLine ) == 0 ) {
if ( !defined( $szFirstLine = ) ) {
print STDERR "file $szFile was empty so assuming this is not an ace file\n";
close( filAce );
return( 0 );
}
chomp( $szFirstLine );
}
if ( $szFirstLine =~ /^DNA / ) {
close( filAce );
return( 1 );
}
else {
print STDERR "file $szFile is not an ace file--trying another\n";
close( filAce );
return( 0 );
}
}
sub szGetDateForTag {
my $szDate;
my ($nSecond, $nMinute, $nHour, $nDayInMonth, $nMonth, $nYear, $wday, $yday, $isdst
) = localtime;
undef $isdst;
undef $wday;
undef $yday;
if ( $nYear >= 100 ) {
$nYear = $nYear % 100;
}
$szDate = sprintf( "%02d%02d%02d:%02d%02d%02d",
$nYear,
$nMonth + 1,
$nDayInMonth,
$nHour,
$nMinute,
$nSecond );
return( $szDate );
}