#!/usr/bin/perl -w # # phredPhrap # # PURPOSE: do an assembly and prepare for editing using consed # # HOW IT WORKS: # 1) It runs phred on all *new* reads (reads for which # there is no phd file. # # # 2) It runs determineReadTypes.perl so consed, autofinish, and phrap # will understand your read naming convention # # # 3) Then it runs crossmatch to screen them # for vector. # # 4) Then it runs phd2fasta to create 2 fasta files: # one containing read bases and one containing read quality. These # are of the highest versions of each read (in case any editing has # been done). # # 5) It runs phrap # # 6) It runs transferConsensusTags to transfer any consensus tags # from the newest old ace file to the one phrap created in step 4 # # 7) It runs tagRepeats.perl to tag any common repeats (such as ALU) # that you want to have automatically tagged for the benefit of consed # users. See README.txt "INSTALLING CONSED" # # This script is also called by Consed to run miniassemblies. In that # case it uses the -include_phds option. # # HOW TO USE IT: # Typically, you just type: phredPhrap Within the project, there # are 3 directories: chromat_dir (with the chromats), phd_dir # (with the phd files) and edit_dir (with the ace files and other # files). You type "phredPhrap" from within edit_dir. # # WHAT YOU SHOULD EDIT IN THIS FILE: # You should customize determineReadTypes.perl and then uncomment # the lines below in which determineReadTypes.perl is called. # Some operating systems (e.g., macosx) have nice somewhere other # than /bin/nice. You can determine this by typing: # which nice # If it says /usr/bin/nice, then you will need to replace the line: # $niceExe = "/bin/nice"; # with # $niceExe = "/usr/bin/nice"; # or some other location, if it says so. # # # # For phred, contact bge@u.washington.edu # For phrap and cross_match, contact phg@u.washington.edu # # phd2seqfasta and phd2qualfasta are superceded by phd2fasta # phd2fasta comes with the consed package # transferConsensusTags.perl comes with the consed package # tagRepeats.perl comes with the consed package # # Rev: 030326 for Jim Sloan's suggested polyphred options # Rev: 030415 to transfer consensus tags when running miniassemblies # Rev: 080818 to allow miniassemblies to use reads from phdballs $szVersion = "080818"; defined( $szConsedHome = $ENV{'CONSED_HOME'} ) || ( $szConsedHome = "/usr/local/genome" ); if ( !-r $szConsedHome ) { die "Could not find $szConsedHome for the root of the consed programs. Is the environment variable CONSED_HOME set correctly?"; } $cross_matchExe = $szConsedHome . "/bin/cross_match"; $phredExe = $szConsedHome . "/bin/phred"; $phrapExe = $szConsedHome . "/bin/phrap"; $phd2fasta = $szConsedHome . "/bin/phd2fasta"; $transferConsensusTags = $szConsedHome . "/bin/transferConsensusTags.perl"; #$transferConsensusTags = $szConsedHome . "/bin_common/transferConsensusTags.perl"; $tagRepeats = $szConsedHome . "/bin/tagRepeats.perl"; #$tagRepeats = $szConsedHome . "/bin_common/tagRepeats.perl"; # the following line is important only if you are using polyphred # for polymorphism detection $polyPhredExe = $szConsedHome . "/bin/polyphred"; $determineReadTypes = $szConsedHome . "/bin/determineReadTypes.perl"; # change this to reflect wherever you put you fasta file of vector sequences $szDefaultVectorFile = $szConsedHome . "/lib/screenLibs/vector.seq"; # if you have a specific vector file for the project, then use that: # (Thanks to Steve Kenton, Oklahoma Universtiy) if (-e "../vector.seq") { $szDefaultVectorFile = "../vector.seq"; } # change this to reflect wherever you put the phredpar.dat file $szPhredParameterFile = $szConsedHome . "/lib/phredpar.dat"; #$szPhredParameterFile = "/usr/local/common/lib/PhredPar/phredpar.dat"; #$szPhredParameterFile = "/usr/local/etc/PhredPar/phredpar.dat"; # change the 0 to 1 if you are using polyphred for polymorphism detection $bUsingPolyPhred = 0; # excellent suggestion from Steve Kenton, University of Oklahoma: # He suggests: #"We recently started doing a few snips so we are now using polyphred #occasionally. I initially copied and modified the phredPhrap script #to have a polyphred enabled version, but I hate having two copies of #stuff like that. Today I realized that we could just check the #program name $0 and do different things based on the name whic h could #be a link (at least for unix system). Any any case, the source to #both is then identical which helps with the ever present #syncronization problem. So I put in the following mod and made a #symbolic link ln -s phredPhrap polyphredPhrap" if ( $0 =~ /polyphred/ ) { $bUsingPolyPhred = 1; } if ( $#ARGV >= 0 ) { if ( $ARGV[0] eq "-V" || $ARGV[0] eq "-v" ) { print "$szVersion\n"; exit( 1 ); } } if ( $bUsingPolyPhred ) { $szPolyPhredOptions = "-dd ../poly_dir"; } else { $szPolyPhredOptions = ""; } if (!-x $cross_matchExe ) { die "could not execute or find $cross_matchExe"; } if (!-x $phredExe ) { die "could not execute or find $phredExe"; } if (!-x $phrapExe ) { die "could not execute or find $phrapExe"; } if (!-x $phd2fasta ) { die "could not execute or find $phd2fasta"; } if (!-x $transferConsensusTags || !-r $transferConsensusTags ) { die "could not execute, read or find $transferConsensusTags"; } if (!-x $tagRepeats || !-r $tagRepeats ) { die "could not execute, read or find $tagRepeats"; } if (!-x $determineReadTypes || !-r $determineReadTypes ) { die "could not execute, read, or find $determineReadTypes"; } if (!-r $szPhredParameterFile ) { die "could not read $szPhredParameterFile"; } # Let's try to centralize the location of the vector sequence $szVector = $ENV{"CROSS_MATCH_VECTOR"} || $szDefaultVectorFile; if (! -e $szVector ) { die "cannot find $szVector specifying the pathname of the vector sequences file--specify this in environment variable CROSS_MATCH_VECTOR\n"; } $ENV{'PHRED_PARAMETER_FILE'} = $szPhredParameterFile; # for taint/setuid (Need gzip/gunzip in path) #$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin'; # do not change $chromatDirPath or $phdDirPath! # consed expects the following directory structure: # foo/phd_dir contains phd files # foo/chromat_dir contains chromats # foo/edit_dir contains ace file # You can make these links, if you like. $chromatDirPath = "../chromat_dir"; $phdDirPath = "../phd_dir"; $solexaDirPath = "../solexa_dir"; $sffDirPath = "../sff_dir"; $niceExe = "/bin/nice"; $mvExe = "/bin/mv"; $lsExe = "/bin/ls"; $pwdExe = "/bin/pwd"; $szUsage = "Normal Usage: \n\ phredPhrap [phrap options]\n\ \n\ Special Usage:\n\ phredPhrap [phrap options]\n\ is used when you want the base name of the created files to be\n\ different than the name of the parent directory of the phd_dir directory\n\ \n\ phredPhrap -notags [phrap options]\n\ or\n\ phredPhrap -notags [phrap options]\n\ These are \*only\* to be used in the special case in which you want\n\ to lose any tags from previous assemblies.\n\ \n\ Unless you know what you are doing, always use the form:\n\ phredPhrap [phrap options]\ consed's miniassemblies feature will call it as follows:\n\ phredPhrap -miniassembly -fasta -phdballs -file_with_new_ace_file_name -ace_file_with_consensus_tags "; # don't buffer STDOUT output select(( select( STDOUT ), $| = 1 )[0]); if ($#ARGV < -1) { die "Invalid arguments @ARGV\n$szUsage\n"; } # Figure out the basename for all created files. # If the current directory is /blah1/blah2/blah3/edit_dir # then the basename will be blah3 if ( $#ARGV == -1 ) { $bProjectNameOnCommandLine = 0; } else { if ( $ARGV[0] =~ /^-/ ) { $bProjectNameOnCommandLine = 0; } else { $bProjectNameOnCommandLine = 1; } } if ( !$bProjectNameOnCommandLine ) { $szCurrentDirectory = `$pwdExe`; @aPathname = split( /\//, $szCurrentDirectory ); if ( $#aPathname < 2 ) { die "Sorry--I can't figure out which project this is. Please specify the project on the command line $szUsage\n"; } $szBaseName = $aPathname[ $#aPathname - 1 ]; } else { $szBaseName = $ARGV[0]; # untaint the name - it follows same rule as projects unless ($szBaseName =~ /^([a-zA-Z0-9\-\.]+)$/) { die "$szBaseName is not a valid project name\n"; } $szBaseName = $1; # I'm expecting $ARGV[0] to be the project name--not an option if ( $szBaseName =~ /^-/ ) { die "$szUsage"; } shift( @ARGV ); } print "Basename for all files: $szBaseName\n"; # Now see if the user wants to avoid transferring tags # or if the user wants to exclude some reads $szExcludeChromatsFile = ""; $szIncludePhdsFile = ""; $bTransferTags = 1; $szFileWithNewAceFileName = ""; $szOldAceFileWithConsensusTags = ""; $bMiniassembly = 0; $szPhdBallsFOF = ""; for( $nArg = $#ARGV; $nArg >= 0; --$nArg ) { if ( $ARGV[ $nArg ] eq "-notags" ) { print "You have specified -notags on the command line.\nThis is not recommened since it will mean that you will lose your consensus tags. We suggest you type this command again but without the -notags.\nAre you sure you want to include the -notags? (y/n) "; $szYorN = ; chomp( $szYorN ); if ( ($szYorN eq "y" ) || ( $szYorN eq "Y" ) || ($szYorN eq "yes" ) || ($szYorN eq "YES" ) ) { $bTransferTags = 0; # remove the -notags keyword splice( @ARGV, $nArg, 1 ); } else { exit(0); } } elsif ( $ARGV[$nArg] eq "-exclude_chromats" ) { die "-exclude_chromats must be followed by the name of the file containing the chromats to exclude from the assembly" unless ( $#ARGV >= ( $nArg + 1 ) ); $szExcludeChromatsFile = $ARGV[ $nArg + 1]; splice( @ARGV, $nArg, 2 ); } elsif( $ARGV[$nArg] eq "-include_phds" ) { die "-include_phds must be followed by the name of the file containing the full pathname of the phd files to be included in the assembly" unless ( $#ARGV >= ( $nArg + 1 ) ); $szIncludePhdsFile = $ARGV[ $nArg + 1]; splice( @ARGV, $nArg, 2 ); } elsif( $ARGV[$nArg] eq "-file_with_new_ace_file_name" ) { die "-file_with_new_ace_file_name must be followed by the name of the file in which phredPhrap is supposed to put the new ace file name" unless ($#ARGV >= ( $nArg + 1 ) ); $szFileWithNewAceFileName = $ARGV[ $nArg + 1]; splice( @ARGV, $nArg, 2 ); } elsif( $ARGV[$nArg] eq "-ace_file_with_consensus_tags" ) { if ( $#ARGV <= $nArg ) { die "-ace_file_with_consensus_tags must be followed by the name of the file from which to transfer existing consensus tags"; } $szOldAceFileWithConsensusTags = $ARGV[ $nArg + 1]; splice( @ARGV, $nArg, 2 ); } elsif( $ARGV[$nArg] eq "-miniassembly" ) { $bMiniassembly = 1; splice( @ARGV, $nArg, 1 ); } elsif( $ARGV[$nArg] eq "-phdballs" ) { if ( $#ARGV <= $nArg ) { die "-phdballs must be followed by the name of the file containing the list of phdballs containing the reads to be phrap'd"; } $szPhdBallsFOF = $ARGV[ $nArg + 1 ]; splice( @ARGV, $nArg, 2 ); } } if ( $bTransferTags == 0 && $szOldAceFileWithConsensusTags ne "" ) { die "-ace_file_with_consensus_tags and -notags cannot both be specified"; } if ( $szOldAceFileWithConsensusTags ne "" ) { if ( ! -r $szOldAceFileWithConsensusTags ) { die "could not open $szOldAceFileWithConsensusTags"; } } if ( $szExcludeChromatsFile ne "" ) { if ( ! -r $szExcludeChromatsFile ) { die "could not open $szExcludeChromatsFile"; } } if ( $szIncludePhdsFile ne "" ) { if ( ! -r $szIncludePhdsFile ) { die "could not open $szIncludePhdsFile"; } } if ( $szFileWithNewAceFileName ne "" ) { open( filFileWithNewAceFileName, ">$szFileWithNewAceFileName" ) || die "Couldn't open $szFileWithNewAceFileName for output"; # just checking that we can open it. close( filFileWithNewAceFileName ); } if ( $szPhdBallsFOF ne "" ) { open( filPhdBallsFOF, "$szPhdBallsFOF" ) || die "couldn't open $szPhdBallsFOF"; # just checking it can be opened close( filPhdBallsFOF ); } # setting @aPhrapOptions for passing to phrap @aPhrapOptions = (); if ( $#ARGV >= 0 ) { @aPhrapOptions = @ARGV; } if ( $bTransferTags && $szOldAceFileWithConsensusTags eq "" ) { # let's see if there is any old ace file from which we must transfer # consensus tags $szCommand = "$lsExe -t $szBaseName" . ".fasta.screen.ace" . '*' . " 2>/dev/null | grep -v '.wrk\$'"; $szListOfOldAceFiles = `$szCommand`; @aListOfOldAceFiles = split( ' ', $szListOfOldAceFiles ); my $bContinueLooking = 1; do { if ( $#aListOfOldAceFiles == -1 ) { $bTransferTags = 0; $bContinueLooking = 0; } else { $szOldAceFileWithConsensusTags = shift @aListOfOldAceFiles; $bIsAnAceFile = &bCheckIfThisIsAnAceFile( $szOldAceFileWithConsensusTags ); if ( $bIsAnAceFile ) { $bTransferTags = 1; $bContinueLooking = 0; } } } while( $bContinueLooking ); } if ( $bTransferTags ) { print "Will transfer consensus tags from old ace file $szOldAceFileWithConsensusTags\n"; if ( ! -r $szOldAceFileWithConsensusTags ) { print $szUsage; die "Can't read $szOldAceFileWithConsensusTags in order to transfer consensus tags. Please make it readable or mv it out of the way so it isn't found\n"; } } # E. coli screen is just for reporting purposes. Commented out for # users that don't have an E. coli database. # do same for CROSS_MATCH_ECOLI # chrisa 18-jan-96 #if ($ENV{"CROSS_MATCH_ECOLI"}) { # $szEcoli = $ENV{"CROSS_MATCH_ECOLI"}; #} #else { # $szEcoli = "/bz1/screenLibs/ecoli.lib"; #} #if (! -e $szEcoli ) { # die "cannot find $szEcoli specifying the pathname of the E.Coli sequences file--specify this in environment variable CROSS_MATCH_ECOLI\n"; #} if ( ( !(-e $chromatDirPath) ) && ( !(-e $solexaDirPath) ) && ( !(-e $sffDirPath ) ) ) { die "there must be at least one of $chromatDirPath, $solexaDirPath, or $sffDirPath--otherwise, what are you trying to phrap?"; } if (! -e $phdDirPath ) { !system("mkdir $phdDirPath") || die "could not create subdirectory phd_dir $!"; } # determine what the name of the newly created assembly is. @aAceFiles = `$lsExe -t *.ace.* 2>/dev/null`; $nHighestVersion = 0; foreach $szFile (@aAceFiles ) { chomp( $szFile ); if ( $szFile !~ /[.]wrk$/ ) { $nPos = index( $szFile, ".ace." ); $nPos += 5; if ($nPos >= length( $szFile ) ) { # case in which the filename ends with .ace. next; } else { $szExtension = substr( $szFile, $nPos ); if ( $szExtension =~ /^[0-9]+$/ ) { if ( $szExtension > $nHighestVersion ) { $nHighestVersion = $szExtension; } } } } } $nNextHigherVersion = $nHighestVersion + 1; $szAceFileToBeProduced = "${szBaseName}.fasta.screen.ace.$nNextHigherVersion"; print "ace file to be created is $szAceFileToBeProduced\n"; # this is the name of the ace filename that will be produced by phrap # It will then be renamed to $szAceFileOfNextHigherVersion $szTempAceFilename = $szBaseName . ".fasta.screen.ace"; if (-e $szTempAceFilename ) { die "$szTempAceFilename already exists. Phrap will create a temporary file of the same name. Thus you should delete or rename it and then run this script again."; } # if we are doing a miniassembly, consed will have already written out # the reads to be re-phrapped. They are obviously already base-called, # so no need to run phred. They already have vector masking so no # need to do that. And they are already in fasta format so no need to # run phd2fasta if ( !$bMiniassembly ) { # make a list of all the chromatigrams that haven't already been phred'd # Do this in the following very efficient manner: # Make a hash of the root name of the phd files (e.g., # the root of myRead.phd.1 would be 'myRead' # # For each chromat, check if it has a corresponding phd file. # If not, then add it to the list of chromats to be phred'd %aPhdFiles = (); opendir( dirPhdDir, $phdDirPath ) || die "couldn't open directory $phdDirPath $!"; while( defined( $szPhdFile = readdir( dirPhdDir ) ) ) { if ( index( $szPhdFile, ".phd." ) >= 0 ) { ( $szRoot = $szPhdFile ) =~ s/\.phd\..*$//; $szRoot =~ s/^.*\///; if ( ! exists $aPhdFiles{ $szRoot } ) { $aPhdFiles{ $szRoot } = ""; } } } closedir( dirPhdDir ); $szPHDFOF = $szBaseName . "NewChromats.fof"; if (-e $szPHDFOF ) { unlink( $szPHDFOF ) || die "couldn't delete $szPHDFOF $!"; } open( filPHDFOF, ">$szPHDFOF" ) || die "Couldn't open $szPHDFOF for output $!"; opendir( dirChromat, $chromatDirPath ) || die "Couldn't open $chromatDirPath $!"; $nFilesToPhred=0; while( defined( $szChromatFile = readdir( dirChromat ) ) ) { next if ($szChromatFile eq "." ); next if ($szChromatFile eq ".." ); $szChromatFile =~ s/\.gz$//; # no gzip suffix $szChromatFile =~ s/\.Z$//; # or compress suffix if ( ! exists $aPhdFiles{$szChromatFile} ) { print( filPHDFOF "$chromatDirPath/$szChromatFile\n" ); ++$nFilesToPhred; } } close( filPHDFOF ); # use the list of all chromatigrams that haven't been phred'd to tell # phred to phred them! if ( $nFilesToPhred > 0 ) { print "\n\n--------------------------------------------------------\n"; print "Now running phred on $nFilesToPhred files...\n"; print "--------------------------------------------------------\n\n\n"; !system("$niceExe $phredExe -if $szPHDFOF -pd $phdDirPath $szPolyPhredOptions") || die "some problem running phred $!"; } else { print "No need to run phred.\n"; } # only uncomment the following 4 lines when you have customized # determineReadTypes.perl #print "\n\n--------------------------------------------------------\n"; #print "Now running determineReadTypes.perl...\n"; #print "--------------------------------------------------------\n\n\n"; #!system( "$determineReadTypes" ) || die "some problem running determineReadTypes.perl $!\n"; print "\n\n--------------------------------------------------------\n"; print "Now running phd2fasta...\n"; print "--------------------------------------------------------\n\n\n"; $szFastaFile = $szBaseName . ".fasta"; $szFastaQualFile = $szBaseName . ".fasta.qual"; $szPhd2FastaOptions = " -id $phdDirPath "; if ( $szExcludeChromatsFile ne "" ) { $szPhd2FastaOptions .= " -ix $szExcludeChromatsFile "; } if ( $szIncludePhdsFile ne "" ) { $szPhd2FastaOptions = " -if $szIncludePhdsFile "; } $szCommand = "$phd2fasta $szPhd2FastaOptions -os $szFastaFile -oq $szFastaQualFile"; print "running: $szCommand\n"; !system("$szCommand" ) || die "some problem running phd2fasta $!"; $szScreenOut = $szBaseName . ".screen.out"; if (-e $szScreenOut ) { unlink( $szScreenOut ) || die "couldn't delete $szScreenOut $!"; } $szScreen = $szBaseName. ".fasta.screen"; if (-e $szScreen ) { unlink( $szScreen ) || die "couldn't delete $szScreen $!"; } print "\n\n--------------------------------------------------------\n"; print "Now running cross_match...\n"; print "--------------------------------------------------------\n\n\n"; !system( "$niceExe $cross_matchExe $szFastaFile $szVector -minmatch 12 -penalty -2 -minscore 20 -screen > $szScreenOut" ) || die "some problem running crossmatch $!"; # # do a screen for e coli, creating report only # chrisa 18-jan-96 # # use sequence already screened for vector # chrisa 31-july-96 #$szEcoliScreenOut = $szBaseName . ".screen-ecoli.out"; #system( "$niceExe $cross_matchExe $szScreen $szEcoli > $szEcoliScreenOut"); $szFastaQualScreenFile = $szBaseName . ".fasta.screen.qual"; !system( "$mvExe $szFastaQualFile $szFastaQualScreenFile" ) || die "some problem executing $mvExe $szFastaQualFile $szFastaQualScreenFile $!"; } else { # consed should have written this file for miniassemblies $szScreen = $szBaseName. ".fasta.screen"; } $szPhrapOut = $szBaseName . ".phrap.out"; if (-e $szPhrapOut ) { unlink( $szPhrapOut ) || die "couldn't delete $szPhrapOut $!"; } print "\n\n--------------------------------------------------------\n"; print "Now running phrap...\n"; print "--------------------------------------------------------\n\n\n"; !system( "$phrapExe $szScreen -new_ace -view @aPhrapOptions >$szPhrapOut" ) || die "some problem running phrap $!"; rename( $szTempAceFilename, $szAceFileToBeProduced ); $szConsedWRKFile = $szAceFileToBeProduced . ".wrk"; print "deleting $szConsedWRKFile\n"; # This file must not be left around, or else consed will crash # if the person attempts to apply the edits. Worse yet, consed # may not crash, but will apply edits that the user didn't intend. if (-e $szConsedWRKFile ) { unlink( $szConsedWRKFile ); } print "\n\n--------------------------------------------------------\n"; print "Now running tagRepeats.perl...\n"; print "--------------------------------------------------------\n\n\n"; !system( "$tagRepeats $szAceFileToBeProduced" ) || die "some problem running $tagRepeats $!"; if ( $bTransferTags == 1 ) { print "\n\n--------------------------------------------------------\n"; print "Now transferring consensus tags from $szOldAceFileWithConsensusTags to $szAceFileToBeProduced...\n"; print "--------------------------------------------------------\n\n\n"; !system( "$transferConsensusTags $szOldAceFileWithConsensusTags $szAceFileToBeProduced" ) || die "some problem transferring consensus tags: $transferConsensusTags $szOldAceFileWithConsensusTags $szAceFileToBeProduced $!\n"; } else { print "Not attempting to transfer consensus tags\n"; } if ( $bUsingPolyPhred ) { print "\n\n--------------------------------------------------------\n"; print "Now running polyphred for polymorphism detection...\n"; print "--------------------------------------------------------\n\n\n"; $szPolyPhredFile = $szBaseName . ".fasta.screen.polyphred.out"; !system( "$polyPhredExe -ace $szAceFileToBeProduced -tag polymorphism > $szPolyPhredFile" ) || die "some problem running $polyPhredExe $!"; } if ( $szPhdBallsFOF ne "" ) { open( filNewAce, ">>$szAceFileToBeProduced" ) || die "couldn't open $szAceFileToBeProduced for append"; open( filPhdBallsFOF, "$szPhdBallsFOF" ) || die "couldn't open $szPhdBallsFOF"; while( ) { # looks like: # WA{ # phdBall consed 080416:144002 # ../phdball_dir/phd.ball.1 # } print filNewAce "\nWA{\n"; print filNewAce "phdBall consed " . &szGetDateForTag . "\n"; print filNewAce $_; print filNewAce "}\n\n"; } close( filPhdBallsFOF ); close( filNewAce ); } if ( $szFileWithNewAceFileName ne "" ) { open( filFileWithNewAceFileName, ">$szFileWithNewAceFileName" ) || die "Couldn't open $szFileWithNewAceFileName for output $!"; print( filFileWithNewAceFileName "$szAceFileToBeProduced\n" ); close( filFileWithNewAceFileName ); } print "you may now run consed on $szAceFileToBeProduced\n"; exit(0); # this checks that the file is an ace file by opening it and reading the # first line. The first line should look like this: # AS 27 2394 # There are many other file types that match the pattern # .fasta.screen.ace.* # such as djs74.fasta.screen.ace.21.990618.144707.customPrimers # Thus we look in the file to check that it is a genuine ace file. sub bCheckIfThisIsAnAceFile { my $szFile = $_[0]; my $bSuccess = open( filAce, $szFile ); if (!$bSuccess ) { print STDERR "couldn't open $szFile\n"; print STDERR "assuming this is not an ace file\n"; return( 0 ); } my $szFirstLine; if ( !defined( $szFirstLine = ) ) { print STDERR "file $szFile was empty so assuming this is not an ace file\n"; close( filAce ); return( 0 ); } if ( $szFirstLine =~ /^AS [0-9]+ [0-9]+/ ) { close( filAce ); return( 1); } # check if perhaps old ace format (this won't be supported much longer) chomp( $szFirstLine ); while( length( $szFirstLine ) == 0 ) { if ( !defined( $szFirstLine = ) ) { print STDERR "file $szFile was empty so assuming this is not an ace file\n"; close( filAce ); return( 0 ); } chomp( $szFirstLine ); } if ( $szFirstLine =~ /^DNA / ) { close( filAce ); return( 1 ); } else { print STDERR "file $szFile is not an ace file--trying another\n"; close( filAce ); return( 0 ); } } sub szGetDateForTag { my $szDate; my ($nSecond, $nMinute, $nHour, $nDayInMonth, $nMonth, $nYear, $wday, $yday, $isdst ) = localtime; undef $isdst; undef $wday; undef $yday; if ( $nYear >= 100 ) { $nYear = $nYear % 100; } $szDate = sprintf( "%02d%02d%02d:%02d%02d%02d", $nYear, $nMonth + 1, $nDayInMonth, $nHour, $nMinute, $nSecond ); return( $szDate ); }