#!/usr/bin/perl -w $nQuality = 20; $szUsage = "fasta2PhdBall.perl (fasta file) (output phdball)"; if ( $#ARGV != 1 ) { die "$szUsage\n"; } $szFastaFile = $ARGV[0]; $szPhdBall = $ARGV[1]; open( filFasta, "$szFastaFile" ) || die "couldn't open $szFastaFile"; open( filPhdBall, ">$szPhdBall" ) || die "couldn't open $szPhdBall for write"; $szTemp3 = `date`; $szTemp4 = substr($szTemp3, 0, length( "Fri Dec 1 14:22:38 " )); $szTemp5 = substr($szTemp3, length( "Fri Dec 1 14:22:38 PST "), length("1995")); $szDate = $szTemp4 . $szTemp5; $bFirstSeq = 1; while(1) { defined( $_ = ) || ( $_ = ">___LAST___" ); if ( /^>(\S+)/ ) { $szSeqName = $1; $szLastSeqHeader = $szSeqHeader; $szSeqHeader = $_; # if this is not the first sequence, finish the previous one if ( $bFirstSeq ) { $bFirstSeq = 0; } else { print filPhdBall "END_DNA\n"; if ( $szLastSeqHeader =~ / polymorphism n1ChrPos=(\d+) n1ReadPos=(\d+)/ ) { $n1ChrPos = $1; $n1ReadPos = $2; $szDateTime = &szGetDateForReadTag; print filPhdBall "\nBEGIN_TAG\n"; print filPhdBall "TYPE: polymorphism\n"; print filPhdBall "SOURCE: fasta2PhdBall.perl\n"; print filPhdBall "UNPADDED_READ_POS: $n1ReadPos $n1ReadPos\n"; # 10/07/02 12:37:23 print filPhdBall "DATE: $szDateTime\n"; print filPhdBall "BEGIN_COMMENT\n"; print filPhdBall "chrpos: " . &addCommas( $n1ChrPos ) . "\n"; print filPhdBall "END_COMMENT\n"; print filPhdBall "END_TAG\n\n"; } print filPhdBall "END_SEQUENCE\n\n"; } if ( $szSeqName eq "___LAST___" ) { last; } print filPhdBall "BEGIN_SEQUENCE $szSeqName 1\n"; print filPhdBall "BEGIN_COMMENT\n"; # no CHROMAT_FILE, ABI_THUMBPRINT, PHRED_VERSION, CALL_METHOD, or # QUALITY_VALUES for fake reads print filPhdBall "TIME: $szDate\n"; print filPhdBall "CHEM: unknown\n"; print filPhdBall "END_COMMENT\n\n"; print filPhdBall "BEGIN_DNA\n"; } else { chomp; ( $szBasesLowerCase = $_ ) =~ tr/A-Z/a-z/; for( $n = 0; $n < length( $szBasesLowerCase ); ++$n ) { $cBase = substr( $szBasesLowerCase, $n, 1 ); print filPhdBall "$cBase $nQuality\n"; } } } close filPhdBall; ##################### subroutines #################### sub addCommas { ( $nOriginal ) = @_; ( $nWithCommas = $nOriginal ) =~ s/(?<=\d)(?=(\d\d\d)+$)/,/g; return $nWithCommas; } sub szGetDateForReadTag { my $szDate; ($nSecond, $nMinute, $nHour, $nDayInMonth, $nMonth, $nYear, $wday, $yday, $isdst ) = localtime; undef $isdst; undef $wday; undef $yday; if ( $nYear >= 100 ) { $nYear = $nYear % 100; } # format: # 10/07/02 12:37:23 $szDate = sprintf( "%02d/%02d/%02d %02d:%02d:%02d", $nYear, $nMonth + 1, $nDayInMonth, $nHour, $nMinute, $nSecond ); return( $szDate ); }