#!/usr/bin/perl
# This program takes as input a file containing a string of DNA and writes an
# output file ("storedorfs") which contains the ORFs found in the DNA string.
# It is an adjustment of the ORF-finding script from last week: in addition to
# reporting each ORF on the screen, the ORF's sequence is stored in a file.
#
# An ORF here is: an ATG codon in reading frame 0 of the sequence (codons
# starting at 1-based positions 1, 4, 7, ...), followed in frame by the first
# stop codon (TAA, TAG or TGA). Matching is case-insensitive.
use strict;
use warnings;

print "\nStoring codon information:\n\n";
print "Please enter DNA data file with no markings:\n";

my $dnadataname = <STDIN>;
$dnadataname = '' unless defined $dnadataname;    # STDIN at EOF: fall through to the open failure below
chomp $dnadataname;

# Three-argument open treats the name literally (no "cmd |" or ">file"
# surprises). Two-argument open used to ignore surrounding whitespace in the
# name, so trim it here to keep accepting the same input.
$dnadataname =~ s/\A\s+//;
$dnadataname =~ s/\s+\z//;

# It is a good idea, when you have interactive data entry, to have an
# "unless" block that explains why the program stopped, if it had to.
my $dna_fh;
unless (open($dna_fh, '<', $dnadataname)) {
    print "Trouble opening DNA data file.\n\n";
    exit;
}
my @DNA = <$dna_fh>;
close $dna_fh;

# Glue all lines together and remove every whitespace character (this also
# removes the newlines), leaving one long string of nucleotides.
my $longstring = join('', @DNA);
$longstring =~ s/\s//g;

print "\nORF's in the original sequence:\n\n";

# This is how you open an output file. Don't forget the '>' mode, otherwise
# the file is opened for reading instead of writing.
my $outputfile = "storedorfs";
my $orf_fh;
unless (open($orf_fh, '>', $outputfile)) {
    print "\n Cannot open file \"$outputfile\"\n";
    exit;
}

# The stop codons that end an ORF, each mapped to the text printed right after
# its "ORF number ..." report line. The TAA report has always been followed by
# a single newline and the other two by a blank line; that output is kept.
my %report_tail_for = (
    TAA => "\n",
    TAG => "\n\n",
    TGA => "\n\n",
);

my $sequence_length = length $longstring;
my $ORF             = 0;    # number of ORFs found so far

# Main loop: walk the sequence one codon (three nucleotides) at a time.
# $offset is the 0-based Perl index of the codon's first nucleotide.
for (my $offset = 0; $offset < $sequence_length - 2; $offset += 3) {
    next unless uc(substr($longstring, $offset, 3)) eq 'ATG';

    # Positions reported to the user are 1-based.
    my $location = $offset + 1;
    print "ATG found at position $location\n";

    # Scan forward in frame from the ATG until the first stop codon.
    # $stop is the 1-based position of the codon being examined.
    for (my $stop = $location; $stop < $sequence_length - 1; $stop += 3) {
        my $codon = uc substr($longstring, $stop - 1, 3);
        next unless exists $report_tail_for{$codon};

        ++$ORF;
        print "$codon found at position $stop.\n";

        # 1-based position of the last nucleotide of the stop codon.
        my $end = $stop + 2;
        print "ORF number $ORF from position $location to $end."
            . $report_tail_for{$codon};

        # substr($A, $B, $C) returns the substring of the string $A that
        # begins at position $B and is $C characters long. Perl counts
        # positions from 0, not 1, so the ORF begins at $location - 1 in
        # $longstring and is $end - $location + 1 characters long. It is
        # then printed to the output file as an added record.
        my $string = substr($longstring, $location - 1, $end - $location + 1);
        print {$orf_fh} ">ORF number $ORF:\n$string\n\n";

        last;    # only the first in-frame stop codon closes this ORF
    }
}

print "\nTotal number of ORF's found in the original sequence = $ORF.\n\n";

# Buffered write errors only surface when the handle is closed, so check it.
close($orf_fh) or die "Cannot close file \"$outputfile\": $!\n";

# Now let us have a look at what we have put in the file. The output file
# remains in the directory in which you have been running Perl.
my $odata = "storedorfs";
my $odata_fh;
unless (open($odata_fh, '<', $odata)) {
    print "\n Cannot open file \"$odata\"\n";
    exit;
}
my @orfdata = <$odata_fh>;
close $odata_fh;

print "Here is what is contained in the file \"storedorfs\":\n";
print @orfdata;

exit;