#! /usr/bin/perl -w
# Written by CSR on 12.12.06
# Modified on 2.4.09
#
# usage: ./scriptname.pl -f [fasta filename] -l [list filename]
# input = list of sequence ids to retrieve and a fasta file
#
# For each sequence ID in the list file (one ID per line) that is present in
# the FASTA file, prints a ">ID" header line, the sequence string, and a blank
# line to STDOUT, in list-file order. IDs that are not present in the FASTA
# file are reported on STDERR and otherwise skipped.

use strict;
use warnings;
use Bio::DB::Fasta;
use Getopt::Std;

# Command line arguments: import fasta sequences and seqID list using
# "-f" and "-l".
my $usage = "Warning, missing information!\nUsage is: ./retrieve_sequence_subset1.0.pl -f [fasta filename] -l [list filename]\n";

my %args;
getopts('f:l:', \%args);

my $fastafile = $args{f};
my $listfile  = $args{l};

# Both options are mandatory. Test defined/length rather than truthiness so
# that a file literally named "0" is still accepted.
for my $required ($fastafile, $listfile) {
    die $usage unless defined $required && length $required;
}

my $db = Bio::DB::Fasta->new($fastafile)
    or die "Cannot open FASTA file '$fastafile'\n"
         . "scriptname.pl -f [fastafile] -l [listfile]\n";

# Build a lookup table of every ID in the FASTA file once, so each requested
# ID is checked with a single hash lookup instead of a scan over all IDs.
my %is_known_id = map { $_ => 1 } $db->get_all_ids();

## Import a text file containing a list of seqIDs to retrieve
open my $list_fh, '<', $listfile
    or die "Cannot open list file '$listfile': $!\n"
         . "scriptname.pl -f [fastafile] -l [listfile]\n";

while (my $line = <$list_fh>) {
    chomp $line;

    # Tolerate CRLF line endings and stray padding around the ID, and ignore
    # empty lines so they do not trigger a "not found" warning.
    $line =~ s/\A\s+|\s+\z//g;
    next if $line eq '';

    # Check that the seqID from the list file exists in the fasta file.
    if ($is_known_id{$line}) {
        print ">$line\n";
        my $seqstr = $db->seq($line);
        print "$seqstr\n\n";
    }
    else {
        # Goes to STDERR so the FASTA output on STDOUT stays clean.
        warn "Warning: seqID '$line' not found in '$fastafile'\n";
    }
}

close $list_fh;