extract-seq-by_minlen.pl

09/12/2014 15:41

Ce script écrit en Perl permet d'extraire d'un fichier FASTA les séquences dont la longueur est supérieure ou égale à un seuil donné.

 

#!/usr/bin/perl -w
# Extract from a FASTA file the sequences whose length is at least a given minimum

use strict;
use warnings;
use Getopt::Std;
use IO::String;
use Bio::SeqIO;
use Bio::Tools::SeqStats;

# Main script body: parse the command line, then copy every sequence of the
# input FASTA file whose length is at least the requested minimum to STDOUT.
use Scalar::Util qw(looks_like_number);

# Both -i (input FASTA file) and -l (minimum length) take a value.
# Getopt::Std stores the values in these package variables.
our ($opt_i, $opt_l);
getopt('il');

# The input path must be supplied and must exist. Testing with
# defined/length (rather than plain truth) keeps a file named "0" usable.
if (!defined $opt_i || !length $opt_i || !-e $opt_i) {
    usage();
}

# The threshold must be a number. A plain truth test would reject a
# legitimate threshold of 0 and would let non-numeric text through to the
# numeric comparison below.
if (!defined $opt_l || !looks_like_number($opt_l)) {
    usage();
}

# Open the input ourselves with three-argument open and hand the handle to
# Bio::SeqIO: its -file argument reads mode characters such as '>' from the
# front of the string, so an unusual file name could be misinterpreted.
open my $in_fh, '<', $opt_i
    or die "Cannot open input file '$opt_i': $!\n";

my $seq_in  = Bio::SeqIO->new(-fh => $in_fh,   -format => 'Fasta');
my $seq_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta');

# Stream the records one at a time; only those reaching the threshold are
# written out, in their original order.
while (my $seq = $seq_in->next_seq()) {
    next if $seq->length() < $opt_l;
    $seq_out->write_seq($seq);
}


#FUNCTIONS
# Print the command-line help and abort the script.
#
# Takes no arguments and never returns normally: it always ends by calling
# die(), so the caller's execution stops here unless the die is trapped.
sub usage {
    # The help goes to STDERR, the same stream die() uses, so the whole
    # message stays together and never ends up in STDOUT when the caller
    # redirects STDOUT to capture the script's output.
    print STDERR "\nUSAGE: $0 -i input.fasta -l seq_len_to_extract \n",
        "\n-i (input)  : Fasta file\n",
        "\n-l (input) : minimum sequence length to extract\n";
    die "\ninsert coin and shoot again\n\n";
}