#!/usr/local/bin/perl
#
# parse_blast
# Reads and parse blast reports
# usage parse_blast > <Name_of_parse_output>
# Output format
#
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the author in any work or product based on this material.
#
use strict;
use Bio::Tools::BPlite;
use Getopt::Std;
use File::Basename;
use vars qw($opt_d $file @blast_reports $fname $report @tmp $qn $gi $sc
$ev $pid $qfr $qstr $qsta $qend $sstr $ssta $send $sbjct
$type $db $anal1 $anal2 $anal3 $anal4 $anal $dirname $opt_h
);
## Check command line
my $prog = basename($0);
getopt('hd');
if ($opt_h) {
usage($prog); exit();
} if ($opt_d) {
$dirname = "$opt_d";
} else {
usage($prog); exit();
}
## Get a working directory containing blast reports
if (opendir (DIR, $dirname)) {
while ($file = readdir(DIR)) {
## blast reports must contain the .br suffix
push (@blast_reports, "$file") if ($file =~ /\.br$/);
}
}
closedir(DIR);
foreach $fname (@blast_reports) {
parse_blast ( $report );
}
## parse_blast subroutine
sub parse_blast {
my $report = new Bio::Tools::BPlite(-file=>"$fname");
my $query = $report->query;
## Get only the clone id from all the query name
@tmp = split ' ', $query; $qn = $tmp[0];
$db = $report->database;
## get analysis code for database
$anal1 = $anal2 = $anal3 = $anal4 = ();
if ($db =~ m/ecoli.aa/) {
$anal1 = "1";
} elsif ($db =~ m/pdbaa/) {
$anal2 = "2";
} elsif ($db =~ m/ecoli.na/) {
$anal3 = "3";
} elsif ($db =~ m/mtuberculosis.aa/) {
$anal3 = "4";
} elsif ($db =~ m/mtuberculosis.na/) {
$anal3 = "5";
} elsif ($db =~ m/lambda.aa/) {
$anal3 = "6";
} elsif ($db =~ m/lambda.na/) {
$anal3 = "7";
} elsif ($db =~ m/paeruginosa.aa/) {
$anal3 = "8";
} elsif ($db =~ m/paeruginosa.na/) {
$anal3 = "9";
} else {
$anal1 = $anal2 = $anal3 = "0";
}
while(my $sbjct = $report->nextSbjct) {
my $type = $sbjct->report_type(); #print "$type\n";
## get analysis code for blast type
if ($type eq "BLASTX") {
$anal4 = "1";
} elsif ($type eq "BLASTN") {
$anal4 = "2";
} elsif ($type eq "BLASTP") {
$anal4 = "3";
} else {
$anal4 = "0";
}
my $blast_hit = $sbjct->name;
## get gi's from blast report
@tmp = split /\|/, $blast_hit; $gi = $tmp[1];
while(my $hsp = $sbjct->nextHSP) {
$sc = $hsp->bits;
$ev = $hsp->P;
$pid = $hsp->percent;
$qfr = $hsp->query->frame;
$qstr = $hsp->query->strand;
$qsta = $hsp->query->start;
$qend = $hsp->query->end;
$sstr = $hsp->hit->strand;
$ssta = $hsp->hit->start;
$send = $hsp->hit->end;
$anal = "$anal4$anal1$anal2$anal3";
print "$qsta\t$qend\t$sc\t$qstr\t$qfr\t$anal\t$qn\t$sstr\t$ssta\t$send\t$blast_hit\t$gi\t$ev\t$pid\n";
}
}
}
## Normal end
exit(0);
## Usage display
sub usage {
my $p = shift;
print STDERR <<USAGE
usage: $p [options] <file>
options [default]:
-h Usage display.
-d Directory containing blast reports (*.br).
USAGE
}