#!/usr/local/bin/perl
#
# parse_blast
# Reads and parse blast reports
# usage parse_blast > <Name_of_parse_output>
# Output format
#
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the author in any work or product based on this material.
#

use strict;
use Bio::Tools::BPlite;
use Getopt::Std;
use File::Basename;

use vars qw($opt_d $file @blast_reports $fname $report @tmp $qn $gi $sc
	    $ev $pid $qfr $qstr $qsta $qend $sstr $ssta $send $sbjct
	    $type $db $anal1 $anal2 $anal3 $anal4 $anal $dirname $opt_h
	    );

## Check command line
my $prog = basename($0);

getopt('hd');

if ($opt_h) {
    usage($prog); exit();
} if ($opt_d) {
$dirname = "$opt_d";
} else {
    usage($prog); exit();
}

## Get a working directory containing blast reports
if (opendir (DIR, $dirname)) {
   while  ($file = readdir(DIR)) {
       ## blast reports must contain the .br suffix 
	push (@blast_reports, "$file") if ($file =~ /\.br$/);
   }
}
   closedir(DIR);

foreach $fname (@blast_reports) {

    parse_blast ( $report );

}


## parse_blast subroutine
sub parse_blast {

my $report = new Bio::Tools::BPlite(-file=>"$fname");

my $query = $report->query;

## Get only the clone id from all the query name
@tmp = split ' ', $query; $qn = $tmp[0];

$db = $report->database;

## get analysis code for database
$anal1 = $anal2 = $anal3 = $anal4 = ();

if ($db =~ m/ecoli.aa/) {
    $anal1 = "1";
}   elsif ($db =~ m/pdbaa/) {
    $anal2 = "2";
}  elsif ($db =~ m/ecoli.na/) {
    $anal3 = "3";
}  elsif ($db =~ m/mtuberculosis.aa/) {
    $anal3 = "4";
}  elsif ($db =~ m/mtuberculosis.na/) {
    $anal3 = "5";
}  elsif ($db =~ m/lambda.aa/) {
    $anal3 = "6";
}  elsif ($db =~ m/lambda.na/) {
    $anal3 = "7";
}  elsif ($db =~ m/paeruginosa.aa/) {
    $anal3 = "8";
}  elsif ($db =~ m/paeruginosa.na/) {
    $anal3 = "9";
} else {
    $anal1 = $anal2 = $anal3 = "0";
}

while(my $sbjct = $report->nextSbjct) {
    my $type = $sbjct->report_type(); #print "$type\n";

    ## get analysis code for blast type
    if ($type eq "BLASTX") {
	$anal4 = "1";
    } elsif ($type eq "BLASTN") {
	$anal4 = "2";
    } elsif ($type eq "BLASTP") {
	$anal4 = "3";
    } else {
	$anal4 = "0";
    }

    my $blast_hit = $sbjct->name;
    ## get gi's from blast report
    @tmp = split /\|/, $blast_hit; $gi = $tmp[1];
    
    while(my $hsp = $sbjct->nextHSP) {
        $sc = $hsp->bits;
        $ev = $hsp->P; 
	$pid = $hsp->percent; 
        $qfr = $hsp->query->frame;
        $qstr = $hsp->query->strand; 
	$qsta = $hsp->query->start;
	$qend = $hsp->query->end;
	$sstr = $hsp->hit->strand;
	$ssta = $hsp->hit->start;
	$send = $hsp->hit->end;

	$anal = "$anal4$anal1$anal2$anal3";

	    print "$qsta\t$qend\t$sc\t$qstr\t$qfr\t$anal\t$qn\t$sstr\t$ssta\t$send\t$blast_hit\t$gi\t$ev\t$pid\n";

    }
}

}

## Normal end
exit(0);

## Usage display
sub usage {
  my $p = shift;
  print STDERR <<USAGE
usage: $p [options] <file>

options [default]:
    -h           Usage display.
    -d           Directory containing blast reports (*.br).
USAGE
}