#!/usr/local/bin/perl 

# dir2blast
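# BLAST every fasta sequence file (*.seq) found in a directory against
# the organism's protein database, pdbaa and the organism's nucleotide
# database, write one report per search, and generate an HTML file
# with graphical output for each report.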
# Usage: 
#
# dir2blast -d <directory_with_fasta_files> -o <organism>
# dir2blast -h      (print the usage message)
#
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the author in any work or product based on this material.
#
# This script requires 1. NHGRI::Blastall 
#    <http://genome.nhgri.nih.gov/blastall/>
#                      2. Html4blast
#    <ftp://ftp.pasteur.fr/pub/GenSoft/unix/alignment/Blast_tools/Html4blast/>

use strict;
use Getopt::Std;
use NHGRI::Blastall;
use File::Basename;

use vars qw($opt_d $opt_o $opt_h $b $fname $i $db1 $db2 $db3 $hp $p1 $p2 $I
            $F $S $e $hp $dirname @filenames @fasta_files $opt_p $organism
	    );

## Check command line
my $prog = basename($0);

# -h is a plain flag; -d and -o each take a value.  getopts() is used
# instead of getopt('hdo') because the latter makes -h swallow the next
# argument, so a bare -h never set $opt_h.
unless (getopts('hd:o:')) {
    usage($prog);
    exit(1);
}

if ($opt_h) {
    usage($prog);
    exit(0);
}

# Both values are required: the directory holds the input sequences and
# the organism names two of the BLAST databases and the HTML directory.
unless (defined $opt_d && length $opt_d && defined $opt_o && length $opt_o) {
    usage($prog);
    exit(1);
}

$dirname  = $opt_d;
$organism = $opt_o;

## define databases to use
$db1 = "$organism.aa";
$db2 = "pdbaa";
$db3 = "$organism.na";

## Path to your programs (html4blast)

$ENV{'PATH'} = "/usr/local/ncbi/seals/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin";

## Directory that receives the generated HTML pages
$hp = "/usr/local/apache/htdocs/localhost/pub/$organism/blast/clones/";

## Get a working directory containing fasta sequences
opendir(my $dh, $dirname)
    or die "$prog: cannot open directory '$dirname': $!\n";
@fasta_files = grep { /\.seq$/ } readdir($dh);
closedir($dh);

warn "$prog: no .seq files found in '$dirname'\n" unless @fasta_files;

# readdir() returns bare file names, and both the BLAST runs and the
# later html4blast calls use them as relative paths, so work from
# inside the sequence directory.
chdir($dirname)
    or die "$prog: cannot change to directory '$dirname': $!\n";

## Run the BLAST searches for every sequence file.
## do_blast() reads the current file name from the global $fname.
foreach $fname (@fasta_files) {
    do_blast();
    print "$fname\n";
}
## do_blast
# Run the three BLAST searches for the sequence file named by the global
# $fname, writing one report per database:
#
#   blastx against $db1  ->  $fname.$db1.br
#   blastx against $db2  ->  $fname.$db2.br
#   blastn against $db3  ->  $fname.$db3.br
#
# Reads the globals $fname, $db1, $db2 and $db3.  Any arguments are
# ignored.  Returns whatever the last blastall() call returned; the
# caller in this script does not use it.
sub do_blast {

## define BLAST parameters to use

    my $query      = "$fname";   # input fasta file
    my $processors = "2";        # number of processors to use
    my $show_gis   = "T";        # Include gis in the reports
    my $filter     = "F";        # Use filter (NO)
    my $evalue     = "1e-5";     # e-value

    # One entry per search.  Only the two blastx searches restrict the
    # strand; the blastn search passes no S option at all.
    my @searches = (
        { program => "blastx", database => $db1, strand => "1" },
        { program => "blastx", database => $db2, strand => "1" },
        { program => "blastn", database => $db3 },
    );

    print "Running NCBI BLAST...\n";

    my $result;
    foreach my $search (@searches) {
        my $database = "$search->{database}";

        my @options = (
            p => $search->{program},
            d => $database,
            a => $processors,
            I => $show_gis,
            F => $filter,
            e => $evalue,
            i => $query,
        );
        push @options, ( S => $search->{strand} )
            if defined $search->{strand};
        push @options, ( o => "$query.$database.br" );

        # A fresh object per search, as before; lexical so that Perl's
        # special sort variables $a and $b are left alone.
        my $blast = NHGRI::Blastall->new;
        $result = $blast->blastall(@options);
        # $blast->print_report; # If you want to view the output of the blast report.
    }

    return $result;
}

## Convert every BLAST report into an HTML page with html4blast.
## Reports are read from the current directory ($fname.<db>.br) and the
## pages are written under $hp with ".html" appended to the report name.
foreach my $seq_file (@fasta_files) {
    foreach my $db ($db1, $db2, $db3) {
        my $report = "$seq_file.$db.br";
        my $page   = "$hp$report.html";

        # List form: the file names come from a directory listing, so
        # they must not be re-parsed by a shell.
        my $status = system('html4blast', '-o', $page, '-e', '-g', $report);

        if ($status == -1) {
            warn "cannot run html4blast for '$report': $!\n";
        }
        elsif ($status != 0) {
            warn "html4blast failed for '$report' (exit status "
               . ($status >> 8) . ")\n";
        }
    }
}

print "Done!\n";

## Normal end
exit(0);

## Usage display
# Print the command-line synopsis to STDERR.
#   $p  program name shown in the synopsis (first argument).
# Does not exit; the caller decides what to do afterwards.
sub usage {
  my $p = shift;
  print STDERR <<"USAGE";
usage: $p -d <directory> -o <organism>
       $p -h

options:
    -h           Usage display.
    -d           Directory containing fasta files (*.seq).
    -o           Organism.
USAGE
}