#!/usr/local/bin/perl 

# genome2blast
# BLAST the fasta sequence files found in a directory,
# generate the reports and an HTML file with graphical output
# 
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the author in any work or product based on this material.
#
# This script requires 1. NHGRI::Blastall 
#    <http://genome.nhgri.nih.gov/blastall/>
#                      2. Html4blast
#    <ftp://ftp.pasteur.fr/pub/GenSoft/unix/alignment/Blast_tools/Html4blast/>

use strict;
use Getopt::Std;
use NHGRI::Blastall;
use File::Basename;

use vars qw($opt_d $b $fname $i $db1 $db2 $db3 $hp $p1 $p2
            $I $F $S $e $hp $dirname $opt_o $org @filenames
            @fasta_files $opt_h);

## Check command line
my $prog = basename($0);

# -h is a plain flag while -d and -o each take a value.  getopts() makes
# that distinction (getopt() would treat -h as value-taking and swallow the
# next word) and returns false on an unknown switch.
getopts('hd:o:') or do { usage($prog); exit(1); };

if ($opt_h) {
    usage($prog);
    exit(0);
}

# Both the input directory and the organism are needed: the organism names
# the first BLAST database and the report directory.
unless (defined $opt_d && length $opt_d && defined $opt_o && length $opt_o) {
    usage($prog);
    exit(1);                      # non-zero: the command line was incomplete
}

$dirname = $opt_d;
$org     = $opt_o;

## define databases to use

$db1 = $org . ".aa";
$db2 = "pdbaa";

## Path to your programs (html4blast)

$ENV{'PATH'} = "/usr/local/ncbi/seals/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin";

## Define a path where to save the blast reports
$hp = "/home/httpd/html/pub/$org/blast/";  # Html readable path

opendir(my $dir_handle, $dirname) or die "can't opendir $dirname: $!";
@filenames = readdir($dir_handle);
closedir($dir_handle);

# readdir() returns bare names relative to $dirname, and the rest of the
# script opens the files (and writes the reports) by bare name, so work
# from inside that directory.
chdir($dirname) or die "can't chdir $dirname: $!";

for my $entry (@filenames) {
    next if $entry eq '.' or $entry eq '..';   # skip directory info
    next if $entry eq $prog;                   # skip this file ($0 may carry a path)
    next unless -f $entry;                     # skip subdirectories and other non-files
    push(@fasta_files, $entry);
}

# do_blast() takes the current input file from the package variable $fname,
# which this loop sets for each iteration.
foreach $fname (@fasta_files) {
    do_blast();
    print "$fname\n";
}
## Run blastp on the current fasta file against each configured database.
##
## The input file name is read from the package variable $fname (set by the
## caller's foreach loop); any argument passed in is ignored.  The databases
## come from the package variables $db1 and $db2, and one report is written
## per database as "<input>.<database>.br".  A database name that is
## undefined or empty is skipped with a warning.  Returns nothing.
sub do_blast {

## define BLAST parameters to use

    # Kept in a lexical hash so the sort() globals $a and $b are not clobbered.
    my %blast_args = (
        p => "blastp",           # blast program
        i => "$fname",           # input fasta file
        a => "2",                # number of processors to use
        I => "T",                # Include gis in the reports
        F => "F",                # Use filter (NO)
        e => "1e-5",             # e-value
    );

    print "Running NCBI BLAST...\n";

## One blast report per database, each with a fresh Blastall object

    foreach my $database ($db1, $db2) {
        unless (defined $database && length $database) {
            warn "do_blast: skipping a database with no name for $fname\n";
            next;
        }

        my $blaster = NHGRI::Blastall->new;
        $blaster->blastall( %blast_args,
                            d => "$database",
                            o => "$fname.$database.br",
                          );
    }

    return;
}

## Convert each BLAST report "<file>.<database>.br" into an HTML page with
## graphical output, written under $hp.
foreach $fname (@fasta_files) {
    foreach my $database ($db1, $db2, $db3) {
        # $db3 is declared but never assigned in this script; skipping unset
        # databases avoids running html4blast on a non-existent "<file>..br".
        next unless defined $database && length $database;

        my $report = "$fname.$database.br";

        # List form: the file names reach html4blast as single arguments and
        # are never interpreted by a shell.
        system('html4blast', '-o', "$hp$report.html", '-e', '-g', $report) == 0
            or warn "html4blast failed for $report (status $?)\n";
    }
}

print "Done!\n";

## Normal end
exit(0);

## Usage display
## Prints the command-line synopsis and option list to STDERR.
## Takes one argument: the program name to show in the synopsis.
## Does not exit; the caller decides the exit status.
sub usage {
  my $p = shift;
  print STDERR <<"USAGE";
usage: $p -d <directory> -o <organism>
       $p -h

options:
    -h           Usage display.
    -d           Directory containing fasta files.
    -o           Organism.
USAGE
}