#!/usr/local/bin/perl
# genome2blast
# BLAST the fasta sequences in a directory, one fasta file at a time,
# then generate the reports and an HTML file with graphical output
#
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the author in any work or product based on this material.
#
# This script requires 1. NHGRI::Blastall
# <http://genome.nhgri.nih.gov/blastall/>
# 2. Html4blast
# <ftp://ftp.pasteur.fr/pub/GenSoft/unix/alignment/Blast_tools/Html4blast/>
use strict;
use Getopt::Std;
use NHGRI::Blastall;
use File::Basename;
use vars qw($opt_d $b $fname $i $db1 $db2 $db3 $hp $p1 $p2
$I $F $S $e $hp $dirname $opt_o $org @filenames
@fasta_files $opt_h);
## Check command line
my $prog = basename($0);

# -h is a plain flag, while -d and -o each take a value.  getopts() lets us
# say so (getopt('hdo') made -h swallow the next argument) and returns
# false when an unknown switch is given.
if (!getopts('hd:o:')) {
    usage($prog);
    exit(1);
}
if ($opt_h) {
    usage($prog);
    exit(0);
}

# Both values are needed: -d names the input directory, and -o builds the
# database name and the HTML output path below.
if (!defined $opt_d || $opt_d eq '' || !defined $opt_o || $opt_o eq '') {
    usage($prog);
    exit(1);
}

## define databases to use
$dirname = $opt_d;
$org     = $opt_o;
$db1     = $org . ".aa";
$db2     = "pdbaa";

## Path to your programs (html4blast)
$ENV{'PATH'} = "/usr/local/ncbi/seals/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin";

## Define a path where to save the blast reports
$hp = "/home/httpd/html/pub/$org/blast/"; # Html readable path

opendir(my $dir_handle, $dirname) or die "can't opendir $dirname: $!";
@filenames = readdir($dir_handle);
closedir($dir_handle);

# readdir() returns bare entry names, and the rest of the script uses them
# as relative paths, so work from inside the input directory.
chdir($dirname) or die "can't chdir $dirname: $!";

@fasta_files = ();
for my $entry (sort @filenames) {
    next unless -f $entry;        # plain files only; also drops '.' and '..'
    next if $entry eq $prog;      # skip this script ($0 may carry a path)
    next if $entry =~ /\.br\z/;   # skip BLAST reports left by an earlier run
    push @fasta_files, $entry;
}

# $fname stays the (global) loop variable because do_blast may read it.
foreach $fname (@fasta_files) {
    do_blast($fname);
    print "$fname\n";
}
## Run blastp for one fasta file against each configured database.
##
## Arguments:
##   $_[0]  input fasta file name (optional).  When it is undefined or a
##          reference, the global $fname is used instead, so callers that
##          pass a placeholder keep working.
## Globals read: $db1, $db2 (database names), $fname (fallback input).
## Output: one report per database, written to "<input>.<database>.br".
sub do_blast {
    my ($input) = @_;
    $input = $fname if !defined $input || ref $input;

    ## define BLAST parameters to use
    # Kept in lexicals: $a and $b are the special variables of sort() and
    # must not be used as general-purpose globals.
    my $program = "blastp"; # blast program
    my $cpus    = "2";      # number of processors to use
    my $show_gi = "T";      # Include gis in the reports
    my $filter  = "F";      # Use filter (NO)
    my $evalue  = "1e-5";   # e-value

    print "Running NCBI BLAST...\n";

    ## One blast report per database; an unset database name is skipped
    ## instead of being handed to blastall as an empty string.
    for my $db (grep { defined $_ && $_ ne '' } ($db1, $db2)) {
        my $blast = NHGRI::Blastall->new;
        $blast->blastall(
            p => $program,
            d => $db,
            a => $cpus,
            I => $show_gi,
            F => $filter,
            e => $evalue,
            i => $input,
            o => "$input.$db.br",
        );
    }
    return;
}
## Convert every BLAST report into an HTML page with graphical output.
# Only database names that are actually set are used.  $db3 is never
# assigned anywhere in the visible script, so calling html4blast for it
# would target a "<file>..br" report that was never produced.
my @report_dbs = grep { defined $_ && $_ ne '' } ($db1, $db2, $db3);

for my $file (@fasta_files) {
    for my $db (@report_dbs) {
        my $report = "$file.$db.br";
        # List-form system() bypasses the shell, so file names containing
        # spaces or shell metacharacters reach html4blast unchanged.
        system('html4blast', '-o', "$hp$report.html", '-e', '-g', $report) == 0
            or warn "html4blast failed for $report (status $?)\n";
    }
}
print "Done!\n";
## Normal end
exit(0);
## Usage display
## Print the command-line synopsis to STDERR.
##   $p  program name shown in the synopsis
## The script reads no positional arguments, so the synopsis lists only
## the switches.
sub usage {
    my $p = shift;
    print STDERR <<USAGE;
usage: $p -d <directory> -o <organism> [-h]
options:
  -h  Usage display.
  -d  Directory containing fasta files (required).
  -o  Organism; the BLAST database used is <organism>.aa.
USAGE
    return;
}