#!/usr/local/bin/perl -w
# protein_table2doodle
# Script to load a table in ptt format (NCBI) into doodle
#
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the author in any work or product based on this material.
#
use strict;
use Getopt::Std;
use File::Basename;
use vars qw($opt_i $opt_g @tmp @tmp2 @tmp3 $gi $sp $spid $gn $str $len
$syn $pro $cog $se $sta $end $opt_e $sp_ebi @tmp4 $opt_h
$infile $gi_list $ebifile
);
## Check command line
my $prog = basename($0);

# Parse the switches into a hash. The spec 'hi:g:e:' makes -h a plain flag
# and -i/-g/-e value-taking switches. The previous getopt('hige') call made
# every switch consume the following argument, so a bare -h was never set.
my %opt;
if (!getopts('hi:g:e:', \%opt)) {
    usage($prog);
    exit(1);
}
if ($opt{h}) {
    usage($prog);
    exit(0);
}

# All three files are read below, so all three switches are mandatory.
# (Previously only -e was enforced and a missing -i/-g failed later.)
foreach my $switch (qw(i g e)) {
    if (!defined $opt{$switch} || $opt{$switch} eq '') {
        usage($prog);
        exit(1);
    }
}
my $infile  = $opt{i};
my $gi_list = $opt{g};
my $ebifile = $opt{e};

# Read both lookup tables once, instead of spawning a grep process (through
# the shell, with unquoted data from the input file) for every record.
my @gi_lines  = _read_lines($gi_list);
my @ebi_lines = _read_lines($ebifile);

## Convert the table
# Each input record is expected to carry at least nine tab-separated columns:
#   0 location (start..end)  1 strand   2 length  3 gi       4 gene name
#   5 synonym                6 (unused) 7 cog     8 product
# NOTE(review): column meanings follow the original variable names and the
# NCBI ptt layout named in the file header -- confirm against real input.
open(my $in, '<', $infile) or die "can't open file: $!";
RECORD:
while (my $line = <$in>) {
    # Drop the line ending first; otherwise it stays attached to the last
    # column (product) and breaks every output record into two lines.
    $line =~ s/\r?\n\z//;

    # Limit -1 keeps trailing empty columns so they still count.
    my @col = split /\t/, $line, -1;

    # Title, blank and other short lines are not records.
    next RECORD if @col < 9;

    my ($loc, $str, $len, $gi, $gene, $syn, undef, $cog, $pro) = @col;

    # The column-heading row has no "start..end" location; skip it too.
    my ($sta, $end) = split /\.\./, $loc;
    next RECORD unless defined $end;

    # Identifier: field 2 of the first gi-list line containing the gi,
    # else field 7 of the first EBI-table line containing the synonym
    # (case-insensitive), else empty.
    my $spid = _lookup_field(\@gi_lines, $gi, 1, 0);
    if (!defined $spid) {
        $spid = _lookup_field(\@ebi_lines, $syn, 6, 1);
    }
    $spid = '' unless defined $spid;

    # A gene name of "-" means "none given": use the synonym instead.
    my $gn = ($gene eq '-') ? $syn : $gene;

    print "$gi\t$gn\t$sta\t$end\t$str\t$len\t$syn\t$spid\t$pro\t$cog\n";
}
close($in);

## Normal end
exit(0);

## Helpers

# _read_lines($path)
# Returns every line of $path with its line ending removed.
# Dies if the file cannot be opened.
sub _read_lines {
    my ($path) = @_;
    open(my $fh, '<', $path) or die "can't open file '$path': $!";
    my @lines = <$fh>;
    close($fh);
    s/\r?\n\z// foreach @lines;
    return @lines;
}

# _lookup_field(\@lines, $needle, $index, $ignore_case)
# Finds the first line that contains $needle as a literal substring
# (case-insensitively when $ignore_case is true) and returns its
# tab-separated field number $index (0-based). Returns undef when no line
# matches, when the matching line has no such field, or when $needle is
# empty.
# NOTE(review): substring matching mirrors the former grep call, so gi
# "123" also hits a line holding "1234"; tighten to a whole-field match
# once the lookup file layouts are confirmed.
sub _lookup_field {
    my ($lines, $needle, $index, $ignore_case) = @_;
    return undef unless defined $needle && $needle ne '';

    # \Q..\E: the needle is data, not a pattern.
    my $pattern = $ignore_case ? qr/\Q$needle\E/i : qr/\Q$needle\E/;
    foreach my $candidate (@$lines) {
        next unless $candidate =~ $pattern;
        my @field = split /\t/, $candidate;
        return $field[$index];
    }
    return undef;
}
## Usage display
# usage($name)
# Writes the command-line synopsis to STDERR, with $name shown as the
# program name. Does not exit; the caller decides what happens next.
sub usage {
    my ($name) = @_;
    my @synopsis = (
        'usage: ' . $name . ' [options] <file>',
        'options [default]:',
        '-h Usage display.',
        '-i <file> Input file.',
        q{-g <file> List of gi's.},
        '-e <file> EBI chromosome table.',
    );
    print STDERR map { $_ . "\n" } @synopsis;
}