#!/usr/local/bin/perl -w 

# protein_table2doodle
# Script to load a table in ptt format (NCBI) into doodle
# 
# Author: Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
# 
# Please cite the author in any work or product based on this material.
#

use strict;
use Getopt::Std;
use File::Basename;

use vars qw($opt_i $opt_g @tmp @tmp2 @tmp3 $gi $sp $spid $gn $str $len
	    $syn $pro $cog $se $sta $end $opt_e $sp_ebi @tmp4 $opt_h
	    $infile $gi_list $ebifile
	    );

## Check command line
my $prog = basename($0);

# getopts() rather than getopt(): with getopt('hige') every switch, -h
# included, swallowed the following argument, so a bare -h never worked.
if (!getopts('hi:g:e:')) {
    usage($prog);
    exit(1);
}

if ($opt_h) {
    usage($prog);
    exit(0);
}

# All three files are required.  (Previously only a missing -e was
# detected, because the trailing "else" belonged to the last "if" alone.)
if (grep { !defined $_ || $_ eq '' } ($opt_i, $opt_g, $opt_e)) {
    usage($prog);
    exit(1);
}

my $in_path = $opt_i;

# Load both lookup tables once instead of spawning grep for every record.
my $gi_rows  = _read_lines($opt_g);
my $ebi_rows = _read_lines($opt_e);

## Convert the table
open(my $in_fh, '<', $in_path)
    or die "$prog: can't open $in_path: $!\n";

while (my $line = <$in_fh>) {
    # Drop the line terminator (LF or CRLF); otherwise the last column
    # keeps its newline and splits the output record in two.
    $line =~ s/\r?\n\z//;

    my @fields = split /\t/, $line;

    # Only lines whose first column is a "start..end" range and that carry
    # a gi column are data records; anything else (titles, column headers,
    # blank lines) is skipped.
    next unless @fields >= 4 && $fields[0] =~ /\.\./;

    # Columns are used by position:
    #   0 "start..end", 1 $str, 2 $len, 3 $gi, 4 $gn, 5 $syn, 7 $cog, 8 $pro
    # (column 6 is not used).  Missing columns become empty strings.
    my ($se, $str, $len, $gi, $gn, $syn, undef, $cog, $pro) =
        map { defined $_ ? $_ : '' } @fields[0 .. 8];

    # Primary lookup: second column of the first gi-list row mentioning
    # this gi.
    my $spid;
    my $gi_row = _find_row($gi, $gi_rows, 0);
    if (defined $gi_row) {
        $spid = (split /\t/, $gi_row)[1];
    }

    # Fallback: seventh column of the first EBI-table row mentioning the
    # synonym (case-insensitive, as the original "grep -i" was).
    if (!defined $spid) {
        my $ebi_row = _find_row($syn, $ebi_rows, 1);
        if (defined $ebi_row) {
            $spid = (split /\t/, $ebi_row)[6];
        }
    }
    $spid = '' unless defined $spid;

    # A gene name of "-" is replaced by the synonym.
    $gn = $syn if $gn eq '-';

    my ($sta, $end) = map { defined $_ ? $_ : '' } (split /\.\./, $se)[0, 1];

    print join("\t", $gi, $gn, $sta, $end, $str, $len, $syn, $spid, $pro, $cog), "\n";
}
close($in_fh);

## Normal end
exit(0);

## Read a text file into an array reference of lines without terminators.
## Dies with the file name if the file cannot be opened.
sub _read_lines {
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "$prog: can't open $path: $!\n";
    my @rows = <$fh>;
    close($fh);

    s/\r?\n\z// for @rows;
    return \@rows;
}

## Return the first row that contains $id as a whole token (not embedded
## in a longer run of letters/digits), or nothing when there is no such row.
## An empty or "-" identifier never matches.  $id is matched literally, so
## regex metacharacters in it are harmless.
sub _find_row {
    my ($id, $rows, $ignore_case) = @_;

    return if !defined $id || $id eq '' || $id eq '-';

    # Alphanumeric boundaries stop e.g. gi 1234 from matching a row that
    # only contains 12345.
    my $pattern = $ignore_case
        ? qr/(?<![A-Za-z0-9])\Q$id\E(?![A-Za-z0-9])/i
        : qr/(?<![A-Za-z0-9])\Q$id\E(?![A-Za-z0-9])/;

    for my $row (@{$rows}) {
        return $row if $row =~ $pattern;
    }
    return;
}

## Usage display
## Prints the command-line synopsis to STDERR.
##   $p - program name shown in the synopsis.
## The synopsis lists -i, -g and -e explicitly because the script needs all
## three and reads no positional file argument.
sub usage {
  my $p = shift;
  print STDERR <<"USAGE";
usage: $p -i <file> -g <file> -e <file>
       $p -h

options:
    -h           Usage display.
    -i <file>    Input file.
    -g <file>    List of gi's.
    -e <file>    EBI chromosome table.
USAGE
  return;
}