#!/usr/local/bin/perl
# coils2pos.pl
# Reads the directory given with -d (for example /home/httpd/html/pub/ccp),
# reads the data from each *.aa.ccp file found there,
# then gets the GenBank id and the positions of the lowercase letters,
# and appends them to the file "output".
#
# Authors: Xuemei Tang <txmei@hotmail.com>
# Leonardo Marino-Ramirez <marino@tofu.tamu.edu>
#
# Please cite the authors in any work or product based on this material.
#
use strict;
use Getopt::Std;
use File::Basename;
use vars qw($opt_d $file $filename $size $length $count $se @ccp_files @f
@start @end $opt_h $dirname
);
## Check command line
# -h is a plain flag while -d takes a value, hence the "hd:" specification
# for getopts().  (getopt('hd') would make -h value-taking as well, so that
# "-d dir -h" silently lost the help request and "-h -d dir" stored "-d" as
# the value of -h.)
my $prog = basename($0);

# Show the usage text when parsing fails (unknown switch), when help was
# requested, or when the mandatory -d <directory> is missing.
if (!getopts('hd:') || $opt_h || !defined($opt_d) || $opt_d eq '') {
    usage($prog);
    exit();
}
$dirname = $opt_d;
# Collect the names of the *.ccp files in the working directory.
# An unreadable or missing directory is a fatal error: without it there is
# nothing to process, and continuing would end the run "successfully" with
# no output and no explanation.
opendir(my $dh, $dirname) or die "can't open directory $dirname: $!";

# readdir() returns entries in no particular order; sorting them makes the
# order of the lines written to the output file reproducible.
@ccp_files = sort grep { /\.ccp$/ } readdir($dh);
closedir($dh);
# Process every collected *.ccp file in turn (nothing happens when the
# list is empty).
# $filename is a package global declared with "use vars"; the three subs
# called below take no arguments and read the current file name from it,
# so the order of the calls matters.
foreach $filename (@ccp_files)
{
# read the file: fills @f with the first field of each line and sets $size
get_input();
# find the positions of the lower case letters: sets $se, $length, $count,
# @start and @end
position();
# append one line per recorded start/end pair to the file "output"
get_output();
}
# get_input - load the current *.ccp file into the global array @f.
#
# Reads "$dirname/$filename" (both package globals), discards the first
# line, and stores the first whitespace-separated field of every remaining
# line in @f, one entry per line.  $size is set to the number of entries.
# Takes no arguments; dies when the file cannot be opened.
sub get_input {
    my $path = "$dirname/$filename";

    # Start from an empty array so that entries left over from a previously
    # processed, longer file cannot leak into this one or inflate $size.
    @f = ();

    # Three-argument open with a lexical handle: characters such as ">" or
    # "|" in a file name cannot change how the file is opened.
    open(my $in, '<', $path) or die "can't open input $path: $!";

    # skip the first line
    my $header = <$in>;

    while (my $line = <$in>) {
        # split(" ", ...) splits on runs of whitespace and ignores leading
        # whitespace; only the first field is kept (undef for a blank line).
        my ($field) = split(" ", $line);
        push @f, $field;
    }
    close($in);

    # number of elements now held in @f
    $size = @f;
}
# position - find the lower case stretches of the current sequence.
#
# Concatenates the first $size entries of the global array @f into the
# global string $se (those entries are then set to "", as before) and
# stores its length in $length.  Every maximal run of the letters a-z in
# $se is recorded as a pair of 1-based, inclusive positions: the first
# letter of the run in @start and the last one in @end.  $count is set to
# the number of runs.  Takes no arguments.
#
# A run always gets both positions, also when it begins at the first
# character or reaches the end of the sequence.
sub position {
    # Rebuild the results from scratch so that nothing recorded for a
    # previously processed file can show up in this file's output.
    $count = 0;
    @start = ();
    @end   = ();

    # connect the string (undefined entries, e.g. from blank lines, add
    # nothing)
    $se = join '', grep { defined } @f[0 .. $size - 1];

    # clear the consumed entries of the array
    $f[$_] = "" for 0 .. $size - 1;

    # get the length of string
    $length = length($se);

    # After a successful //g match in scalar context pos() is the offset
    # just past the match, which equals the 1-based position of the last
    # matched letter.
    while ($se =~ /([a-z]+)/g) {
        my $last = pos($se);
        push @start, $last - length($1) + 1;
        push @end,   $last;
    }

    $count = @start;
}
# get_output - append the positions found for the current file to "output".
#
# Removes a trailing ".aa.ccp" from the global $filename (the variable is
# modified in place) and appends one line per recorded region, i.e. for
# the first $count entries of @start and @end, to the file "output" in the
# current directory.  Each line has the form "<name> \t <start> \t <end>".
# Takes no arguments; dies when the file cannot be opened or closed.
sub get_output {
    # append and write the file; a failure is fatal because the results
    # would otherwise be lost without any message
    open(my $out, '>>', 'output') or die "can't open output: $!";

    # Dots are escaped and the pattern is anchored so that only a literal
    # ".aa.ccp" suffix is removed, never a look-alike in the middle of
    # the name.
    $filename =~ s/\.aa\.ccp$//;

    for my $i (0 .. $count - 1) {
        printf {$out} "%s \t %d \t %d\n", $filename, $start[$i], $end[$i];
    }

    # buffered write errors only surface when the handle is closed
    close($out) or die "can't close output: $!";
}
## Normal end
# Reached once the main loop above has finished; the sub definitions that
# precede this line are only compiled, not executed, on the way here.
# Exits with status 0.
exit(0);
## Usage display
# usage - print a short usage message to STDERR.
#
# Takes the program name to show in the synopsis.  Only prints; the caller
# decides whether to exit afterwards.
#
# The synopsis shows -d <directory> as the one required argument: the
# script accepts no <file> argument and has no option defaults.
sub usage {
    my $p = shift;
    print STDERR <<USAGE;
usage: $p [-h] -d <directory>
options:
  -h              Usage display.
  -d <directory>  Directory containing ccp files (*.ccp).
USAGE
    return;
}