#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Long;  # include this to parse command line parameters

my $trimmedTaxFile;    # path of the trimmed taxonomic hierarchy file (tab-delimited input)
my $line; 	# holds each subsequent line one at a time

# Usage text shared by every command-line error path below.
my $usage = "\nusage: separateRanks.pl --file Trimmed_Taxonomic_Hierarchy_File \n";

# Print usage info and exit if no command line parameters are passed
if ( @ARGV < 1 ) {
	print $usage;
	exit;
}

# Parse command line args by name and save as variable.  GetOptions returns
# false when it rejects the command line (it has already warned on STDERR by
# then), so stop here rather than continue with half-parsed input.
GetOptions( 'file=s' => \$trimmedTaxFile )
	or die $usage;

# Arguments were given but --file was not among them (or it was empty):
# without this check open() below would be handed an undefined path.
if ( !defined $trimmedTaxFile || $trimmedTaxFile eq '' ) {
	die "Missing required option --file\n" . $usage;
}

# open the input file for reading
my $INPUT_FILE;
open( $INPUT_FILE, '<', $trimmedTaxFile )
	or die "Cannot open the input file: $trimmedTaxFile: $!";

# Accumulators: every input line belonging to a rank is appended (newline
# terminated) to that rank's string.
my $currentRank = "";
my $phylumString = "";
my $classString = "";
my $orderString = "";
my $familyString = "";
my $genusString = "";

# The ranks that are collected, each mapped to the string it appends to.
# Any rank not listed here is tracked but its lines are not stored.
my %bufferForRank = (
	phylum => \$phylumString,
	class  => \$classString,
	order  => \$orderString,
	family => \$familyString,
	genus  => \$genusString,
);

# iterating entirely through the taxonomic hierarchy input file...
while ( $line = <$INPUT_FILE> ) {
	chomp $line;

	# Only the first tab-separated column (the rank name) is needed.
	# split yields an empty list for an empty line, so the field may be
	# undef as well as "".
	my ( $rankField ) = split ( /\t/, $line, 2 );

	# A non-empty first column starts a new rank.  When it is missing or
	# empty, $currentRank is left untouched, so the line is treated as a
	# continuation of whatever rank the previous line had -- including a
	# rank that is not collected, in which case the line is dropped too.
	if ( defined $rankField && $rankField ne "" ) {
		$currentRank = $rankField;
	}

	# Store the line only for phylum, class, order, family and genus.
	if ( exists $bufferForRank{ $currentRank } ) {
		${ $bufferForRank{ $currentRank } } .= $line."\n";
	}
}

# Finished reading; release the input handle and surface any read error.
close( $INPUT_FILE )
	or die "Cannot close the input file: $!";

# Remove the final newline from each string so that the output files do not
# end with a trailing newline.
chomp $phylumString;
chomp $classString;
chomp $orderString;
chomp $familyString;
chomp $genusString;


# PRINT THE STRINGS TO THEIR RESPECTIVE OUTPUT FILES

# Rank name (used as the output file extension) paired with its content,
# listed in the order the files are written.
my @rankOutputs = (
	[ phylum => $phylumString ],
	[ class  => $classString ],
	[ order  => $orderString ],
	[ family => $familyString ],
	[ genus  => $genusString ],
);

for my $rankOutput ( @rankOutputs ) {
	my ( $rank, $content ) = @{ $rankOutput };

	# Derive the output name from the input name: a trailing "2.txt" is
	# replaced by ".<rank>" (e.g. taxa2.txt -> taxa.phylum).  If the input
	# name does not end that way, ".<rank>" is appended instead; leaving the
	# name unchanged would make the '>' open below truncate the input file.
	my $outputFile = $trimmedTaxFile;
	unless ( $outputFile =~ s/2\.txt\z/.$rank/ ) {
		$outputFile .= ".$rank";
	}

	open( my $OUTPUT_HANDLE, '>', $outputFile )
		or die "Cannot open the file: $outputFile: $!"."\n";

	# print, not printf: the content is data, and any '%' in it must be
	# written literally instead of being interpreted as a format directive.
	print {$OUTPUT_HANDLE} $content
		or die "Cannot write to the file: $outputFile: $!"."\n";

	# Buffered write errors only show up at close, so check it per file.
	close( $OUTPUT_HANDLE )
		or die "Cannot close the file: $outputFile: $!"."\n";
}
