#!/usr/bin/env perl

# Copyright (c) Illumina 2009
# Author: Roman Petrovski
# This software is covered by the "Illumina Genome Analyzer Software
# License Agreement" and the "Illumina Source Code License Agreement",
# and certain third party copyright/licenses, and any user of this
# source file is bound by the terms therein (see accompanying files
# Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
# Illumina_Source_Code_License_Agreement.pdf and third party
# copyright/license notices).

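# Takes a CASAVA Task Manager task file as input and prints a table of the
# accumulated run time (finish time minus start time) of the finished tasks,
# grouped by the first word of their application tag, followed by the total.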

use warnings;
use strict;

use File::Basename;
use Getopt::Long;
use lib '/home/psgendb/local/pkg/CASAVA_v1.8.2-build/lib/CASAVA-1.8.2/perl';
use Casava::Common::Log;

use perlchartdir;

#------------------------------------------------------------------------------

use Casava::TaskManager qw(%taskFields);

# Command-line settings; GetOptions overwrites these defaults further down.
my $CFG_WORKFLOW_FILE = q{};    # tasks file path; empty means "not specified"
my $CFG_HELP          = 0;      # becomes true when help is requested


# Converts a task-file timestamp into a ChartDirector chart time.
#
# Parameters:
#   $taskTime - text containing a timestamp of the form "Y/M/D h:m:s"
#               (six groups of digits); may be undef.
# Returns:
#   whatever perlchartdir::chartTime() returns for the six parsed fields, or
#   undef when $taskTime is undef or holds no timestamp of that form.
sub textTimeToCtTime
{
    my $taskTime = shift;

    # An explicit scalar undef (not a bare return) keeps the value callers
    # receive unchanged in every calling context.
    return undef unless defined $taskTime;

    my ( $taskYear, $taskMonth, $taskDay, $taskHour, $taskMinute, $taskSecond )
      = $taskTime =~ m{(\d+)/(\d+)/(\d+)\s(\d+):(\d+):(\d+)};

    # A failed match leaves every capture undefined; report "no time" instead
    # of handing six undefs to chartTime().
    return undef unless defined $taskSecond;

    return perlchartdir::chartTime(
        $taskYear, $taskMonth,  $taskDay,
        $taskHour, $taskMinute, $taskSecond
    );
}

# Parse the command line into the $CFG_* settings. GetOptions returns false
# when it encountered an unknown or malformed option.
my $result = GetOptions(
    "tasks|t=s" => \$CFG_WORKFLOW_FILE,
    "help|h"    => \$CFG_HELP
);

# Usage text shown for --help and appended to command-line error messages.
my $usage =
    "task2gantt.pl [options]\n"
  . "Prints the summed run time of the finished tasks per application tag.\n"
  . "\t--tasks|t                - PATH to tasks file (required).\n"
  . "\t--help|-h                - Print this help\n";

# Stop on a bad command line instead of silently ignoring the bad option.
errorExit "ERROR: invalid command line options\n$usage" unless $result;

if ($CFG_HELP) {
    print $usage;
    exit(0);
}

errorExit "ERROR: tasks file not specified\n$usage" if ( $CFG_WORKFLOW_FILE eq "" );


# NOTE(review): the next five variables are not read anywhere in the visible
# part of this file; they are kept in case code outside this view uses them.
my @tasks = ();
my $ctMinSubmitTime;
my $MinSubmitTimeText;
my $ctMaxFinishTime;
my $maxFinishTimeText;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the error message carries the OS reason.
open(my $workflowFh, '<', $CFG_WORKFLOW_FILE)
  or die("Failed to open $CFG_WORKFLOW_FILE: $!");

my %taskTime;       # first word of the application tag => summed run time
my $totTime = 0;    # run time summed over every counted task

while (my $line = <$workflowFh>)
{
    chomp $line;
    my @fields = split(/\t/, $line);

    # Although the fields are tab-separated they are also space-padded!
    # Trim off the spaces so the same tokens in different fields match.
    for my $field (@fields)
    {
        $field =~ s/^\s+//;
        $field =~ s/\s+$//;
    }

    # Only finished tasks are counted; short or blank lines have no status
    # field at all and are skipped as well.
    my $taskState = $fields[$taskFields{status}];
    next unless defined $taskState && $taskState eq 'finished';

    my $taskAppTag     = $fields[$taskFields{appTag}];
    my $taskStartTime  = $fields[$taskFields{startTime}];
    my $taskFinishTime = $fields[$taskFields{finishTime}];
    next unless defined $taskStartTime && defined $taskFinishTime;

    my $ctTaskFinishTime = textTimeToCtTime($taskFinishTime);
    my $ctTaskStartTime  = textTimeToCtTime($taskStartTime);

    # Without both time stamps no duration can be computed for this task.
    next unless defined $ctTaskStartTime && defined $ctTaskFinishTime;

    my $time = $ctTaskFinishTime - $ctTaskStartTime;

    # Tasks are grouped by the first space-separated word of their tag; a
    # missing or empty tag is collected under the empty key.
    my ($tagName) = split(/ +/, defined $taskAppTag ? $taskAppTag : '');
    $tagName = '' unless defined $tagName;

    $taskTime{$tagName} += $time;
    $totTime += $time;
}

close($workflowFh);

# Report, one row per tag; keys are sorted so the output order is stable
# from run to run.
print "Task:\tTime:\n";
foreach my $key (sort keys %taskTime) {
    print "$key\t$taskTime{$key}\n";
}
print "Total\t$totTime\n";

