#!/usr/freeware/bin/perl

require "sys/wait.ph";

# Make STDOUT the default output handle and turn on autoflush so that
# progress lines appear as soon as they are printed.
select(STDOUT);
$| = 1;

# Per-hour CPU budgets, one entry for each hour 0..23 (filled in below).
@zero   = ();
@one    = ();
@medium = ();
@large  = ();
@xlarge = ();

# Processor count and the machine-name prefix used to locate job directories.
$P    = how_many_cpus();
$mach = get_machine_name();

print "Running with ", $P, " processors on ", $mach, "\n";

# Slot ids (0 .. $P-1) that currently have no job running in them.
@ready = (0..$P-1);

# Each level is the same budget repeated for all 24 hours of the day.
# NOTE(review): 3*int($P/4) is 0 whenever $P < 4, i.e. "large" can be
# smaller than "medium" on small machines -- confirm this is intended.
@zero   = (0) x 24;
@one    = (1) x 24;
@small  = (int($P/4)) x 24;
@medium = (int($P/2)) x 24;
@large  = (3*int($P/4)) x 24;
@xlarge = ($P) x 24;

# Hour-by-hour schedules:
#   weekday: hours 0-7 xlarge, hour 8 large, hours 9-18 small, hours 19-23 large
#   weekend: hours 0-10 xlarge, hours 11-23 large
@weekday = (@xlarge[0..7], $large[8], @small[9..18], @large[19..23]);
@weekend = (@xlarge[0..10], @large[11..23]);

# Holiday rates
# @weekday = (@xlarge[0..8], @large[9..23]);
# @weekend = (@xlarge[0..11], @large[12..22], @xlarge[23..23]);

print "Weekday ", join(' ', @weekday), "\n";
print "Weekend ", join(' ', @weekend), "\n";

# Day name => reference to a private copy of that day's 24-hour schedule.
%day = ();
foreach my $name ('Mon', 'Tue', 'Wed', 'Thu', 'Fri') {
    $day{$name} = [@weekday];
}
foreach my $name ('Sat', 'Sun') {
    $day{$name} = [@weekend];
}

# On SIGINT, run CLEANUP (signals the running jobs, then exits).
$SIG{'INT'} = 'CLEANUP';

# Main scheduling loop.  It never terminates on its own; SIGINT is routed
# to CLEANUP, which exits.  Each pass (about 15 seconds apart):
#   1. block in should_we_cool_it() for as long as it decides to pause,
#   2. read the scaled load averages and look up the CPU budget for the
#      current day name and hour,
#   3. kill jobs while more are running than the 1-minute load leaves room for,
#   4. reap jobs that exited on their own and recycle their slot ids,
#   5. start at most one new job if both the 1- and 15-minute loads leave room
#      and a free slot id is available.
# %pids maps child pid => slot id; @ready holds the slot ids not in use.
while (1) {
    undef($did_something);
    should_we_cool_it();

    ($min1, $min5, $min15) = get_la();
    ($dayname, $hour) = get_day_and_hour();
    $ncpus = $day{$dayname}[$hour];
    # A day name or hour without a schedule entry means "run nothing".
    $ncpus = 0 unless defined($ncpus);


    # if there is too large a load average in the last minute,
    # start killing jobs rapidly, but don't die on NFS service (+0.15 or less)

    print scalar(keys(%pids)) . " > " .  int($ncpus - $min1 + 0.15) . "\n";
    while (scalar(keys(%pids)) > int($ncpus - $min1 + 0.15)) {
        # The threshold can be negative; stop once nothing is left to kill.
        last if scalar(keys(%pids)) <= 0;
        print $uptime . "\n";
        # keys() in the tests above resets the hash iterator, so each()
        # returns the first entry; deleting the entry just returned is safe.
        ($pid, $id) = each(%pids);
#       print join(' ', sort(values(%pids))) . "\n";
        print "Killing " . $id . " (" . $pid . ")\n";
        delete $pids{$pid};
        kill 'INT', $pid;
        #print "Waiting " . $pid . "\n";
        sleep(2);
        # Blocks until the job has really gone, then frees its slot.
        waitpid($pid, 0);
        #print "Done " . $flags . "\n";
        push(@ready, $id);
        $did_something = 1;
    }

    # see if there are any zombies
    while (($pid = waitpid(-1, &WNOHANG)) > 0) {
        # Only children recorded in %pids own a slot id; recycling an
        # undefined id would later start a job in a bogus slot.
        unless (exists $pids{$pid}) {
            print $pid . ": Reaped a child we were not tracking\n";
            next;
        }
        print $pid . " (" . $pids{$pid} . "): Found a zombie\n";
        push(@ready, $pids{$pid});
        delete $pids{$pid};
        $did_something = 1;
    }


    # if the machine has been a bit idle for the last 15 minutes,
    # start up some more tasks (one per pass, and only if a slot id is free)

    if (@ready &&
        scalar(keys(%pids)) < int($ncpus - $min15) &&
        scalar(keys(%pids)) < int($ncpus - $min1)) {
        print $uptime . "\n";
        startup(shift @ready);
        $did_something = 1;
    }

    if (defined($did_something)) {
        print "Ideal: " . $ncpus;
        print " Running " . scalar(keys(%pids)) . ": " . join(' ', values(%pids)) . "\n";
        #print "Ready: " . join(' ', @ready) . "\n";
    }

    sleep(15);
}

# startup($id)
#
# Launch one job in slot $id.  The job runs `npri -w setiNN -email` from the
# directory "<machine prefix>NN", where NN is $id zero-padded to two digits.
# On success the child's pid is recorded in %pids (pid => slot id).
#
# Does nothing (apart from a message) when $id is undefined, so an empty
# slot list can never start a job in a bogus slot.
# Retries every 5 seconds while fork fails with EAGAIN; dies on any other
# fork error.
sub startup {
    my $id = shift;

    unless (defined($id)) {
        print "Not starting: no free slot id\n";
        return;
    }
    print "Starting " . $id . "\n";

    my $dir = sprintf("%02d", $id);

    FORK: {
        my $pid = fork;
        if ($pid) {
            # Parent: remember which slot this child occupies.
            $pids{$pid} = $id;
            #print "Child " . $id . "(" . $pid . ")\n";
        } elsif (defined $pid) {
            # Child.  Drop the inherited INT handler first: until exec
            # replaces this process, an INT would otherwise run CLEANUP here
            # and signal every sibling job listed in our copy of %pids.
            $SIG{'INT'} = 'DEFAULT';

            # Never run the job from the wrong directory.
            unless (chdir $mach . $dir) {
                print STDERR "Can't chdir to " . $mach . $dir . ": $!\n";
                exit 1;
            }
            print join(' ', ('npri', '-w', 'seti'.$dir, '-email')) . "\n";
            close(STDIN);
            close(STDOUT);
            close(STDERR);
            exec 'npri', '-w', 'seti'.$dir, '-email';
            # Only reached when exec itself failed.
            exit 1;
        } elsif ($!{EAGAIN}) {
            # Temporarily out of processes: check the errno value itself,
            # not the message text, which differs between platforms.
            sleep 5;
            redo FORK;
        } else {
            die "Can't fork: $!\n";
        }
    }
}

# SIGINT handler: pass the interrupt on to every job recorded in %pids,
# announce our own pid, give the jobs two seconds, then exit with status 0.
sub CLEANUP {
    my @children = keys(%pids);
    if (scalar(@children) > 0) {
        kill('INT', @children);
    }
    print $$, " Exiting\n";
    sleep 2;
    exit 0;
}

# get_machine_name()
#
# Returns the first two characters of the node name printed by `uname -n`
# (the whole name if it is shorter than that).  Dies if uname cannot be run
# or prints nothing, instead of returning an undefined prefix.
sub get_machine_name {
    open(UNAME, "uname -n|") || die "Can't run uname\n";
    my $name = <UNAME>;
    close(UNAME);

    die "Can't run uname\n" unless defined($name);
    chomp($name);
    die "Can't run uname\n" if $name eq '';

    return substr($name, 0, 2);
}

# how_many_cpus()
#
# Returns the processor count, taken from the first whitespace-separated
# field of the first line printed by `hinv`.  Falls back to 1 when hinv
# cannot be run, prints nothing, or the field is not a positive integer.
# Also stores the result in the global $P, as the original did.
sub how_many_cpus {
    my $count;

    if (open(HINV, "hinv|head -1 |")) {
        my $line = <HINV>;
        close(HINV);
        # Capture the first field explicitly: split in void context stopped
        # filling @_ in Perl 5.12, so relying on it yields nothing.
        ($count) = split(' ', $line) if defined($line);
    }

    $count = 1 unless defined($count) && $count =~ /^\d+$/ && $count > 0;

    $P = $count;
    return $count;
}

# get_day_and_hour()
#
# Returns (day name, hour) for the current local time: the day name is one
# of Sun Mon Tue Wed Thu Fri Sat and the hour is two digits, "00".."23".
# Uses the built-in localtime rather than parsing the output of `date`,
# whose field order depends on the locale.
sub get_day_and_hour {
    my @day_names = ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat');

    # localtime fields: sec, min, hour, mday, mon, year, wday (0 = Sunday), ...
    my @now = localtime(time);

    return ($day_names[$now[6]], sprintf("%02d", $now[2]));
}

# get_la()
#
# Returns the 1-, 5- and 15-minute load averages reported by `uptime`, each
# multiplied by $P/2.  The raw uptime line is kept in the global $uptime
# (empty string if nothing could be read).
#
# Accepts both "load average: a, b, c" and "load averages: a b c".  If
# uptime cannot be run or its output cannot be parsed, warns and returns
# the last good reading; before any good reading exists the machine is
# reported as fully busy ($P for each figure) so that an unreadable load is
# never mistaken for an idle machine.
sub get_la {
    my $line;

    if (open(UP, "uptime |")) {
        $line = <UP>;
        close(UP);
    }

    if (defined($line)) {
        chomp($line);
        $uptime = $line;
        if ($line =~ /load averages?:\s*([\d.]+),?\s+([\d.]+),?\s+([\d.]+)/) {
            @last_la = ($P/2*$1, $P/2*$2, $P/2*$3);
            return @last_la[0..2];
        }
        warn "Can't parse load average from: " . $line . "\n";
    } else {
        $uptime = '';
        warn "Can't read output of uptime\n";
    }

    @last_la = ($P, $P, $P) unless @last_la;
    return @last_la[0..2];
}

# should_we_cool_it()
#
# While the file /tmp/no_seti can be opened, keep everything stopped:
# send INT to every job in %pids, wait for each one to exit, hand its slot
# id back to @ready so the slot can be reused once the pause is over, then
# sleep 60 seconds and check for the file again.  Returns as soon as the
# file cannot be opened.
sub should_we_cool_it {
    while (open(COOL, "/tmp/no_seti")) {
        print "COOLed off\n";
        close(COOL);

        my @running = keys(%pids);
        kill('INT', @running) if scalar(@running) > 0;

        foreach my $pid (@running) {
            waitpid($pid, 0);
            print "Killed " . $pid . "\n";
            # Return the slot id; without this the slot is lost for good.
            push(@ready, delete $pids{$pid});
        }

        sleep(60);
    }
}

