Simple script for batching tasks in a multi-core/CPU environment
November 16, 2007
Leave a comment
Today I was tasked with keeping all four processors of a machine busy with CPU-intensive processes (in this case transcoding video). I wrote a quick and dirty Perl script to keep running ffmpeg as input files became available, but never run more than four at a time. The script looks for .avi files in the directory from which it was executed. It then maintains a stack of file names, forks a child process for each file and moves it when done.
#!/usr/bin/perl
use POSIX ":sys_wait_h";
use Errno qw(EAGAIN);
# Scheduler configuration and shared state. These are package globals
# because the subroutines further down read and update them directly.
our $secs     = 5;       # seconds to sleep when there is nothing to do
our $numProcs = 0;       # number of child slots currently claimed
our $maxProcs = 4;       # upper bound on concurrently running children
our $fileExt  = ".avi";  # suffix of the input files to pick up
our @files    = ();      # stack of candidate file names still to examine

# Names that have already been handed to a child. This must be initialised
# with an empty list: assigning {} would store a hash *reference* as a
# single bogus key instead of producing an empty hash.
our %was_processed = ();

# Main loop: claim a slot and start a job whenever fewer than $maxProcs
# slots are in use, then try to reap one finished child without blocking.
while (1) {
    #print "numProcs = $numProcs\n";
    if ($numProcs < $maxProcs) {
        # The slot is claimed before execNext() is called; execNext() does
        # not return until it has forked a child for some file.
        $numProcs++;
        execNext();
    }

    my $reaped = reapNext();
    if ($reaped > 0) {
        # A child exited, so its slot is free again.
        $numProcs--;
    }
    elsif ($numProcs == $maxProcs) {
        # All slots are busy and nothing has finished yet: back off.
        #print "Sleeping...\n";
        sleep $secs;
    }
}
# Try to collect one terminated child without blocking.
#
# Returns whatever waitpid(-1, WNOHANG) reports: per the waitpid
# documentation that is the pid of a reaped child, 0 when children exist
# but none has exited yet, or -1 when there are no children at all.
sub reapNext {
    my $any_child = -1;
    my $result    = waitpid($any_child, WNOHANG);
    return $result;
}
# Block until an input file that has not been handed out yet is available,
# record it in %was_processed, and fork a child for it via doFork().
#
# Takes no arguments. Reads and updates the globals @files and
# %was_processed, and reads $fileExt and $secs. Returns whatever doFork()
# returns.
#
# Declared without a prototype: the only call site is compiled before this
# definition, so an empty prototype was never enforced anyway.
sub execNext {
    my $file;

    until (defined $file) {
        # Refill the stack from the current directory if it has run dry.
        getFiles();

        # Discard names already handed to a child; stop at the first fresh
        # one, or when the stack is exhausted.
        while (@files) {
            my $candidate = pop @files;
            next if $was_processed{$candidate};
            $file = $candidate;
            last;
        }

        if (!defined $file) {
            print "Waiting for new $fileExt files...\n";
            sleep $secs;
        }
    }

    # Mark the file before forking so a later directory scan skips it while
    # it is still sitting in the input directory.
    $was_processed{$file} = 1;
    doFork($file);
}
# Refill the global @files stack when it is empty by globbing the current
# directory for names ending in $fileExt. A non-empty stack is left as is.
sub getFiles {
    unless (@files) {
        my @matches = glob("*$fileExt");
        push @files, @matches;
    }
}
# Fork a child process that handles one file.
#
# Argument: $file - name of the input file passed on to doChild().
# In the parent this returns (an empty list) as soon as the fork succeeds.
# In the child it calls doChild(), which ends the process with exit.
# If fork fails with EAGAIN it waits 5 seconds and tries again; any other
# fork failure is fatal.
sub doFork {
    # Take the first argument as a scalar (the original @_[0] was a
    # one-element array slice).
    my ($file) = @_;

    FORK: {
        # Lexical, so the pid no longer leaks into a package global.
        my $pid = fork;

        if ($pid) {
            # We are in the original process.
            return;
        }
        elsif (defined $pid) {
            # We are in the child process.
            doChild($file);
        }
        elsif ($! == EAGAIN) {
            # Temporary failure to fork: wait a little and try again.
            sleep 5;
            redo FORK;
        }
        else {
            die "Can't fork: $!\n";
        }
    }
}
# Child-process worker: transcode one input file with ffmpeg, move the
# source file to "ripped", then terminate the process. Never returns.
#
# Argument: $file - name of the input file, expected to end in $fileExt.
# Reads the global $fileExt to derive the output base name.
# Output: "<base>.ts" is written by ffmpeg; everything ffmpeg and mv print
# is collected in "<base>.log".
# Exit status: 0 when both commands succeeded, 1 otherwise.
sub doChild {
    my ($file) = @_;

    # Strip only a literal, trailing extension. \Q...\E stops the "." in
    # ".avi" from acting as a regex wildcard; previously a name such as
    # "navigate.avi" matched "navi" and produced an empty base name.
    (my $name = $file) =~ s/\Q$fileExt\E\z//;
    my $log = "$name.log";

    print "Ripping $file\n";

    # Point our own stdout/stderr at the log so the external commands
    # inherit it. This replaces the shell redirections and lets the
    # commands run in list form, where file names are never parsed by a
    # shell.
    if (open STDOUT, '>', $log) {
        open STDERR, '>&', \*STDOUT
            or warn "Can't redirect stderr to $log: $!\n";
    }
    else {
        # Best effort: carry on with the inherited handles.
        warn "Can't write $log: $!\n";
    }

    my $failed = 0;

    my @transcode = (
        'ffmpeg', '-i', $file,
        qw(-acodec libfaac -ab 128k -ac 2 -vcodec libx264 -b 1500k -bf 2),
        qw(-f mpegts -aspect 4:3 -y),
        "$name.ts",
    );
    if (system(@transcode) != 0) {
        warn "ffmpeg failed for $file (status $?)\n";
        $failed = 1;
    }

    # As before, the source is moved whether or not ffmpeg succeeded, so
    # the input directory does not fill up with files that were already
    # tried. "--" protects against names starting with "-".
    if (system('mv', '--', $file, 'ripped') != 0) {
        warn "mv failed for $file (status $?)\n";
        $failed = 1;
    }

    #unlink "\"$name\.ts\.mpeg\"";#delete the ts file
    #sleep $secs + rand $secs;
    exit $failed;
}