
| Current Path : /var/www/web-klick.de/dsh/AMTC-RMS-Batch/1.2/bin/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : /var/www/web-klick.de/dsh/AMTC-RMS-Batch/1.2/bin/bstart.pl |
#!/sw/common-os/oss/perl/5.20.3-4.0.0/bin/perl
# $URL: https://svn.photomask.com/DR_FE/RMS-Batch/branches/AMTC-RMS-Batch-1.2/bin/bstart.pl $
# $Id: bstart.pl 74 2021-07-29 16:36:32Z heydero $
package bstart;
use strict;
use warnings;
our $VERSION; # is set in bin/inc.pl
# Compile-time bootstrap: run bin/inc.pl (which, according to the comment on
# $VERSION above, sets the version) and load the 'lsf' environment module.
#
BEGIN
{
    use FindBin;

    # Under UNITTEST the include file is looked up relative to the current
    # directory; otherwise next to the (symlink-resolved) script itself.
    my $dir  = $ENV{UNITTEST} ? './bin' : $FindBin::RealBin;
    my $file = "$dir/inc.pl";

    # The former "do $file || die ..." was parsed as "do( $file || die ... )",
    # so a missing or broken include file was never reported. Check the
    # failure modes explicitly instead of relying on the return value of the
    # file's last statement (which is not visible from here).
    -r $file or die "Couldn't read $file : $!";
    do $file;
    die "Couldn't run $file : $@" if $@;

    use AMTC::Modules;
    AMTC::Modules::load_module( 'lsf' );
}
use Cwd;
use Carp;
use Pod::Usage; # exports pod2usage
use Data::Dumper; # exports Dumper()
use File::Basename; # exports dirname() and basename()
use File::Temp;
use AppConfig;
use Log::Log4perl;
use AMTC::LSF;
use AMTC::RMS::Batch;
use AMTC::TPSM::TaskResult;
use AMTC::Logable 'INFO';
use Env qw( $TECHP_DATA );
use sigtrap qw(BUS SEGV PIPE ABRT);
# IDs of the jobs submitted by the most recent call of start_jobs().
our @Job_Ids; # for unit testing only
# Maps each submitted LSF job ID to a record { job => ..., script => ... };
# filled by start_job() and read by write_job_script_files().
my %Submitted_Scripts;
# Command line settings. The options are assigned in parse_cmdline(), the
# job list file is assigned in main().
my ($Opt_help, $Opt_man, $Opt_tpsm, $Opt_write_scripts, $Joblist_File );
# Script-wide logger; fetched again in reinit_logging().
my $Log = Log::Log4perl->get_logger;
# LSF access object, used to query information about submitted jobs.
my $Lsf = AMTC::LSF->new();
# This block makes the script loadable as a module.
# main() is only run when the file is executed directly, i.e. when there is
# no caller. An exception raised by main() is passed to the logger's logdie;
# otherwise the return value of main() becomes the process exit code.
#
if ( ! caller )
{
my $rc = -1;
eval { $rc = __PACKAGE__->main(); };
$Log->logdie( $@ ) if $@;
exit $rc;
}
#-----------------------------------------------------------------------------
# Entry point of the script's logic. See POD.
#
# Parses the command line, reads the job list file, attempts to start every
# startable job, writes the updated job list back and (optionally) the
# submitted job scripts. Returns 0 if update_jobs_info() reports success,
# otherwise 1. With -h or -man the usage text is printed via pod2usage
# (called with exit status 0). Dies if no job list was given.
#
sub main
{
    ( $Joblist_File ) = parse_cmdline();

    pod2usage( -exitstatus => 0, -msg => "Version: $VERSION" ) if $Opt_help;
    pod2usage( -exitstatus => 0, -verbose => 2 ) if $Opt_man;
    defined $Joblist_File or die "missing joblist";

    reinit_logging();

    $Log->info( "Read job list [$Joblist_File]" );
    my @job_descriptions
        = AMTC::RMS::Batch::read_joblist_file( $Joblist_File );
    my $jobs = map_jobs( @job_descriptions );

    start_jobs( $jobs );
    my $all_started = update_jobs_info( $jobs );

    # The job list is rewritten even if some starts failed, so that the IDs
    # of the jobs which did start are recorded.
    write_joblist_file( $jobs, $Joblist_File );
    if ( $Opt_write_scripts )
    {
        write_job_script_files();
    }

    my $exit_code = $all_started ? 0 : 1;
    print_message_for_tpsm( $exit_code );
    return $exit_code;
}
# Report the outcome of the session to the TPSM, but only if the -tpsm
# option was given. The event is "ERROR" for a non-zero exit code. Otherwise
# it is "BSTART_<identifier>_OK", or "BSTART_OK" if the identifier is empty
# (or otherwise false).
#
sub print_message_for_tpsm
{
    my ( $exit_code ) = @_;
    defined $Opt_tpsm or return;

    my $event = $exit_code != 0
        ? "ERROR"
        : join( "_", "BSTART", ( $Opt_tpsm ? $Opt_tpsm : () ), "OK" );

    my $result = AMTC::TPSM::TaskResult->new;
    $result->add_event( $event );
    $result->write();
}
# Parse the command line and return the joblist file. (It might be undef, if
# not set by the user.)
#
# As a side effect the file-scoped option variables $Opt_help, $Opt_man,
# $Opt_tpsm and $Opt_write_scripts are set from the parsed options.
#
sub parse_cmdline
{
    my $config = AppConfig->new({ PEDANTIC => 1 });
    $config->define( "h|help" );
    $config->define( "man" );
    $config->define( "tpsm=s");
    $config->define( "joblist=s" );
    $config->define( "w|writescripts" );

    # NOTE(review): the result of args() is not checked, so a command line
    # that fails to parse does not stop the program here - confirm whether
    # that is intended.
    $config->args(); # consumes @ARGV

    $Opt_help          = $config->help;
    # This statement used to end in a comma, which silently chained it with
    # the following assignment via the comma operator.
    $Opt_man           = $config->man;
    $Opt_tpsm          = $config->tpsm;
    $Opt_write_scripts = $config->writescripts;
    return $config->joblist;
}
# Build and return a hashref that maps the position (0, 1, 2, ...) of each
# given job to a record holding the job itself plus a "startable" flag.
# A job is startable (flag 1, otherwise 0) if it has no "job_ID" yet.
#
# Example:
#
# $rv = {
#     0 => {
#         job       => $job,
#         startable => 1,
#     },
#     # etc.
# }
#
sub map_jobs
{
    my %record_for;
    for my $index ( 0 .. $#_ )
    {
        my $job_id = $_[$index]->{rms}->{job_ID};

        # An empty XML element <job_ID> is delivered by XML::Simple as an
        # empty hashref, so a hashref counts as "no job ID" as well.
        my $has_no_id = !defined $job_id || ref( $job_id ) eq 'HASH';

        $record_for{$index} = {
            job       => $_[$index],
            startable => $has_no_id ? 1 : 0,
        };
    }
    return \%record_for;
}
# Enrich the given $jobs hashref (as built by 'map_jobs' and filled by
# 'start_jobs') with runtime information queried from LSF. Returns true if
# and only if none of the startable jobs carries an error message, i.e. all
# startable jobs have successfully been started.
#
# For each started job the following fields are touched:
#
# $job->{rms}->{job_ID}      # added
# $job->{rms}->{job_name}    # set to the name known to LSF, because a name
#                            # is generated if the job list file gave none
#                            # (see the 'create_bsub_options' routine)
# $job->{actual_output_path} # set to the output file reported by LSF (the
#                            # configured output path may contain dynamic
#                            # patterns like "%J"); deleted if LSF reports
#                            # no output file
#
# Dies with an internal error if a startable job has neither an error
# message nor a job ID.
#
sub update_jobs_info
{
    my ( $jobs ) = @_;
    my $all_started = 1;

    INDEX:
    for my $idx ( sort { $a <=> $b } keys %{$jobs} )
    {
        my $record = $jobs->{$idx};
        next INDEX if !$record->{startable};

        if ( $record->{error_msg} )
        {
            $all_started = 0;
            next INDEX;
        }

        my $job_id = $record->{job_id}
            or die "internal error: no job_id [idx=$idx]";
        my $job = $record->{job};
        $job->{rms}->{job_ID} = $job_id;

        my ( $lsf_job ) = $Lsf->query_jobs( { job_id => $job_id } );
        $Log->debug(
            sprintf "LSF job #%d : %s", $idx, Dumper( $lsf_job )
        );

        # Without LSF information only the job ID can be recorded.
        unless ( defined $lsf_job )
        {
            $Log->warn( "Couldn't get job info for job $job_id" );
            next INDEX;
        }

        $job->{rms}->{job_name} = $lsf_job->submit_info->job_name;
        if ( !$lsf_job->runtime_info->output_file )
        {
            delete $job->{actual_output_path};
        }
        else
        {
            $job->{actual_output_path}
                = $lsf_job->runtime_info->output_file;
        }
    }
    return $all_started;
}
# Write the jobs of the given $jobs hashref, ordered by their numeric index,
# back to the file named by $Joblist_File (i.e. the job list is overwritten).
# The hash is expected to have been updated by the 'update_jobs_info'
# routine beforehand.
#
sub write_joblist_file
{
    my ( $jobs ) = @_;

    my @ordered_jobs
        = map { $jobs->{$_}->{job} } sort { $a <=> $b } keys %{$jobs};

    $Log->info( "Update job list [$Joblist_File]" );
    AMTC::RMS::Batch::write_joblist_file(
        jobs   => \@ordered_jobs,
        output => $Joblist_File,
    );
}
# Start the given jobs (hashref) and store the result of the start operations
# in the given hashref which is expected to have been built by means of
# the 'map_jobs' routine, i.e. the hashref is enriched with either the job ID
# of a successfully started job or the error message, if the affected job
# could not be started. Jobs which are not startable are skipped.
#
# The IDs of all started jobs are also collected in @Job_Ids, and a summary
# is logged at the end.
#
# Example:
#
# $rv = {
#     0 => {
#         job       => $job,
#         startable => 1,
#         job_id    => 12345,
#     },
#     1 => {
#         job       => $job,
#         startable => 1,
#         error_msg => 'One or more options are invalid ... etc.',
#     },
#     # etc.
# }
#
sub start_jobs
{
    my ( $jobs ) = @_;
    @Job_Ids = (); # reset
    $Log->info( "Start jobs" );
    $Log->debug( "jobs: ", Dumper( $jobs ) );
    my ( $success_count, $error_count ) = ( 0, 0 );

    # Attempt to start each startable job
    foreach my $i ( sort { $a <=> $b } keys %$jobs )
    {
        next unless $jobs->{$i}->{startable};
        my $job = $jobs->{$i}->{job};
        my $j = $i + 1; # job index for the user perspective
        $Log->debug( "job description #$j : ", Dumper( $job ) );

        # The trailing "1" makes the eval return true on success, so a
        # failure is detected reliably.
        my $job_id;
        if ( !eval { $job_id = start_job( $job ); 1 } )
        {
            my $error = $@ || 'unknown error';
            $jobs->{$i}->{error_msg} = $error;
            $Log->error( "Couldn't start job #$j: $error" );
            $error_count++;
            next;
        }

        $jobs->{$i}->{job_id} = $job_id;
        $success_count++;
        push @Job_Ids, $job_id;

        # The job is already submitted at this point. A query that yields
        # no job must not abort the run, otherwise the job list would never
        # be updated with the IDs of the submitted jobs.
        my ( $lsf_job ) = $Lsf->query_jobs( { job_id => $job_id } );
        if ( defined $lsf_job )
        {
            my $job_name = $lsf_job->submit_info->job_name;
            $Log->info( "Job $j has started [$job_id,$job_name]" );
        }
        else
        {
            $Log->warn( "Couldn't get job info for job $job_id" );
            $Log->info( "Job $j has started [$job_id]" );
        }
    }

    # Log a summary
    my $startable_count = $success_count + $error_count;
    if ( $startable_count == 0 )
    {
        $Log->info( "No startable jobs found. Nothing to do." );
    }
    else
    {
        my $msg = sprintf "Started %d of %d jobs",
            $success_count, $startable_count;
        $Log->info( $msg );
    }
    if ( $error_count )
    {
        my $starts = $error_count == 1 ? 'start' : 'starts';
        $Log->warn( "$error_count job $starts failed." );
    }
}
# Submit the given job and return the LSF job_id or die if the job could
# not be started.
#
# The job script is generated by 'create_job_script_lines' and piped to
# bsub via STDIN. The output of bsub (STDOUT and STDERR) is captured in a
# temporary file and scanned for the "Job <ID> is submitted to ..." line.
# The job and its script are remembered in %Submitted_Scripts under the
# job ID.
#
sub start_job
{
    my ( $job ) = @_;
    my $script = join( '', create_job_script_lines( $job ) );
    my $tmp = File::Temp->new( UNLINK => 1 );

    # The command is run by the shell because of the redirection, so the
    # temp file name is single-quoted in case the temp directory contains
    # blanks or other shell meta characters.
    ( my $quoted_name = $tmp->filename ) =~ s/'/'\\''/g;
    my $command = "bsub > '$quoted_name' 2>&1";

    open( my $bsub, '|-', $command )
        or die "Failed to open bsub: $!";
    print {$bsub} $script;

    # Closing a pipe fails if the command exits with a non-zero status.
    close( $bsub ) or do {
        my @error = AMTC::FileUtils::read_file( $tmp->filename );
        die "bsub failed:\n@error";
    };

    $tmp->seek( 0, SEEK_SET );
    my @out = <$tmp>;
    foreach my $line ( @out )
    {
        my ( $job_id ) = $line =~ /^Job <(\d+)> is submitted to .+/
            or next;
        $Submitted_Scripts{$job_id}{job}    = $job;
        $Submitted_Scripts{$job_id}{script} = $script;
        return $job_id;
    }
    die "Can't extract job ID from bsub output:\n@out";
}
# Return the lines of a script for the given job. The script is based
# on the template bin/job.template.PL. It is ready to be submitted
# via bsub.
#
# The job specific data lines are inserted immediately after the template's
# __DATA__ line, and the "#BSUB" option lines (preceded by an empty line)
# immediately after the template's first line. Dies if the template has no
# __DATA__ line.
#
sub create_job_script_lines
{
    my ( $job ) = @_;

    my $template = do {
        my $dir = $FindBin::RealBin;
        $dir =~ s/\/t$/\/bin/; # hack to enable easy unit test
        "$dir/job.template.PL";
    };
    my @lines = AMTC::FileUtils::read_file( $template );

    # Without the marker there is no place for the data lines. Formerly the
    # index ran past the end of the template in that case, which injected an
    # undefined line into the script.
    my ( $data_line_index )
        = grep { $lines[$_] =~ /^__DATA__/ } 0 .. $#lines;
    defined $data_line_index
        or die "No __DATA__ marker found in $template";

    splice @lines, $data_line_index + 1, 0,
        create_job_script_data_lines( $job );

    # Inserted last, so that the index of the __DATA__ line computed above
    # is not shifted before it is used.
    splice @lines, 1, 0,
        "\n", # empty line for aesthetic reason
        create_bsub_option_lines( $job );

    #TODO: If needed insert usage lines here and info about who generated
    # the script.
    return @lines;
}
# Build the lines that go right behind the __DATA__ marker of the job
# script. They control what the job script does at runtime.
#
# Every line consists of a "command letter", a colon and a value:
#
#   m:load cats          # environment setting of type "module"
#   e:FOO=42             # environment setting of type "variable"
#   e:FOO                # ... the same without a value
#   c:some-program.sh    # the command line of the job (always the last line)
#
# Dies if an environment setting has no name or an unknown type.
# See also the manual of bin/job.template.PL
#
sub create_job_script_data_lines
{
    my ( $job ) = @_;
    my @data;

    for my $setting ( @{ $job->{environment} } )
    {
        my $kind = $setting->{type} // '';
        my $name = $setting->{name}
            // die "missing name element of job->environment";
        my $value = $setting->{value};

        if ( $kind eq 'variable' )
        {
            my $assignment = defined $value ? "=$value" : '';
            push @data, 'e:' . $name . $assignment . "\n";
        }
        elsif ( $kind eq 'module' )
        {
            my $module = defined $value ? "$name/$value" : $name;
            push @data, "m:load $module\n";
        }
        else
        {
            die "unknown environment type [$kind]";
        }
    }

    push @data, 'c:' . create_job_cmdline( $job ) . "\n";
    return @data;
}
# Build the command line for the given job from a template chosen by the
# job's script type ("Shell" if none is set). The job's script path is
# substituted for every %%script_path%% placeholder of the template.
# Dies if the job has no script path or if the script type is unknown.
#
# NOTE(review): the script path is inserted verbatim, i.e. without any
# shell quoting - confirm that job lists come from trusted sources only.
#
sub create_job_cmdline
{
    my ( $job ) = @_;
    my $script_path = $job->{script_path} || die "script_path not set";
    my $script_type = $job->{script_type} || 'Shell';

    my $cats_template = q{echo 'TCL package require amtc::cats::scripts ; amtc::cats::scripts::run_and_exit %%script_path%%' | CATS -nodisplay};
    my %template_for = (
        'Shell' => q{/bin/sh -c '%%script_path%%'},
        # Both CATS script types share the same template.
        map { $_ => $cats_template }
            qw( CATS_Fracture_TCL CATS_Fracture_CINC ),
    );

    my $template = $template_for{$script_type}
        || die "Unknown script type [$script_type]";
    ( my $cmdline = $template ) =~ s/%%script_path%%/$script_path/g;

    $Log->debug( "script_type=[$script_type] cmdline=[$cmdline]" );
    return $cmdline;
}
# Derive the bsub options from the given job description and return them as
# a hash (option => value). Options without a defined value are left out.
#
# The job name (-J) is the name given in the job's rms section. If there is
# none, it defaults to the job's reference (or "nameless"), followed by a dot
# and the script type if that is defined. STDOUT (-oo) and STDERR (-eo) both
# go to the output path of the rms section, or to the job's
# actual_output_path if the former is not set.
#
sub create_bsub_options
{
    my ( $job ) = @_;

    my $log_path = $job->{rms}{output_path} || $job->{actual_output_path};

    my @default_name_parts = ( $job->{reference} || 'nameless' );
    push @default_name_parts, $job->{script_type}
        if defined $job->{script_type};
    my $job_name = $job->{rms}{job_name} || join( '.', @default_name_parts );

    my @candidates = (
        [ '-R'   => $job->{rms}{resource_requirement} ],
        [ '-J'   => $job_name ],
        [ '-cwd' => $job->{working_directory} ],
        [ '-q'   => $job->{rms}{queue_name} ],
        [ '-sp'  => $job->{rms}{priority} ],
        [ '-oo'  => $log_path ],    # stdout
        [ '-eo'  => $log_path ],    # stderr
    );

    # Keep only the options which actually have a value.
    my %options = map { @{$_} } grep { defined $_->[1] } @candidates;
    return %options;
}
# Return one "#BSUB <option> <value>" line per bsub option of the given job,
# sorted by option name. The lines are meant to be placed into the job script
# right behind the shebang line. Example of the result:
#
# #BSUB -J nameless.Shell
# #BSUB -eo bstart-test-main.out.%J
# #BSUB -oo bstart-test-main.out.%J
#
sub create_bsub_option_lines
{
    my ( $job ) = @_;
    my %value_of = create_bsub_options( $job );

    my @lines;
    for my $option ( sort keys %value_of )
    {
        push @lines, sprintf( "#BSUB %s %s\n", $option, $value_of{$option} );
    }
    return @lines;
}
# Write a job script file for every started job, i.e. for every entry of
# %Submitted_Scripts. The file name is relative (no directory part) and has
# the format "job.$job_name.$job_id.PL".
#
sub write_job_script_files
{
    # Job IDs are numeric; sorting gives a deterministic write order.
    foreach my $job_id ( sort { $a <=> $b } keys %Submitted_Scripts )
    {
        my $submitted = $Submitted_Scripts{$job_id};
        my $job       = $submitted->{job};

        # The job name is normally filled in by 'update_jobs_info'. It is
        # still undefined if that routine could not get the job info from
        # LSF and the job list gave no name. In that case use the name
        # which 'create_bsub_options' computes for the job, instead of
        # producing a "job..<id>.PL" file name.
        my $job_name = $job->{rms}{job_name} // do {
            my %options = create_bsub_options( $job );
            $options{'-J'};
        };

        my $filename = "job.$job_name.$job_id.PL";
        AMTC::FileUtils::write_file( $filename, $submitted->{script} );
    }
}
# Re-initialize log4perl from the configuration file below $TECHP_DATA, if
# that file exists, and fetch the script-wide logger again. Does nothing
# when running under UNITTEST.
#
sub reinit_logging
{
    if ( $ENV{UNITTEST} )
    {
        return;
    }

    my $ini_path = "$TECHP_DATA/AMTC-RMS-Batch/bstart-log4perl.ini";
    if ( -e $ini_path )
    {
        $Log->debug( "Re-init log4perl using $ini_path" );
        Log::Log4perl->init( $ini_path );
    }

    # Fetched again in either case, after a possible re-initialization.
    $Log = Log::Log4perl->get_logger;
}
# Callback for log4perl configuration files (see the POD): returns the
# session log file name which AMTC::App::RmsBatch derives from the current
# job list file.
#
sub logfile
{
    # Loaded on demand; only needed when log4perl calls back into here.
    require AMTC::App::RmsBatch;
    return AMTC::App::RmsBatch::create_logfile_name( $Joblist_File );
}
1;
__END__
=pod
=head1 NAME
bstart.pl - Batch Starter for LSF jobs
=head1 USAGE
bstart.pl [-h] [-man] [-tpsm identifier] [-w|-writescripts] -joblist file
=head1 REQUIRED ARGUMENTS
The batch starter has no required arguments. However the "option" C<-joblist>
is mandatory instead.
If you feel this is weird, then you are right. Unfortunately in order to make
the batch starter able to be used with the TPSM, the batch starter's command
line interface is enforced to follow that TPSM design constraint.
Again, the batch starter requires one mandatory "option"; the so-called job
list file. That file is expected to be an XML document which describes LSF
jobs. An XML parser validates the given file. See the man page of
C<AMTC::RMS::Batch> in the C<amtc-common-perl> package for the documentation
of the XML structure.
=head1 OPTIONS
=head2 -h
Print a short help text with version, usage and options.
=head2 -man
Print the man page.
=head2 -tpsm session_identifier
Write some TPSM specific XML to STDOUT when the program finishes. This option
is only necessary if the batch monitor is to be integrated into a TPSM flow.
If the bstart session ran without error, then the session identifier is
embedded in the event string for the TPSM. Otherwise the event string is
"ERROR".
Example of the XML (in case the session identifier is "FOO"):
<RESULT>
<EVENTS>
<EVENT id="BSTART_FOO_OK" />
</EVENTS>
<OUTPUTS />
</RESULT>
=head2 -w | -writescripts
Engineering feature! Write the scripts which are submitted to LSF to the
working directory. The scripts can be used to resubmit jobs independently
from I<bstart.pl>. The resulting file names have the format
"job.$job_name.$job_id.PL".
=head1 DESCRIPTION
The batch starter submits none, one or more jobs to LSF. The jobs that are
to be submitted (aka started) are described by means of an XML document
(aka job list file). That file is updated B<in-place (!)> with information
like the resulting LSF job ID or the actual output path for each job which
has successfully been started.
The batch starter applies the following rules at runtime.
=head2 Rule # 1 - Determine what jobs are to be started
The job list file is inspected. Only jobs which do B<not> have a C<job_ID>
will be attempted to start. All other jobs are ignored.
=head2 Rule # 2 - Set the job environment for each individual job
A job description in the job list file can have an C<environment> section
which describes the environment the job shall run in. That section is modeled
as a list of C<setting>s. All environment changes are applied in the same
order as given in list.
Each environment setting has the following attributes:
=head3 type (mandatory)
A type can be either "module" or "variable". If the type is "module", then
the job environment is set by means of the (external) C<module> program.
Otherwise an environment variable of the given name is set or unset
according to the rules as described below.
=head3 name (mandatory)
The C<name> of an environment C<setting> is either a module class or a
variable name depending of the setting's type. See the man page of
C<AMTC::Modules> for the meaning of "module class" and how a process
environment can be modified by means of the C<module> program.
=head3 value (optional)
The C<value> of a C<setting> defines the value to be set depending on the
setting's type.
If the type is "module", then the given module is either switched to, or
loaded in case neither the module itself nor one of its "siblings" is loaded.
If no module name is given, then the module's class is loaded. Again, check
the man page of C<AMTC::Modules>.
If the type is "variable", then the environment variable of the given C<name>
is set or unset. The variable is set, if a C<value> is given. Otherwise
the variable is unset.
=head2 Rule # 3 - Set the job output path
A job may have a so-called output path (i.e. a file) where it can write to.
If no output path is set, then the LSF captures any job output and sends
it as email to the job owner.
A job description may have an C<actual_output_path> and/or an
C<output_path> in the C<rms> section of the job description.
If the C<actual_output_path> is set, then this path is used verbatim as the
output path when submitting the affected job. Everything that the job writes
to STDOUT and/or STDERR will be written to that file in append mode.
If only the C<output_path> in the C<rms> section is set, then this path
is used when submitting the job. The C<output_path> may have so-called
dynamic patterns which are resolved at runtime, e.g. "%J" in the path is
resolved to the job ID at runtime. See the man page of the (external)
C<bsub> program for the details.
=head2 Rule # 4 - Update the job list file in place
The given job list file is updated after all jobs that are eligible for
starting have been attempted to be submitted to LSF.
For each job who was successfully submitted/started the following elements
(XPath expressions) in the job description are updated:
=head3 //jobs/job/rms/job_ID
The "job_ID" is added, i.e. if the batch starter would run again using the
updated job list file, the affected job would be ignored.
=head3 //jobs/job/actual_output_path
The "actual_output_path" is updated, or added if necessary, with the
resolved output path. It is deleted, if the affected job has no "output_path".
=head1 CONFIGURATION AND ENVIRONMENT
An optional log4perl configuration file can be used to control the output
of the program. The program tries to read the configuration file from
$TECHP_DATA/AMTC-RMS-Batch/bstart-log4perl.ini
If it fails, then the output is written to STDOUT in a default format.
(timestamp and log level followed by the status information.)
The batch starter has a Perl routine which can be used as a callback function
while initializing log4perl from a configuration file in order to get
a session specific logfile path. The name of the Perl routine is
C<bstart::logfile>.
Example of the configuration of a log4perl file appender that logs to
a session log file:
log4perl.appender.session = Log::Log4perl::Appender::File
log4perl.appender.session.filename = sub { bstart::logfile() }
# etc.
The C<bstart::logfile> routine uses the path of the given job list file
(argument). If the suffix of the job list file is ".xml", then the suffix
is replaced with ".log". Otherwise the suffix ".log" is simply added.
Example:
job list file => session log file
----------------------------------------
./foo/bar/jobs.xml => ./foo/bar/jobs.log
(See also section REQUIRED ARGUMENTS.)
=head1 DEPENDENCIES
=over 4
=item LSF 9.1
=item Modules 3.1.6
=item Log::Log4perl 1.46
=item XML::LibXML 2.0118
=item AMTC::LSF 1.0
=item AppConfig 1.68
=item XML::Simple 2.20
=item amtc-common-perl 1.003
=back
=head1 AUTHOR
Olaf Heyder E<lt>heydero@drux25E<gt> or E<lt>olaf.heyder@amtc-dresden.comE<gt>
If you found a bug, please talk to MDP first. It might be a feature. If not,
then please report it using L<https://helpdesk.photomask.com/>.
=head1 COPYRIGHT
Copyright (C) 2016 by Advanced Mask Technology Center GmbH & Co. KG
This script is for AMTC business use only. All information is confidential.
=cut