1
2 """Encapsulates all necessary things for a cluster-job, like setting
3 up, running, restarting"""
4
5 import os,sys,subprocess
6 from os import path,unlink
7 from threading import Thread,Lock,Timer
8
9 from PyFoam.Applications.Decomposer import Decomposer
10 from PyFoam.Applications.Runner import Runner
11 from PyFoam.Applications.SteadyRunner import SteadyRunner
12 from PyFoam.Applications.CloneCase import CloneCase
13 from PyFoam.Applications.FromTemplate import FromTemplate
14
15 from PyFoam.FoamInformation import changeFoamVersion
16 from PyFoam.FoamInformation import foamVersion as getFoamVersion
17 from PyFoam.Error import error,warning
18 from PyFoam import configuration as config
19 from PyFoam.FoamInformation import oldAppConvention as oldApp
20 from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory
21
22 from PyFoam.ThirdParty.six import print_,iteritems
23
37
38
40 """ All Cluster-jobs are to be derived from this base-class
41
42 The actual jobs are implemented by overriding methods
43
44 There is a number of variables in this class that are used to
45 'communicate' information between the various stages"""
46
def __init__(self,
             basename,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             doAutoReconstruct=None,
             foamVersion=None,
             compileOption=None,
             useFoamMPI=False,
             multiRegion=False,
             parameters=None,
             isDecomposed=False):
    """Initializes the Job
    @param basename: Basis name of the job
    @param arrayJob: this job is a parameter variation. The tasks
    are identified by their task-id
    @param hardRestart: treat the job as restarted
    @param autoParallel: Parallelization is handled by the base-class
    @param doAutoReconstruct: Automatically reconstruct the case if
    autoParallel is set. If the value is None then it is looked up from
    the configuration
    @param foamVersion: The foam-Version that is to be used
    @param compileOption: Forces compile-option (usually 'Opt' or 'Debug')
    @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
    @param multiRegion: This job consists of multiple regions
    @param parameters: Dictionary with parameters that are being passed to the Runner
    @param isDecomposed: Assume that the job is already decomposed"""

    # Fix: a mutable dict must not be a default argument (it would be
    # shared between all calls); None is the sentinel instead
    if parameters is None:
        parameters = {}

    # This class only works inside an SGE batch job
    if "JOB_ID" not in os.environ:
        error("Not an SGE-job. Environment variable JOB_ID is missing")
    self.jobID = int(os.environ["JOB_ID"])
    self.jobName = os.environ["JOB_NAME"]

    self.basename = path.join(path.abspath(path.curdir), basename)

    # SGE sets RESTARTED to a non-zero value when it restarts a job
    sgeRestarted = False
    if "RESTARTED" in os.environ:
        sgeRestarted = (int(os.environ["RESTARTED"]) != 0)

    self.restarted = sgeRestarted or hardRestart

    if foamVersion is None:
        foamVersion = config().get("OpenFOAM", "Version")

    changeFoamVersion(foamVersion, compileOption=compileOption)

    if "WM_PROJECT_VERSION" not in os.environ:
        error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

    self.autoParallel = autoParallel

    self.doAutoReconstruct = doAutoReconstruct
    if self.doAutoReconstruct is None:
        self.doAutoReconstruct = config().getboolean("ClusterJob", "doAutoReconstruct")

    self.multiRegion = multiRegion

    self.parameters = parameters

    self.hostfile = None
    self.nproc = 1

    if "NSLOTS" in os.environ:
        self.nproc = int(os.environ["NSLOTS"])
        self.message("Running on", self.nproc, "CPUs")
        if self.nproc > 1:
            # SGE provides the machine file in $TMP for parallel environments
            self.hostfile = path.join(os.environ["TMP"], "machines")
            self.message("Using the machinefile", self.hostfile)
            self.message("Contents of the machinefile:", open(self.hostfile).readlines())

    self.ordinaryEnd = True
    self.listenToTimer = False

    self.taskID = None
    self.arrayJob = arrayJob

    if self.arrayJob:
        self.taskID = int(os.environ["SGE_TASK_ID"])

    # NOTE(review): eval() of a configuration value executes arbitrary code
    # if the config file is not trusted; kept for backwards-compatibility
    if not useFoamMPI and foamVersion not in eval(config().get("ClusterJob", "useFoamMPI", default='[]')):
        # Use the cluster-specific MPI instead of the one bundled with OpenFOAM
        self.message("Adding Cluster-specific paths")
        os.environ["PATH"] = config().get("ClusterJob", "path") + ":" + os.environ["PATH"]
        os.environ["LD_LIBRARY_PATH"] = config().get("ClusterJob", "ldpath") + ":" + os.environ["LD_LIBRARY_PATH"]

    self.isDecomposed = isDecomposed
141 """Return a string with the full job-ID"""
142 result=str(self.jobID)
143 if self.arrayJob:
144 result+=":"+str(self.taskID)
145 return result
146
153
160
162 """The file with the job information"""
163 jobfile="%s.%d" % (self.jobName,self.jobID)
164 if self.arrayJob:
165 jobfile+=".%d" % self.taskID
166 jobfile+=".pyFoam.clusterjob"
167 jobfile=path.join(path.dirname(self.basename),jobfile)
168
169 return jobfile
170
172 """The file that makes the job write a checkpoint"""
173 return self.jobFile()+".checkpoint"
174
176 """The file that makes the job write a checkpoint and end"""
177 return self.jobFile()+".stop"
178
246
248 """Returns the actual directory of the case
249 To be overridden if appropriate"""
250 if self.arrayJob:
251 return "%s.%05d" % (self.basename,self.taskID)
252 else:
253 return self.basename
254
256 """Returns just the name of the case"""
257 return path.basename(self.casedir())
258
260 """Execute a shell command in the case directory. No checking done
261 @param cmd: the command as a string"""
262 oldDir=os.getcwd()
263 self.message("Changing directory to",self.casedir())
264 os.chdir(self.casedir())
265 self.message("Executing",cmd)
266 try:
267 retcode = subprocess.call(cmd,shell=True)
268 if retcode < 0:
269 self.message(cmd,"was terminated by signal", -retcode)
270 else:
271 self.message(cmd,"returned", retcode)
272 except OSError:
273 e = sys.exc_info()[1]
274 self.message(cmd,"Execution failed:", e)
275
276 self.message("Executiong of",cmd,"ended")
277 self.message("Changing directory back to",oldDir)
278 os.chdir(oldDir)
279
281 """Looks for a template file and evaluates the template using
282 the usual parameters
283 @param fileName: the name of the file that will be
284 constructed. The template file is the same plus the extension '.template'"""
285
286 self.message("Building file",fileName,"from template with parameters",
287 self.parameters)
288
289 argList=["--output-file=%s" % path.join(self.casedir(),fileName),
290 "--dump-used-values"
291 ]
292
293 tmpl=FromTemplate(args=argList,
294 parameters=self.parameters)
295
def foamRun(self, application,
            args=None,
            foamArgs=None,
            steady=False,
            multiRegion=None,
            progress=False,
            compress=False,
            noLog=False):
    """Runs a foam utility on the case.

    If it is a parallel job and the grid has already been decomposed
    (and not yet reconstructed) it is run in parallel.

    @param application: the Foam-Application that is to be run
    @param args: A list with additional arguments for the Runner-object
    @param foamArgs: A list with the additional arguments for the
    Foam-Application
    @param steady: Use the steady-runner
    @param multiRegion: Run this on multiple regions (if None: I don't have an opinion on this)
    @param progress: Only output the time and nothing else
    @param compress: Compress the log-file
    @param noLog: Do not generate a logfile"""

    # Fix: mutable list defaults must not appear in the signature
    if args is None:
        args = []
    if foamArgs is None:
        foamArgs = []

    arglist = args[:]
    arglist += ["--job-id=%s" % self.fullJobId()]
    for k, v in iteritems(self.parameters):
        arglist += ["--parameter=%s:%s" % (str(k), str(v))]

    # Run in parallel only while the case is actually decomposed
    if self.isDecomposed and self.nproc > 1:
        arglist += ["--procnr=%d" % self.nproc,
                    "--machinefile=%s" % self.hostfile]

    if progress:
        arglist += ["--progress"]
    if noLog:
        arglist += ["--no-log"]
    if compress:
        arglist += ["--compress"]

    if self.multiRegion:
        # None means "no opinion" -> default to all regions for a multi-region case
        if multiRegion in (None, True):
            arglist += ["--all-regions"]
    elif multiRegion and not self.multiRegion:
        warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

    if self.restarted:
        arglist += ["--restart"]

    arglist += [application]
    # Old OpenFOAM versions took <root> <case> instead of -case
    if oldApp():
        arglist += [".", self.casename()]
    else:
        arglist += ["-case", self.casename()]

    arglist += foamArgs

    self.message("Executing", arglist)

    if steady:
        self.message("Running Steady")
        runner = SteadyRunner(args=arglist)
    else:
        runner = Runner(args=arglist)
358
360 """Automatically decomposes the grid with a metis-algorithm"""
361
362 if path.isdir(path.join(self.casedir(),"processor0")):
363 warning("A processor directory already exists. There might be a problem")
364
365 defaultMethod="metis"
366
367 if getFoamVersion()>=(1,6):
368 defaultMethod="scotch"
369
370 args=["--method="+defaultMethod,
371 "--clear",
372 self.casename(),
373 self.nproc,
374 "--job-id=%s" % self.fullJobId()]
375
376 if self.multiRegion:
377 args.append("--all-regions")
378
379 deco=Decomposer(args=args)
380
382 """Default reconstruction of a parallel run"""
383
384 if self.doAutoReconstruct:
385 self.isDecomposed=False
386
387 self.foamRun("reconstructPar",
388 args=["--logname=ReconstructPar"])
389 else:
390 self.message("No reconstruction (because asked to)")
391
392 - def setup(self,parameters):
393 """Set up the job. Called in the beginning if the
394 job has not been restarted
395
396 Usual tasks include grid conversion/setup, mesh decomposition etc
397
398 @param parameters: a dictionary with parameters"""
399
400 pass
401
def postDecomposeSetup(self, parameters):
    """Hook for additional setup once the grid has been decomposed.

    Intended for tasks that can be performed on a decomposed grid.
    The default implementation does nothing; concrete jobs override it.

    @param parameters: a dictionary with parameters"""
    return None
410
def run(self, parameters):
    """Run the actual job (usually the solver).

    The default implementation does nothing; concrete jobs override it.

    @param parameters: a dictionary with parameters"""
    return None
416
418 """Additional cleanup, to be executed when the grid is still decomposed
419
420 Usually for tasks that can be done on a decomposed grid
421
422 @param parameters: a dictionary with parameters"""
423
424 pass
425
427 """Clean up after a job
428 @param parameters: a dictionary with parameters"""
429
430 pass
431
433 """Additional reconstruction of parallel runs (Stuff that the
434 OpenFOAM-reconstructPar doesn't do
435 @param parameters: a dictionary with parameters"""
436
437 pass
438
440 """Parameters for a specific task
441 @param id: the id of the task
442 @return: a dictionary with parameters for this task"""
443
444 error("taskParameter not implemented. Not a parameterized job")
445
446 return {}
447
449 """Additional parameters
450 @return: a dictionary with parameters for this task"""
451
452 warning("Method 'additionalParameters' not implemented. Not a problem. Just saying")
453
454 return {}
455
466
468 if self.listenToTimer:
469 self.ordinaryEnd=False
470 f=open(path.join(self.basename,"stop"),"w")
471 f.write("Geh z'haus")
472 f.close()
473 unlink(self.stopFile())
474 else:
475 warning("I'm not listening to your callbacks")
476
478 """A Cluster-Job that executes a solver. It implements the run-function.
479 If a template-case is specified, the case is copied"""
480
def __init__(self, basename, solver,
             template=None,
             cloneParameters=None,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             doAutoReconstruct=None,
             foamVersion=None,
             compileOption=None,
             useFoamMPI=False,
             steady=False,
             multiRegion=False,
             parameters=None,
             progress=False,
             solverProgress=False,
             solverNoLog=False,
             solverLogCompress=False,
             isDecomposed=False):
    """@param template: Name of the template-case. It is assumed that
    it resides in the same directory as the actual case
    @param cloneParameters: a list with additional parameters for the
    CloneCase-object that copies the template
    @param solverProgress: Only writes the current time of the solver"""

    # Fix: mutable defaults ([] and {}) are shared between all calls;
    # use None as the sentinel and create fresh objects here
    if cloneParameters is None:
        cloneParameters = []
    if parameters is None:
        parameters = {}

    ClusterJob.__init__(self, basename,
                        arrayJob=arrayJob,
                        hardRestart=hardRestart,
                        autoParallel=autoParallel,
                        doAutoReconstruct=doAutoReconstruct,
                        foamVersion=foamVersion,
                        compileOption=compileOption,
                        useFoamMPI=useFoamMPI,
                        multiRegion=multiRegion,
                        parameters=parameters,
                        isDecomposed=isDecomposed)
    self.solver = solver
    self.steady = steady
    # Only clone from the template on a fresh (non-restarted) run
    if template is not None and not self.restarted:
        template = path.join(path.dirname(self.casedir()), template)
        if path.abspath(basename) == path.abspath(template):
            error("The basename", basename, "and the template", template, "are the same directory")
        if isDecomposed:
            # Fix: build a new list instead of mutating the caller's
            # cloneParameters list (and the old shared default) in place
            cloneParameters = cloneParameters + ["--parallel"]
        clone = CloneCase(
            args=cloneParameters + [template, self.casedir(), "--follow-symlinks"])
    self.solverProgress = solverProgress
    self.solverNoLog = solverNoLog
    self.solverLogCompress = solverLogCompress
529
def run(self, parameters):
    """Execute the configured solver on the case.

    Delegates to foamRun with the solver options that were recorded
    at construction time.

    @param parameters: a dictionary with parameters"""
    solverOptions = dict(steady=self.steady,
                         multiRegion=False,
                         progress=self.solverProgress,
                         noLog=self.solverNoLog,
                         compress=self.solverLogCompress)
    self.foamRun(self.solver, **solverOptions)
537
538
539