1
2 """Encapsulates all necessary things for a cluster-job, like setting up, running, restarting"""
3
4 import os,sys
5 from os import path,unlink
6 from threading import Thread,Lock,Timer
7
8 from PyFoam.Applications.Decomposer import Decomposer
9 from PyFoam.Applications.Runner import Runner
10 from PyFoam.Applications.SteadyRunner import SteadyRunner
11 from PyFoam.Applications.CloneCase import CloneCase
12 from PyFoam.FoamInformation import changeFoamVersion
13 from PyFoam.Error import error,warning
14 from PyFoam import configuration as config
15 from PyFoam.FoamInformation import oldAppConvention as oldApp
16
30
31
33 """ All Cluster-jobs are to be derived from this base-class
34
35 The actual jobs are implemented by overriding methods
36
37 There is a number of variables in this class that are used to
38 'communicate' information between the various stages"""
39
def __init__(self,basename,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             foamVersion=None,
             useFoamMPI=False,
             multiRegion=False):
    """Initializes the Job
    @param basename: Basis name of the job
    @param arrayJob: this job is a parameter variation. The tasks
    are identified by their task-id
    @param hardRestart: treat the job as restarted
    @param autoParallel: Parallelization is handled by the base-class
    @param foamVersion: The foam-Version that is to be used
    @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
    @param multiRegion: This job consists of multiple regions"""

    # this class only works inside an SGE-job: the scheduler passes
    # all job information via environment variables
    if "JOB_ID" not in os.environ:
        error("Not an SGE-job. Environment variable JOB_ID is missing")
    self.jobID=int(os.environ["JOB_ID"])
    self.jobName=os.environ["JOB_NAME"]

    self.basename=path.join(path.abspath(path.curdir),basename)

    # SGE sets RESTARTED to a non-zero value if it restarted the job
    sgeRestarted=False
    if "RESTARTED" in os.environ:
        sgeRestarted=(int(os.environ["RESTARTED"])!=0)

    # restarted either because the scheduler says so or because the
    # caller explicitly requested it
    self.restarted=(sgeRestarted or hardRestart)

    if foamVersion is None:
        # fall back to the version from the PyFoam-configuration
        foamVersion=config().get("OpenFOAM","Version")

    changeFoamVersion(foamVersion)

    if "WM_PROJECT_VERSION" not in os.environ:
        error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

    self.autoParallel=autoParallel
    self.multiRegion=multiRegion

    # parallel set-up: slot count and machine-file are provided by SGE
    self.hostfile=None
    self.nproc=1

    if "NSLOTS" in os.environ:
        self.nproc=int(os.environ["NSLOTS"])
        self.message("Running on",self.nproc,"CPUs")
        if self.nproc>1:
            self.hostfile=path.join(os.environ["TMP"],"machines")
            self.message("Using the machinefile",self.hostfile)
            self.message("Contents of the machinefile:",open(self.hostfile).readlines())

    self.ordinaryEnd=True
    self.listenToTimer=False

    self.taskID=None
    self.arrayJob=arrayJob

    if self.arrayJob:
        # array jobs get their task number from the scheduler
        self.taskID=int(os.environ["SGE_TASK_ID"])

    # unless the OpenFOAM-supplied MPI is to be used, prepend the
    # cluster-specific MPI paths from the configuration.
    # NOTE(review): eval() of a configuration value - acceptable only
    # because the configuration file is trusted local input
    if not useFoamMPI and not foamVersion in eval(config().get("ClusterJob","useFoamMPI",default='[]')):
        self.message("Adding Cluster-specific paths")
        os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
        os.environ["LD_LIBRARY_PATH"]=config().get("ClusterJob","ldpath")+":"+os.environ["LD_LIBRARY_PATH"]

    # becomes True while the case is decomposed for a parallel run
    self.isDecomposed=False
114
116 """Return a string with the full job-ID"""
117 result=str(self.jobID)
118 if self.arrayJob:
119 result+=":"+str(self.taskID)
120 return result
121
123 print "=== CLUSTERJOB: ",
124 for t in txt:
125 print t,
126 print " ==="
127 sys.stdout.flush()
128
130 self.message("Setting Job state to",txt)
131 fName=path.join(self.casedir(),"ClusterJobState")
132 f=open(fName,"w")
133 f.write(txt+"\n")
134 f.close()
135
137 """The file with the job information"""
138 jobfile="%s.%d" % (self.jobName,self.jobID)
139 if self.arrayJob:
140 jobfile+=".%d" % self.taskID
141 jobfile+=".pyFoam.clusterjob"
142 jobfile=path.join(path.dirname(self.basename),jobfile)
143
144 return jobfile
145
147 """The file that makes the job write a checkpoint"""
148 return self.jobFile()+".checkpoint"
149
151 """The file that makes the job write a checkpoint and end"""
152 return self.jobFile()+".stop"
153
220
222 """Returns the actual directory of the case
223 To be overridden if appropriate"""
224 if self.arrayJob:
225 return "%s.%05d" % (self.basename,self.taskID)
226 else:
227 return self.basename
228
230 """Returns just the name of the case"""
231 return path.basename(self.casedir())
232
def foamRun(self,application,
            args=None,
            foamArgs=None,
            steady=False,
            multiRegion=None,
            progress=False,
            noLog=False):
    """Runs a foam utility on the case.
    If it is a parallel job and the grid has
    already been decomposed (and not yet reconstructed) it is run in
    parallel
    @param application: the Foam-Application that is to be run
    @param foamArgs: A list with the additional arguments for the
    Foam-Application
    @param args: A list with additional arguments for the Runner-object
    @param steady: Use the steady-runner
    @param multiRegion: Run this on multiple regions (if None: I don't have an opinion on this)
    @param progress: Only output the time and nothing else
    @param noLog: Do not generate a logfile
    @return: the Runner-object that executed the application"""

    # None-defaults instead of mutable default arguments
    if args is None:
        args=[]
    if foamArgs is None:
        foamArgs=[]

    arglist=args[:]
    arglist+=["--job-id=%s" % self.fullJobId()]

    # run in parallel only while the case is actually decomposed
    if self.isDecomposed and self.nproc>1:
        arglist+=["--procnr=%d" % self.nproc,
                  "--machinefile=%s" % self.hostfile]
    if progress:
        arglist+=["--progress"]
    if noLog:
        arglist+=["--no-log"]

    if self.multiRegion:
        # None means "no opinion" and defaults to running on all regions
        if multiRegion is None or multiRegion==True:
            arglist+=["--all-regions"]
    elif multiRegion and not self.multiRegion:
        warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

    if self.restarted:
        arglist+=["--restart"]

    arglist+=[application]
    if oldApp():
        # old-style applications take <root> <case> as positional arguments
        arglist+=[".",self.casename()]
    else:
        arglist+=["-case",self.casename()]

    arglist+=foamArgs

    self.message("Executing",arglist)

    if steady:
        self.message("Running Steady")
        runner=SteadyRunner(args=arglist)
    else:
        runner=Runner(args=arglist)

    # return the runner so callers can inspect the result of the run
    return runner
288
290 """Automatically decomposes the grid with a metis-algorithm"""
291
292 if path.isdir(path.join(self.casedir(),"processor0")):
293 warning("A processor directory already exists. There might be a problem")
294 args=["--method=metis",
295 "--clear",
296 self.casename(),
297 self.nproc,
298 "--job-id=%s" % self.fullJobId()]
299
300 if self.multiRegion:
301 args.append("--all-regions")
302
303 deco=Decomposer(args=args)
304
306 """Default reconstruction of a parallel run"""
307
308 self.foamRun("reconstructPar",
309 args=["--logname=ReconstructPar"])
310
311 - def setup(self,parameters):
312 """Set up the job. Called in the beginning if the
313 job has not been restarted
314
315 Usual tasks include grid conversion/setup, mesh decomposition etc
316
317 @param parameters: a dictionary with parameters"""
318
319 pass
320
def postDecomposeSetup(self,parameters):
    """Hook for additional setup that should run once the grid has
    been decomposed.

    Meant for tasks that can be done on a decomposed grid.
    The default implementation does nothing

    @param parameters: a dictionary with parameters"""

    pass
329
def run(self,parameters):
    """Hook that runs the actual job (usually the solver).
    The default implementation does nothing

    @param parameters: a dictionary with parameters"""

    pass
335
337 """Additional cleanup, to be executed when the grid is still decomposed
338
339 Usually for tasks that can be done on a decomposed grid
340
341 @param parameters: a dictionary with parameters"""
342
343 pass
344
346 """Clean up after a job
347 @param parameters: a dictionary with parameters"""
348
349 pass
350
352 """Additional reconstruction of parallel runs (Stuff that the
353 OpenFOAM-reconstructPar doesn't do
354 @param parameters: a dictionary with parameters"""
355
356 pass
357
359 """Parameters for a specific task
360 @param id: the id of the task
361 @return: a dictionary with parameters for this task"""
362
363 error("taskParameter not implemented. Not a parameterized job")
364
365 return {}
366
377
    # only honour the callback while listenToTimer is set; otherwise it
    # is ignored with a warning
    if self.listenToTimer:
        self.ordinaryEnd=False
        # a file named 'stop' next to the case makes the running
        # solver write its data and end
        f=open(path.join(self.basename,"stop"),"w")
        f.write("Geh z'haus")
        f.close()
        # remove the trigger file (NOTE(review): presumably so the
        # callback only fires once - confirm against the caller)
        unlink(self.stopFile())
    else:
        warning("I'm not listening to your callbacks")
387
389 """A Cluster-Job that executes a solver. It implements the run-function.
390 If a template-case is specified, the case is copied"""
391
def __init__(self,basename,solver,
             template=None,
             cloneParameters=None,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             foamVersion=None,
             useFoamMPI=False,
             steady=False,
             multiRegion=False,
             progress=False,
             solverProgress=False,
             solverNoLog=False):
    """@param solver: the name of the solver that is to be run
    @param template: Name of the template-case. It is assumed that
    it resides in the same directory as the actual case
    @param cloneParameters: a list with additional parameters for the
    CloneCase-object that copies the template
    @param steady: use the steady-runner when the solver is run
    @param progress: unused in this method (kept for interface compatibility)
    @param solverProgress: Only writes the current time of the solver
    @param solverNoLog: do not generate a logfile for the solver"""

    # None-default instead of a mutable default argument
    if cloneParameters is None:
        cloneParameters=[]

    ClusterJob.__init__(self,basename,
                        arrayJob=arrayJob,
                        hardRestart=hardRestart,
                        autoParallel=autoParallel,
                        foamVersion=foamVersion,
                        useFoamMPI=useFoamMPI,
                        multiRegion=multiRegion)
    self.solver=solver
    self.steady=steady
    # a fresh (non-restarted) job may be cloned from a template case
    # that lives next to the actual case
    if template is not None and not self.restarted:
        template=path.join(path.dirname(self.casedir()),template)
        if path.abspath(basename)==path.abspath(template):
            error("The basename",basename,"and the template",template,"are the same directory")
        clone=CloneCase(
            args=cloneParameters+[template,self.casedir(),"--follow-symlinks"])
    self.solverProgress=solverProgress
    self.solverNoLog=solverNoLog
428
def run(self,parameters):
    """Implements the run-stage by starting the configured solver
    on the case

    @param parameters: a dictionary with parameters (not used here)"""

    solverOptions={"steady"     : self.steady,
                   "multiRegion": False,
                   "progress"   : self.solverProgress,
                   "noLog"      : self.solverNoLog}
    self.foamRun(self.solver,**solverOptions)
435