Merge branch 'master' into el5
Conflicts: .gitignore sources torque-munge-size.patch torque.spec
This commit is contained in:
commit
72f042a527
16
.gitignore
vendored
16
.gitignore
vendored
@ -1,4 +1,14 @@
|
||||
torque-2.3.10.tar.gz
|
||||
/torque-2.3.12.tar.gz
|
||||
/torque-2.3.13.tar.gz
|
||||
/torque-2.5.2.tar.gz
|
||||
/torque-2.5.3.tar.gz
|
||||
/torque-2.5.4.tar.gz
|
||||
/torque-2.5.5.tar.gz
|
||||
/torque-2.5.7.tar.gz
|
||||
/torque-3.0.0.tar.gz
|
||||
/torque-3.0.0-snap.201102011355.tar.gz
|
||||
/torque-3.0.1.tar.gz
|
||||
/torque-3.0.2.tar.gz
|
||||
/torque-3.0.3.tar.gz
|
||||
/torque-3.0.4.tar.gz
|
||||
/torque-4.2.6.1.tar.gz
|
||||
/torque-4.2.8.tar.gz
|
||||
/torque-4.2.10.tar.gz
|
||||
|
@ -1,20 +0,0 @@
|
||||
To setup a basic single-node localhost-only batch system, install the
|
||||
torque-server, torque-mom, and torque-scheduler packages, and do something like
|
||||
this:
|
||||
|
||||
/sbin/chkconfig pbs_mom on
|
||||
/sbin/chkconfig pbs_server on
|
||||
/sbin/chkconfig pbs_sched on
|
||||
/bin/hostname --long > %{torquehomedir}/server_priv/nodes
|
||||
/bin/hostname --long > %{torquehomedir}/server_name
|
||||
service pbs_server start
|
||||
qmgr -c "s s scheduling=true"
|
||||
qmgr -c "c q batch queue_type=execution"
|
||||
qmgr -c "s q batch started=true"
|
||||
qmgr -c "s q batch enabled=true"
|
||||
qmgr -c "s q batch resources_default.nodes=1"
|
||||
qmgr -c "s q batch resources_default.walltime=3600"
|
||||
qmgr -c "s s default_queue=batch"
|
||||
service pbs_mom restart
|
||||
service pbs_sched restart
|
||||
|
84
README.Fedora
Normal file
84
README.Fedora
Normal file
@ -0,0 +1,84 @@
|
||||
This README describes how to get the most basic working
|
||||
torque service on a single host.
|
||||
|
||||
To setup a basic single-node localhost-only batch system, install the
|
||||
torque-server, torque-mom, and torque-scheduler packages, and do something like
|
||||
this:
|
||||
|
||||
0) If torque is built with munge support then this
|
||||
must be enabled first on all nodes. The munge
|
||||
package should already be installed.
|
||||
|
||||
Create a munge key with
|
||||
|
||||
/usr/sbin/create-munge-key
|
||||
|
||||
Copy resulting key /etc/munge/munge.key to
|
||||
all torque nodes in your cluster including
|
||||
pbs_server, pbs_mom and client (qstat,qsub) nodes.
|
||||
|
||||
1) Get your full hostname with
|
||||
|
||||
# /bin/hostname --long
|
||||
|
||||
e.g. myhost.example.org
|
||||
|
||||
2) Edit /etc/torque/server_name
|
||||
to contain the single line
|
||||
|
||||
myhost.example.org
|
||||
|
||||
3) Edit /etc/torque/mom/config
|
||||
to contain the single line
|
||||
|
||||
$pbsserver myhost.example.org
|
||||
|
||||
4) Create a torque serverdb file.
|
||||
# /usr/sbin/pbs_server -D -t create
|
||||
|
||||
Warning: this will remove any existing serverdb
|
||||
file located at /var/lib/torque/server_priv/serverdb
|
||||
|
||||
You will have to press Ctrl-C to stop the pbs_server command; it will
|
||||
only take a moment to create this file.
|
||||
|
||||
5) Start the pbs_server and configure it.
|
||||
# service pbs_server start
|
||||
# qmgr -c "s s scheduling=true"
|
||||
# qmgr -c "c q batch queue_type=execution"
|
||||
# qmgr -c "s q batch started=true"
|
||||
# qmgr -c "s q batch enabled=true"
|
||||
# qmgr -c "s q batch resources_default.nodes=1"
|
||||
# qmgr -c "s q batch resources_default.walltime=3600"
|
||||
# qmgr -c "s s default_queue=batch"
|
||||
|
||||
6) Add one batch worker to your pbs_server.
|
||||
|
||||
# qmgr -c "c n myhost.example.org"
|
||||
|
||||
7) Start the pbs_mom and pbs_sched daemons.
|
||||
|
||||
# service pbs_mom start
|
||||
# service pbs_sched start
|
||||
|
||||
8) Use chkconfig to start the services at boot time.
|
||||
|
||||
# /sbin/chkconfig pbs_mom on
|
||||
# /sbin/chkconfig pbs_server on
|
||||
# /sbin/chkconfig pbs_sched on
|
||||
# /sbin/chkconfig munge on
|
||||
|
||||
9) Submit a test job.
|
||||
As a regular user (not as root), run the following:
|
||||
|
||||
$ qsub <<EOF
|
||||
hostname
|
||||
echo "Hi I am a batch job running in torque"
|
||||
EOF
|
||||
|
||||
10) Monitor the state of that job with qstat.
|
||||
|
||||
In case of problems, first look in /var/log/torque.
|
||||
|
||||
|
||||
|
3
sources
3
sources
@ -1 +1,2 @@
|
||||
aa033adc22df8ab333e5014dd93754b6 torque-2.5.7.tar.gz
|
||||
3dd4348f54ba236ee7c208cc6b97f674 torque-4.2.8.tar.gz
|
||||
541f58ab46166e86d7a468500be3fa4d torque-4.2.10.tar.gz
|
||||
|
173
torque-buffer-overrun-2.5.5.patch
Normal file
173
torque-buffer-overrun-2.5.5.patch
Normal file
@ -0,0 +1,173 @@
|
||||
diff -uNr torque-2.5.5.ORIG/src/lib/Libnet/get_hostaddr.c torque-2.5.5/src/lib/Libnet/get_hostaddr.c
|
||||
--- torque-2.5.5.ORIG/src/lib/Libnet/get_hostaddr.c 2011-06-08 18:40:00.251913002 +0200
|
||||
+++ torque-2.5.5/src/lib/Libnet/get_hostaddr.c 2011-06-08 18:41:06.651911946 +0200
|
||||
@@ -147,7 +147,8 @@
|
||||
|
||||
if (hp == NULL)
|
||||
{
|
||||
- sprintf(log_buffer,"cannot resolve IP address for host '%s' herror=%d: %s",
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ "cannot resolve IP address for host '%s' herror=%d: %s",
|
||||
hostname,
|
||||
h_errno,
|
||||
hstrerror(h_errno));
|
||||
diff -uNr torque-2.5.5.ORIG/src/server/req_quejob.c torque-2.5.5/src/server/req_quejob.c
|
||||
--- torque-2.5.5.ORIG/src/server/req_quejob.c 2011-06-08 18:40:00.315913002 +0200
|
||||
+++ torque-2.5.5/src/server/req_quejob.c 2011-06-08 18:49:36.449912391 +0200
|
||||
@@ -1053,17 +1053,19 @@
|
||||
{
|
||||
if (errno == 0)
|
||||
{
|
||||
- sprintf(log_buffer, "job %s in unexpected state '%s'",
|
||||
- pj->ji_qs.ji_jobid,
|
||||
- PJobSubState[pj->ji_qs.ji_substate]);
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ "job %s in unexpected state '%s'",
|
||||
+ pj->ji_qs.ji_jobid,
|
||||
+ PJobSubState[pj->ji_qs.ji_substate]);
|
||||
}
|
||||
else
|
||||
{
|
||||
- sprintf(log_buffer, "job %s in unexpected state '%s' (errno=%d - %s)",
|
||||
- pj->ji_qs.ji_jobid,
|
||||
- PJobSubState[pj->ji_qs.ji_substate],
|
||||
- errno,
|
||||
- strerror(errno));
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ "job %s in unexpected state '%s' (errno=%d - %s)",
|
||||
+ pj->ji_qs.ji_jobid,
|
||||
+ PJobSubState[pj->ji_qs.ji_substate],
|
||||
+ errno,
|
||||
+ strerror(errno));
|
||||
}
|
||||
|
||||
log_err(errno, id, log_buffer);
|
||||
@@ -1264,9 +1266,10 @@
|
||||
|
||||
if (LOGLEVEL >= 6)
|
||||
{
|
||||
- sprintf(log_buffer, "successfully moved file '%s' for job '%s'",
|
||||
- namebuf,
|
||||
- preq->rq_ind.rq_jobfile.rq_jobid);
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ "successfully moved file '%s' for job '%s'",
|
||||
+ namebuf,
|
||||
+ preq->rq_ind.rq_jobfile.rq_jobid);
|
||||
|
||||
log_record(
|
||||
PBSEVENT_JOB,
|
||||
@@ -1382,9 +1385,11 @@
|
||||
{
|
||||
char tmpLine[1024];
|
||||
|
||||
- sprintf(tmpLine, "cannot save job - errno=%d - %s",
|
||||
- errno,
|
||||
- strerror(errno));
|
||||
+ snprintf(tmpLine, sizeof(tmpLine),
|
||||
+ "cannot save job - errno=%d - %s",
|
||||
+ errno,
|
||||
+ strerror(errno));
|
||||
+
|
||||
|
||||
log_err(errno, id, tmpLine);
|
||||
|
||||
@@ -1408,9 +1413,11 @@
|
||||
{
|
||||
/* reply failed, purge the job and close the connection */
|
||||
|
||||
- sprintf(log_buffer, "cannot report jobid - errno=%d - %s",
|
||||
- errno,
|
||||
- strerror(errno));
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ "cannot report jobid - errno=%d - %s",
|
||||
+ errno,
|
||||
+ strerror(errno));
|
||||
+
|
||||
|
||||
log_err(errno, id, log_buffer);
|
||||
|
||||
@@ -1700,11 +1707,12 @@
|
||||
|
||||
/* need to format message first, before request goes away */
|
||||
|
||||
- sprintf(log_buffer, msg_jobnew,
|
||||
- preq->rq_user, preq->rq_host,
|
||||
- pj->ji_wattr[(int)JOB_ATR_job_owner].at_val.at_str,
|
||||
- pj->ji_wattr[(int)JOB_ATR_jobname].at_val.at_str,
|
||||
- pj->ji_qhdr->qu_qs.qu_name);
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ msg_jobnew,
|
||||
+ preq->rq_user, preq->rq_host,
|
||||
+ pj->ji_wattr[JOB_ATR_job_owner].at_val.at_str,
|
||||
+ pj->ji_wattr[JOB_ATR_jobname].at_val.at_str,
|
||||
+ pj->ji_qhdr->qu_qs.qu_name);
|
||||
|
||||
/* acknowledge the request with the job id */
|
||||
|
||||
@@ -1739,8 +1747,10 @@
|
||||
{
|
||||
if (LOGLEVEL >= 7)
|
||||
{
|
||||
- sprintf(log_buffer, "Trying to AUTORUN job %s",
|
||||
- pj->ji_qs.ji_jobid);
|
||||
+ snprintf(log_buffer, sizeof(log_buffer),
|
||||
+ "Trying to AUTORUN job %s",
|
||||
+ pj->ji_qs.ji_jobid);
|
||||
+
|
||||
log_record(
|
||||
PBSEVENT_JOB,
|
||||
PBS_EVENTCLASS_JOB,
|
||||
@@ -1861,7 +1871,7 @@
|
||||
|
||||
if (!user_account_read_user(arguser))
|
||||
{
|
||||
- sprintf(log_buffer, "user_account_verify(%s, %s) -> USER NOT FOUND",
|
||||
+ snprintf(log_buffer,sizeof(log_buffer), "user_account_verify(%s, %s) -> USER NOT FOUND",
|
||||
arguser,
|
||||
argaccount);
|
||||
|
||||
@@ -1872,7 +1882,7 @@
|
||||
{
|
||||
if (strcmp(argaccount, UserAcct.ActAdr[i]) == 0)
|
||||
{
|
||||
- sprintf(log_buffer, "user_account_verify(%s, %s) -> SUCCESS",
|
||||
+ snprintf(log_buffer,sizeof(log_buffer), "user_account_verify(%s, %s) -> SUCCESS",
|
||||
arguser,
|
||||
argaccount);
|
||||
|
||||
@@ -1882,7 +1892,7 @@
|
||||
}
|
||||
} /* END for (i) */
|
||||
|
||||
- sprintf(log_buffer, "user_account_verify(%s, %s) -> FAILED",
|
||||
+ snprintf(log_buffer, sizeof(log_buffer), "user_account_verify(%s, %s) -> FAILED",
|
||||
arguser,
|
||||
argaccount);
|
||||
|
||||
@@ -1909,7 +1919,7 @@
|
||||
|
||||
if (!user_account_read_user(arguser))
|
||||
{
|
||||
- sprintf(log_buffer, "user_account_default(%s) = USER NOT FOUND",
|
||||
+ snprintf(log_buffer,sizeof(log_buffer), "user_account_default(%s) = USER NOT FOUND",
|
||||
arguser);
|
||||
|
||||
goto user_account_default_done;
|
||||
@@ -1917,7 +1927,7 @@
|
||||
|
||||
if (UserAcct.ActCnt < 1)
|
||||
{
|
||||
- sprintf(log_buffer, "user_account_default(%s) = NO PROJECT FOUND",
|
||||
+ snprintf(log_buffer, sizeof(log_buffer), "user_account_default(%s) = NO PROJECT FOUND",
|
||||
arguser);
|
||||
|
||||
goto user_account_default_done;
|
||||
@@ -1925,7 +1935,7 @@
|
||||
|
||||
rc = UserAcct.ActAdr[0];
|
||||
|
||||
- sprintf(log_buffer, "user_account_default(%s) = %s",
|
||||
+ snprintf(log_buffer, sizeof(log_buffer), "user_account_default(%s) = %s",
|
||||
arguser,
|
||||
rc);
|
||||
|
@ -1,12 +1,25 @@
|
||||
diff -uNr torque-3.0.1.ORIG/src/include/libpbs.h torque-3.0.1/src/include/libpbs.h
|
||||
--- torque-3.0.1.ORIG/src/include/libpbs.h 2011-06-17 19:19:32.984380003 +0200
|
||||
+++ torque-3.0.1/src/include/libpbs.h 2011-06-17 19:23:19.406379620 +0200
|
||||
@@ -117,7 +117,7 @@
|
||||
From 12a8d7dde1d07aed670f0dd50b317b256daaa991 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Ha=C3=AFkel=20Gu=C3=A9mar?= <hguemar@fedoraproject.org>
|
||||
Date: Sun, 12 Jan 2014 11:42:32 +0100
|
||||
Subject: [PATCH] munge size fix
|
||||
|
||||
---
|
||||
src/include/libpbs.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/include/libpbs.h b/src/include/libpbs.h
|
||||
index 6d32c8b..06b1bec 100644
|
||||
--- a/src/include/libpbs.h
|
||||
+++ b/src/include/libpbs.h
|
||||
@@ -121,7 +121,7 @@
|
||||
#define EOF -1
|
||||
#endif
|
||||
|
||||
-#define MUNGE_SIZE 256 /* I do not know what the proper size of this should be. My
|
||||
+#define MUNGE_SIZE 1024 /* I do not know what the proper size of this should be. My
|
||||
+#define MUNGE_SIZE 1024 /* I do not know what the proper size of this should be. My
|
||||
testing with munge shows it creates a string of 128 bytes */
|
||||
|
||||
/* enums for standard job files (sync w/TJobFileType[]) */
|
||||
|
||||
--
|
||||
1.8.4.2
|
||||
|
||||
|
1225
torque.spec
1225
torque.spec
File diff suppressed because it is too large
Load Diff
@ -1,11 +1,11 @@
|
||||
[Desktop Entry]
|
||||
Encoding=UTF-8
|
||||
Name=xpbs
|
||||
Name=xPBS
|
||||
GenericName=PBS/TORQUE client
|
||||
Comment=View job status and submit jobs
|
||||
Exec=xpbs
|
||||
Icon=xpbs.png
|
||||
Terminal=false
|
||||
Type=Application
|
||||
Categories=Application;Other;
|
||||
Version=1.1.12
|
||||
Categories=Education;Science;ComputerScience;ParallelComputing;
|
||||
Version=1.0
|
||||
|
@ -1,11 +1,11 @@
|
||||
[Desktop Entry]
|
||||
Encoding=UTF-8
|
||||
Name=xpbsmon
|
||||
Name=xPBSMon
|
||||
GenericName=PBS/TORQUE cluster monitor
|
||||
Comment=View node status
|
||||
Exec=xpbsmon
|
||||
Icon=xpbsmon.png
|
||||
Terminal=false
|
||||
Type=Application
|
||||
Categories=Application;Other;
|
||||
Version=2.3
|
||||
Categories=Education;Science;ComputerScience;ParallelComputing;
|
||||
Version=1.0
|
||||
|
Loading…
Reference in New Issue
Block a user