
Comparing changes

base repository: NixOS/nixpkgs
base: 331f45f7bb5a
head repository: NixOS/nixpkgs
compare: 6928cb22e943
  • 4 commits
  • 3 files changed
  • 1 contributor

Commits on Nov 10, 2019

  1. nixos/slurm: fix X11 with spank module

    * Fix the path in the module so that slurm can find plugstack.conf
    * Fix configure flags so that slurm can be compiled
      without internal X11 support (required for spank-x11).
    markuskowa committed Nov 10, 2019
    Commit: 8219a3b
  2. nixos/slurm: add option for external slurmdbd.conf

    Slurmdbd requires a database password, which is stored in slurmdbd.conf.
    A separate config file keeps the password out of the nix store.

    Slurmdbd 19.5 does not support MySQL socket connections.
    Adapted the slurm test to provide a username and password.
    markuskowa committed Nov 10, 2019
    Commit: 472e165
  3. Commit: 9b28dbd

Commits on Nov 12, 2019

  1. Merge pull request #73179 from markuskowa/fix-slurm

    nixos/slurm: fix test and X11 options
    markuskowa authored Nov 12, 2019
    Commit: 6928cb2
Showing with 91 additions and 50 deletions.
  1. +46 −5 nixos/modules/services/computing/slurm/slurm.nix
  2. +43 −44 nixos/tests/slurm.nix
  3. +2 −1 pkgs/servers/computing/slurm/default.nix
51 changes: 46 additions & 5 deletions nixos/modules/services/computing/slurm/slurm.nix
@@ -18,7 +18,7 @@ let
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
PlugStackConfig=${plugStackConfig}
PlugStackConfig=${plugStackConfig}/plugstack.conf
ProctrackType=${cfg.procTrackType}
${cfg.extraConfig}
'';
@@ -39,6 +39,8 @@ let
DbdHost=${cfg.dbdserver.dbdHost}
SlurmUser=${cfg.user}
StorageType=accounting_storage/mysql
StorageUser=${cfg.dbdserver.storageUser}
${optionalString (cfg.dbdserver.storagePass != null) "StoragePass=${cfg.dbdserver.storagePass}"}
${cfg.dbdserver.extraConfig}
'';

@@ -48,7 +50,6 @@ let
name = "etc-slurm";
paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
};

in

{
@@ -86,6 +87,37 @@ in
'';
};

storageUser = mkOption {
type = types.str;
default = cfg.user;
description = ''
Database user name.
'';
};

storagePass = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
Database password. Note that this password will be world-readable
in the nix store. Use <option>configFile</option> to store the
config file and password outside the nix store.
'';
};

configFile = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
Path to <literal>slurmdbd.conf</literal>. The password for the database connection
is stored in the config file. Use this option to specify a path
outside the nix store. If this option is unset, a configuration file
will be generated. See also:
<citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
<manvolnum>8</manvolnum></citerefentry>.
'';
};

extraConfig = mkOption {
type = types.lines;
default = "";
@@ -112,7 +144,7 @@ in

package = mkOption {
type = types.package;
default = pkgs.slurm;
default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
defaultText = "pkgs.slurm";
example = literalExample "pkgs.slurm-full";
description = ''
@@ -178,9 +210,14 @@ in
If enabled srun will accept the option "--x11" to allow for X11 forwarding
from within an interactive session or a batch job. This activates the
slurm-spank-x11 module. Note that this option also enables
'services.openssh.forwardX11' on the client.
<option>services.openssh.forwardX11</option> on the client.
This option requires slurm to be compiled without native X11 support.
The default behavior is to re-compile the slurm package with native X11
support disabled if this option is set to true.
To use the native X11 support add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
Note that this method will only work with RSA SSH host keys.
'';
};

@@ -356,7 +393,11 @@ in
requires = [ "munged.service" "mysql.service" ];

# slurm strips the last component off the path
environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf";
environment.SLURM_CONF =
if (cfg.dbdserver.configFile == null) then
"${slurmdbdConf}/slurm.conf"
else
cfg.dbdserver.configFile;

serviceConfig = {
Type = "forking";
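
For orientation, the new dbdserver options above could be consumed roughly as follows. This is a minimal sketch of a NixOS module, not taken from the PR itself; the path /etc/slurm/slurmdbd.conf is a hypothetical location for the externally managed config file.

{ ... }:

{
  services.slurm = {
    # With this change, enabling srun --x11 makes the module default to
    # pkgs.slurm.override { enableX11 = false; }, i.e. slurm built without
    # native X11 support so that slurm-spank-x11 can handle the forwarding.
    enableSrunX11 = true;

    dbdserver = {
      enable = true;

      # Either set the database password directly (it then becomes
      # world-readable in the nix store) ...
      # storagePass = "password123";

      # ... or, as added by this PR, point to a slurmdbd.conf kept
      # outside the nix store (hypothetical path).
      configFile = "/etc/slurm/slurmdbd.conf";
    };
  };
}
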
87 changes: 43 additions & 44 deletions nixos/tests/slurm.nix
@@ -1,4 +1,4 @@
import ./make-test.nix ({ lib, ... }:
import ./make-test-python.nix ({ lib, ... }:
let
mungekey = "mungeverryweakkeybuteasytointegratoinatest";

@@ -54,10 +54,15 @@ in {
networking.firewall.enable = false;
services.slurm.dbdserver = {
enable = true;
storagePass = "password123";
};
services.mysql = {
enable = true;
package = pkgs.mysql;
package = pkgs.mariadb;
initialScript = pkgs.writeText "mysql-init.sql" ''
CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
'';
ensureDatabases = [ "slurm_acct_db" ];
ensureUsers = [{
ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
@@ -80,63 +85,57 @@ in {

testScript =
''
startAll;
start_all()
# Set up authentication across the cluster
foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
{
$node->waitForUnit("default.target");
for node in [submit, control, dbd, node1, node2, node3]:
$node->succeed("mkdir /etc/munge");
$node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
$node->succeed("chmod 0400 /etc/munge/munge.key");
$node->succeed("chown munge:munge /etc/munge/munge.key");
$node->succeed("systemctl restart munged");
node.wait_for_unit("default.target")
node.succeed("mkdir /etc/munge")
node.succeed(
"echo '${mungekey}' > /etc/munge/munge.key"
)
node.succeed("chmod 0400 /etc/munge/munge.key")
node.succeed("chown munge:munge /etc/munge/munge.key")
node.succeed("systemctl restart munged")
node.wait_for_unit("munged")
$node->waitForUnit("munged");
};
# Restart the services since they have probably failed due to the munge init
# failure
subtest "can_start_slurmdbd", sub {
$dbd->succeed("systemctl restart slurmdbd");
$dbd->waitForUnit("slurmdbd.service");
$dbd->waitForOpenPort(6819);
};
with subtest("can_start_slurmdbd"):
dbd.succeed("systemctl restart slurmdbd")
dbd.wait_for_unit("slurmdbd.service")
dbd.wait_for_open_port(6819)
# there needs to be an entry for the current
# cluster in the database before slurmctld is restarted
subtest "add_account", sub {
$control->succeed("sacctmgr -i add cluster default");
# check for cluster entry
$control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
};
with subtest("add_account"):
control.succeed("sacctmgr -i add cluster default")
# check for cluster entry
control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
subtest "can_start_slurmctld", sub {
$control->succeed("systemctl restart slurmctld");
$control->waitForUnit("slurmctld.service");
};
with subtest("can_start_slurmctld"):
control.succeed("systemctl restart slurmctld")
control.wait_for_unit("slurmctld.service")
subtest "can_start_slurmd", sub {
foreach my $node (($node1,$node2,$node3))
{
$node->succeed("systemctl restart slurmd.service");
$node->waitForUnit("slurmd");
}
};
with subtest("can_start_slurmd"):
for node in [node1, node2, node3]:
node.succeed("systemctl restart slurmd.service")
node.wait_for_unit("slurmd")
# Test that the cluster works and can distribute jobs;
subtest "run_distributed_command", sub {
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
# The output must contain the 3 different names
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
};
with subtest("run_distributed_command"):
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
# The output must contain the 3 different names
submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
subtest "check_slurm_dbd", sub {
# find the srun job from above in the database
sleep 5;
$control->succeed("sacct | grep hostname");
};
with subtest("check_slurm_dbd"):
# find the srun job from above in the database
control.succeed("sleep 5")
control.succeed("sacct | grep hostname")
'';
})
3 changes: 2 additions & 1 deletion pkgs/servers/computing/slurm/default.nix
@@ -48,7 +48,8 @@ stdenv.mkDerivation rec {
"--with-zlib=${zlib}"
"--sysconfdir=/etc/slurm"
] ++ (optional (gtk2 == null) "--disable-gtktest")
++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
++ (optional enableX11 "--with-libssh2=${libssh2.dev}")
++ (optional (!enableX11) "--disable-x11");


preConfigure = ''
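
For completeness, the new --disable-x11 configure flag is what the module's package override depends on. Below is a minimal sketch of using that override directly; only the slurm attribute and its enableX11 argument come from this diff, the rest is illustrative.

let
  pkgs = import <nixpkgs> { };

  # enableX11 = false now adds "--disable-x11" to configureFlags, producing
  # a slurm without native X11 support, as required by slurm-spank-x11.
  slurmWithoutX11 = pkgs.slurm.override { enableX11 = false; };
in
  slurmWithoutX11

This is the same override the NixOS module now applies by default when enableSrunX11 is set.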