nixpkgs-custom/nixos/tests/slurm.nix

81 lines
2.1 KiB
Nix
Raw Permalink Blame History

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

import ./make-test.nix ({ pkgs, ... }:
let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
slurmconfig = {
client.enable = true;
controlMachine = "control";
nodeName = ''
control
NodeName=node[1-3] CPUs=1 State=UNKNOWN
'';
partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
};
in {
name = "slurm";
nodes =
let
computeNode =
{ config, pkgs, ...}:
{
# TODO slrumd port and slurmctld port should be configurations and
# automatically allowed by the firewall.
networking.firewall.enable = false;
services.munge.enable = true;
services.slurm = slurmconfig;
};
in {
control =
{ config, pkgs, ...}:
{
networking.firewall.enable = false;
services.munge.enable = true;
services.slurm = {
server.enable = true;
} // slurmconfig;
};
node1 = computeNode;
node2 = computeNode;
node3 = computeNode;
};
testScript =
''
startAll;
# Set up authentification across the cluster
foreach my $node (($control,$node1,$node2,$node3))
{
$node->waitForUnit("default.target");
$node->succeed("mkdir /etc/munge");
$node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
$node->succeed("chmod 0400 /etc/munge/munge.key");
$node->succeed("systemctl restart munged");
}
# Restart the services since they have probably failed due to the munge init
# failure
subtest "can_start_slurmctld", sub {
$control->succeed("systemctl restart slurmctld");
$control->waitForUnit("slurmctld.service");
};
subtest "can_start_slurmd", sub {
foreach my $node (($control,$node1,$node2,$node3))
{
$node->succeed("systemctl restart slurmd.service");
$node->waitForUnit("slurmd");
}
};
# Test that the cluster work and can distribute jobs;
subtest "run_distributed_command", sub {
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
# The output must contain the 3 different names
$control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
};
'';
})