diff options
author | Ludovic Courtès <ludo@gnu.org> | 2022-11-22 07:17:17 +0100 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2022-11-22 09:43:54 +0100 |
commit | b2b9571935f9188086b2e7b434840eeda6c42805 (patch) | |
tree | 96ad67225c65cfbd5af5f145c4cb18b3d9713bb4 | |
parent | 60bea075938cae10147d1d8fd414dc0140f4118f (diff) |
offload: Gracefully handle 'guix repl' protocol errors.
Fixes <https://issues.guix.gnu.org/59447>.
Reported by Mathieu Othacehe <othacehe@gnu.org>.
Previously, if a machine had a buggy 'guix repl', 'guix offload' would
crash with a backtrace instead of just ignoring the machine.
* guix/scripts/offload.scm (remote-inferior*): New procedure.
(check-machine-availability)[if-true]: New procedure.
Use 'remote-inferior*' and 'if-true'.
(check-machine-status): Use 'remote-inferior*'.
-rw-r--r-- | guix/scripts/offload.scm | 38 |
1 files changed, 26 insertions, 12 deletions
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index 925325ef5f..8ab393c0ac 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -25,7 +25,7 @@ #:autoload (ssh auth) (userauth-public-key!) #:autoload (ssh session) (make-session connect! get-error - disconnect! session-set!) + disconnect! session-set! session-get) #:autoload (ssh version) (zlib-support?) #:use-module (guix config) #:use-module (guix records) @@ -34,7 +34,8 @@ send-files retrieve-files retrieve-files* remote-inferior report-guile-error) #:use-module (guix store) - #:autoload (guix inferior) (inferior-eval close-inferior inferior?) + #:autoload (guix inferior) (inferior-eval close-inferior + inferior? inferior-protocol-error?) #:autoload (guix derivations) (read-derivation-from-file derivation-file-name build-derivations) @@ -473,6 +474,15 @@ logical cores available, to give a rough estimation of CPU usage. Return (vector-set! vec j (vector-ref vec (- i 1))) (loop (cons val result) (- i 1)))))))) +(define (remote-inferior* session) + "Like 'remote-inferior', but upon error return #f." + (or (guard (c ((inferior-protocol-error? c) #f)) + (remote-inferior session)) + (begin + (warning (G_ "failed to run 'guix repl' on machine '~a'~%") + (session-get session 'host)) + #f))) + (define (choose-build-machine machines) "Return two values: the best machine among MACHINES and its build slot (which must later be released with 'release-build-slot'), or #f and #f." @@ -511,7 +521,7 @@ slot (which must later be released with 'release-build-slot'), or #f and #f." ;; too costly to call it once for every machine. (let* ((session (false-if-exception (open-ssh-session best %short-timeout))) - (node (and session (remote-inferior session))) + (node (and session (remote-inferior* session))) (load (and node (node-load node))) (threshold (build-machine-overload-threshold best)) (space (and node (node-free-disk-space node)))) @@ -708,6 +718,11 @@ machine." (and (string=? (build-machine-name m1) (build-machine-name m2)) (= (build-machine-port m1) (build-machine-port m2)))) + (define (if-true proc) + (lambda args + (when (every ->bool args) + (apply proc args)))) + ;; A given build machine may appear several times (e.g., once for ;; "x86_64-linux" and a second time for "i686-linux"); test them only once. (let ((machines (filter pred @@ -718,12 +733,12 @@ machine." (let* ((names (map build-machine-name machines)) (sockets (map build-machine-daemon-socket machines)) (sessions (map (cut open-ssh-session <> %short-timeout) machines)) - (nodes (map remote-inferior sessions))) - (for-each assert-node-has-guix nodes names) - (for-each assert-node-repl nodes names) - (for-each assert-node-can-import sessions nodes names sockets) - (for-each assert-node-can-export sessions nodes names sockets) - (for-each close-inferior nodes) + (nodes (map remote-inferior* sessions))) + (for-each (if-true assert-node-has-guix) nodes names) + (for-each (if-true assert-node-repl) nodes names) + (for-each (if-true assert-node-can-import) sessions nodes names sockets) + (for-each (if-true assert-node-can-export) sessions nodes names sockets) + (for-each (if-true close-inferior) nodes) (for-each disconnect! sessions)))) (define (check-machine-status machine-file pred) @@ -743,10 +758,9 @@ machine." (define session (open-ssh-session machine %short-timeout)) - (match (remote-inferior session) + (match (remote-inferior* session) (#f - (warning (G_ "failed to run 'guix repl' on machine '~a'~%") - (build-machine-name machine))) + #f) ((? inferior? inferior) (let ((now (car (gettimeofday)))) (match (inferior-eval '(list (uname) |