diff options
-rw-r--r-- | gnu/packages/fabric-management.scm | 4 | ||||
-rw-r--r-- | gnu/packages/patches/ucx-tcp-iface-ioctl.patch | 105 |
2 files changed, 64 insertions, 45 deletions
diff --git a/gnu/packages/fabric-management.scm b/gnu/packages/fabric-management.scm index ccdaa0ee0a..f41b4e99ed 100644 --- a/gnu/packages/fabric-management.scm +++ b/gnu/packages/fabric-management.scm @@ -185,7 +185,7 @@ testing InfiniBand networks.") (define-public ucx (package (name "ucx") - (version "1.14.0") + (version "1.15.0") (source (origin (method git-fetch) (uri (git-reference @@ -195,7 +195,7 @@ testing InfiniBand networks.") (patches (search-patches "ucx-tcp-iface-ioctl.patch")) (sha256 (base32 - "0ki2r768wqm92qv06wxrh3kv2nl2yj4ds9fz0s0b5rr2ycjiw9ir")))) + "1mk46vyfp8hsivk88s8gv0nf458jfs59fczpf66wwa3a9yp324jp")))) (build-system gnu-build-system) (arguments (list diff --git a/gnu/packages/patches/ucx-tcp-iface-ioctl.patch b/gnu/packages/patches/ucx-tcp-iface-ioctl.patch index c441a0861a..2a0e4ce138 100644 --- a/gnu/packages/patches/ucx-tcp-iface-ioctl.patch +++ b/gnu/packages/patches/ucx-tcp-iface-ioctl.patch @@ -3,102 +3,121 @@ TCP network interfaces cannot be obtained via /sys/class/net. This patch provides alternative code that uses the SIOCGIFCONF ioctl to get the names of the available TCP network interfaces. +Initially submitted at <https://github.com/openucx/ucx/pull/4462>. + diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c -index cad4a2709..7c1d2c9de 100644 +index 6a6cd34fa..af32bb2e9 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c -@@ -17,6 +17,8 @@ - #include <sys/poll.h> +@@ -18,6 +18,8 @@ #include <netinet/tcp.h> #include <dirent.h> + #include <float.h> +#include <net/if.h> +#include <sys/ioctl.h> + #define UCT_TCP_IFACE_NETDEV_DIR "/sys/class/net" - extern ucs_class_t UCS_CLASS_DECL_NAME(uct_tcp_iface_t); -@@ -586,6 +588,68 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h, +@@ -875,6 +877,85 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h, uct_worker_h, const uct_iface_params_t*, const uct_iface_config_t*); +/* Fetch information about available network devices through an ioctl. */ -+static ucs_status_t query_devices_ioctl(uct_md_h md, -+ uct_tl_device_resource_t **tl_devices_p, -+ unsigned *num_tl_devices_p) ++static ucs_status_t uct_tcp_query_devices_ioctl(uct_md_h md, ++ uct_tl_device_resource_t **devices_p, ++ unsigned *num_devices_p) +{ + int sock, err, i; -+ uct_tl_device_resource_t *resources, *tmp; -+ unsigned num_resources; ++ uct_tl_device_resource_t *devices, *tmp; ++ unsigned num_devices; + ucs_status_t status; + struct ifconf conf; -+ struct ifreq reqs[10]; + -+ conf.ifc_len = sizeof reqs; -+ conf.ifc_req = reqs; ++ conf.ifc_len = 0; ++ conf.ifc_req = NULL; ++ ++ status = ucs_socket_create(AF_INET, SOCK_STREAM, &sock); ++ if (status != UCS_OK) { ++ goto out; ++ } ++ ++ err = ioctl(sock, SIOCGIFCONF, &conf); ++ if (err < 0) { ++ ucs_error("ioctl(SIOCGIFCONF) failed: %m"); ++ status = UCS_ERR_IO_ERROR; ++ goto out; ++ } + -+ sock = socket(SOCK_STREAM, AF_INET, 0); -+ if (sock < 0) { -+ ucs_error("socket(2) failed: %m"); -+ status = UCS_ERR_IO_ERROR; -+ goto out; ++ conf.ifc_req = ucs_calloc(1, conf.ifc_len, "ifreq"); ++ if (conf.ifc_req == NULL) { ++ ucs_error("memory alocation failed"); ++ status = UCS_ERR_NO_MEMORY; ++ goto out; + } + + err = ioctl(sock, SIOCGIFCONF, &conf); + if (err < 0) { -+ ucs_error("SIOCGIFCONF ioctl failed: %m"); -+ status = UCS_ERR_IO_ERROR; -+ goto out; ++ ucs_error("ioctl(SIOCGIFCONF) failed: %m"); ++ status = UCS_ERR_IO_ERROR; ++ goto out_free; + } + -+ resources = NULL; -+ num_resources = 0; -+ for (i = 0; i < conf.ifc_len / sizeof(struct ifreq); i++) { -+ const char *name = reqs[i].ifr_name; ++ devices = NULL; ++ num_devices = 0; ++ for (i = 0; i < (conf.ifc_len / sizeof(struct ifreq)); i++) { ++ const char *name = conf.ifc_req[i].ifr_name; ++ sa_family_t family = conf.ifc_req[i].ifr_addr.sa_family; + -+ if (!ucs_netif_is_active(name, AF_INET)) { ++ if (!ucs_netif_is_active(name, family)) { + continue; + } + -+ tmp = ucs_realloc(resources, sizeof(*resources) * (num_resources + 1), -+ "tcp resources"); ++ tmp = ucs_realloc(devices, sizeof(*devices) * (num_devices + 1), ++ "tcp devices"); + if (tmp == NULL) { -+ ucs_free(resources); ++ ucs_free(devices); + status = UCS_ERR_NO_MEMORY; -+ goto out; ++ goto out_free; + } -+ resources = tmp; ++ devices = tmp; + -+ ucs_snprintf_zero(resources[i].name, sizeof(resources[i].name), ++ ucs_snprintf_zero(devices[num_devices].name, ++ sizeof(devices[num_devices].name), + "%s", name); -+ resources[i].type = UCT_DEVICE_TYPE_NET; -+ ++num_resources; ++ devices[num_devices].type = UCT_DEVICE_TYPE_NET; ++ ++num_devices; + } + -+ *num_tl_devices_p = num_resources; -+ *tl_devices_p = resources; -+ status = UCS_OK; ++ *num_devices_p = num_devices; ++ *devices_p = devices; ++ status = UCS_OK; + ++out_free: ++ ucs_free(conf.ifc_req); +out: -+ if (sock >= 0) close(sock); ++ if (sock >= 0) { ++ close(sock); ++ } + return status; +} + ucs_status_t uct_tcp_query_devices(uct_md_h md, uct_tl_device_resource_t **devices_p, unsigned *num_devices_p) -@@ -599,9 +663,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, +@@ -893,9 +974,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, dir = opendir(UCT_TCP_IFACE_NETDEV_DIR); if (dir == NULL) { - ucs_error("opendir(%s) failed: %m", UCT_TCP_IFACE_NETDEV_DIR); - status = UCS_ERR_IO_ERROR; - goto out; -+ /* When /sys is unavailable, as can be the case in a container, -+ * resort to a good old 'ioctl'. */ -+ return query_devices_ioctl(md, devices_p, num_devices_p); ++ /* When /sys is unavailable, as can be the case in a container, ++ * resort to a good old 'ioctl'. */ ++ return uct_tcp_query_devices_ioctl(md, devices_p, num_devices_p); } devices = NULL; -@@ -655,7 +719,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, +@@ -963,7 +1044,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, out_closedir: closedir(dir); |