me
/
guix
Archived
1
0
Fork 0

gnu: ucx: Update to 1.15.0.

* gnu/packages/fabric-management.scm (ucx): Update to 1.15.0.
* gnu/packages/patches/ucx-tcp-iface-ioctl.patch: Update for 1.15.0.
master
Ludovic Courtès 2023-10-16 09:09:52 +02:00
parent d117c6b422
commit 975350fab7
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
2 changed files with 64 additions and 45 deletions

View File

@@ -185,7 +185,7 @@ testing InfiniBand networks.")
(define-public ucx (define-public ucx
(package (package
(name "ucx") (name "ucx")
(version "1.14.0") (version "1.15.0")
(source (origin (source (origin
(method git-fetch) (method git-fetch)
(uri (git-reference (uri (git-reference
@@ -195,7 +195,7 @@ testing InfiniBand networks.")
(patches (search-patches "ucx-tcp-iface-ioctl.patch")) (patches (search-patches "ucx-tcp-iface-ioctl.patch"))
(sha256 (sha256
(base32 (base32
"0ki2r768wqm92qv06wxrh3kv2nl2yj4ds9fz0s0b5rr2ycjiw9ir")))) "1mk46vyfp8hsivk88s8gv0nf458jfs59fczpf66wwa3a9yp324jp"))))
(build-system gnu-build-system) (build-system gnu-build-system)
(arguments (arguments
(list (list

View File

@@ -3,89 +3,108 @@ TCP network interfaces cannot be obtained via /sys/class/net. This patch
provides alternative code that uses the SIOCGIFCONF ioctl to get the provides alternative code that uses the SIOCGIFCONF ioctl to get the
names of the available TCP network interfaces. names of the available TCP network interfaces.
Initially submitted at <https://github.com/openucx/ucx/pull/4462>.
diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c
index cad4a2709..7c1d2c9de 100644 index 6a6cd34fa..af32bb2e9 100644
--- a/src/uct/tcp/tcp_iface.c --- a/src/uct/tcp/tcp_iface.c
+++ b/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c
@@ -17,6 +17,8 @@ @@ -18,6 +18,8 @@
#include <sys/poll.h>
#include <netinet/tcp.h> #include <netinet/tcp.h>
#include <dirent.h> #include <dirent.h>
#include <float.h>
+#include <net/if.h> +#include <net/if.h>
+#include <sys/ioctl.h> +#include <sys/ioctl.h>
#define UCT_TCP_IFACE_NETDEV_DIR "/sys/class/net"
extern ucs_class_t UCS_CLASS_DECL_NAME(uct_tcp_iface_t); @@ -875,6 +877,85 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h,
@@ -586,6 +588,68 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h,
uct_worker_h, const uct_iface_params_t*, uct_worker_h, const uct_iface_params_t*,
const uct_iface_config_t*); const uct_iface_config_t*);
+/* Fetch information about available network devices through an ioctl. */ +/* Fetch information about available network devices through an ioctl. */
+static ucs_status_t query_devices_ioctl(uct_md_h md, +static ucs_status_t uct_tcp_query_devices_ioctl(uct_md_h md,
+ uct_tl_device_resource_t **tl_devices_p, + uct_tl_device_resource_t **devices_p,
+ unsigned *num_tl_devices_p) + unsigned *num_devices_p)
+{ +{
+ int sock, err, i; + int sock, err, i;
+ uct_tl_device_resource_t *resources, *tmp; + uct_tl_device_resource_t *devices, *tmp;
+ unsigned num_resources; + unsigned num_devices;
+ ucs_status_t status; + ucs_status_t status;
+ struct ifconf conf; + struct ifconf conf;
+ struct ifreq reqs[10];
+ +
+ conf.ifc_len = sizeof reqs; + conf.ifc_len = 0;
+ conf.ifc_req = reqs; + conf.ifc_req = NULL;
+ +
+ sock = socket(SOCK_STREAM, AF_INET, 0); + status = ucs_socket_create(AF_INET, SOCK_STREAM, &sock);
+ if (sock < 0) { + if (status != UCS_OK) {
+ ucs_error("socket(2) failed: %m");
+ status = UCS_ERR_IO_ERROR;
+ goto out; + goto out;
+ } + }
+ +
+ err = ioctl(sock, SIOCGIFCONF, &conf); + err = ioctl(sock, SIOCGIFCONF, &conf);
+ if (err < 0) { + if (err < 0) {
+ ucs_error("SIOCGIFCONF ioctl failed: %m"); + ucs_error("ioctl(SIOCGIFCONF) failed: %m");
+ status = UCS_ERR_IO_ERROR; + status = UCS_ERR_IO_ERROR;
+ goto out; + goto out;
+ } + }
+ +
+ resources = NULL; + conf.ifc_req = ucs_calloc(1, conf.ifc_len, "ifreq");
+ num_resources = 0; + if (conf.ifc_req == NULL) {
+ for (i = 0; i < conf.ifc_len / sizeof(struct ifreq); i++) { + ucs_error("memory alocation failed");
+ const char *name = reqs[i].ifr_name;
+
+ if (!ucs_netif_is_active(name, AF_INET)) {
+ continue;
+ }
+
+ tmp = ucs_realloc(resources, sizeof(*resources) * (num_resources + 1),
+ "tcp resources");
+ if (tmp == NULL) {
+ ucs_free(resources);
+ status = UCS_ERR_NO_MEMORY; + status = UCS_ERR_NO_MEMORY;
+ goto out; + goto out;
+ } + }
+ resources = tmp;
+ +
+ ucs_snprintf_zero(resources[i].name, sizeof(resources[i].name), + err = ioctl(sock, SIOCGIFCONF, &conf);
+ "%s", name); + if (err < 0) {
+ resources[i].type = UCT_DEVICE_TYPE_NET; + ucs_error("ioctl(SIOCGIFCONF) failed: %m");
+ ++num_resources; + status = UCS_ERR_IO_ERROR;
+ goto out_free;
+ } + }
+ +
+ *num_tl_devices_p = num_resources; + devices = NULL;
+ *tl_devices_p = resources; + num_devices = 0;
+ for (i = 0; i < (conf.ifc_len / sizeof(struct ifreq)); i++) {
+ const char *name = conf.ifc_req[i].ifr_name;
+ sa_family_t family = conf.ifc_req[i].ifr_addr.sa_family;
+
+ if (!ucs_netif_is_active(name, family)) {
+ continue;
+ }
+
+ tmp = ucs_realloc(devices, sizeof(*devices) * (num_devices + 1),
+ "tcp devices");
+ if (tmp == NULL) {
+ ucs_free(devices);
+ status = UCS_ERR_NO_MEMORY;
+ goto out_free;
+ }
+ devices = tmp;
+
+ ucs_snprintf_zero(devices[num_devices].name,
+ sizeof(devices[num_devices].name),
+ "%s", name);
+ devices[num_devices].type = UCT_DEVICE_TYPE_NET;
+ ++num_devices;
+ }
+
+ *num_devices_p = num_devices;
+ *devices_p = devices;
+ status = UCS_OK; + status = UCS_OK;
+ +
+out_free:
+ ucs_free(conf.ifc_req);
+out: +out:
+ if (sock >= 0) close(sock); + if (sock >= 0) {
+ close(sock);
+ }
+ return status; + return status;
+} +}
+ +
ucs_status_t uct_tcp_query_devices(uct_md_h md, ucs_status_t uct_tcp_query_devices(uct_md_h md,
uct_tl_device_resource_t **devices_p, uct_tl_device_resource_t **devices_p,
unsigned *num_devices_p) unsigned *num_devices_p)
@@ -599,9 +663,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, @@ -893,9 +974,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
dir = opendir(UCT_TCP_IFACE_NETDEV_DIR); dir = opendir(UCT_TCP_IFACE_NETDEV_DIR);
if (dir == NULL) { if (dir == NULL) {
@@ -94,11 +113,11 @@ index cad4a2709..7c1d2c9de 100644
- goto out; - goto out;
+ /* When /sys is unavailable, as can be the case in a container, + /* When /sys is unavailable, as can be the case in a container,
+ * resort to a good old 'ioctl'. */ + * resort to a good old 'ioctl'. */
+ return query_devices_ioctl(md, devices_p, num_devices_p); + return uct_tcp_query_devices_ioctl(md, devices_p, num_devices_p);
} }
devices = NULL; devices = NULL;
@@ -655,7 +719,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, @@ -963,7 +1044,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
out_closedir: out_closedir:
closedir(dir); closedir(dir);