This fixes test failures of packages that use Open MPI, whereby UCX would error out due to /sys/class/net being unavailable in the build chroot that the daemon sets up. * gnu/packages/patches/ucx-tcp-iface-ioctl.patch: New file. * gnu/packages/fabric-management.scm (ucx)[source]: Use it. * gnu/local.mk (dist_patch_DATA): Add it.
		
			
				
	
	
		
			109 lines
		
	
	
	
		
			3.4 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			109 lines
		
	
	
	
		
			3.4 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| Since /sys is unavailable in build environments, the list of available
 | |
| TCP network interfaces cannot be obtained via /sys/class/net.  This patch
 | |
| provides alternative code that uses the SIOCGIFCONF ioctl to get the
 | |
| names of the available TCP network interfaces.
 | |
| 
 | |
| diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c
 | |
| index 81ad459..10024a6 100644
 | |
| --- a/src/uct/tcp/tcp_iface.c
 | |
| +++ b/src/uct/tcp/tcp_iface.c
 | |
| @@ -12,6 +12,8 @@
 | |
|  #include <sys/poll.h>
 | |
|  #include <netinet/tcp.h>
 | |
|  #include <dirent.h>
 | |
| +#include <net/if.h>
 | |
| +#include <sys/ioctl.h>
 | |
|  
 | |
|  static ucs_config_field_t uct_tcp_iface_config_table[] = {
 | |
|    {"", "MAX_SHORT=8k", NULL,
 | |
| @@ -483,6 +485,70 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h,
 | |
|                                   uct_worker_h, const uct_iface_params_t*,
 | |
|                                   const uct_iface_config_t*);
 | |
|  
 | |
| +/* Fetch the list of active network devices with the SIOCGIFCONF ioctl, for
 | |
| + * use when /sys/class/net cannot be read.  On success, store a ucs_realloc'd
 | |
| + * array in *RESOURCE_P (NULL if no device is active) and its length in
 | |
| + * *NUM_RESOURCES_P, then return UCS_OK.  MD is unused.  */
 | |
| +static ucs_status_t query_devices_ioctl(uct_md_h md,
 | |
| +                                        uct_tl_resource_desc_t **resource_p,
 | |
| +                                        unsigned *num_resources_p)
 | |
| +{
 | |
| +    uct_tl_resource_desc_t *resources = NULL, *tmp, *rsc;
 | |
| +    unsigned num_resources = 0, num_reqs, i;
 | |
| +    ucs_status_t status;
 | |
| +    struct ifconf conf;
 | |
| +    struct ifreq reqs[10];  /* Caps the number of interfaces reported.  */
 | |
| +    int sock;
 | |
| +
 | |
| +    conf.ifc_len = sizeof reqs;
 | |
| +    conf.ifc_req = reqs;
 | |
| +
 | |
| +    sock = socket(AF_INET, SOCK_STREAM, 0);
 | |
| +    if (sock < 0) {
 | |
| +        ucs_error("socket(2) failed: %m");
 | |
| +        return UCS_ERR_IO_ERROR;
 | |
| +    }
 | |
| +
 | |
| +    if (ioctl(sock, SIOCGIFCONF, &conf) < 0) {
 | |
| +        ucs_error("SIOCGIFCONF ioctl failed: %m");
 | |
| +        status = UCS_ERR_IO_ERROR;
 | |
| +        goto out;
 | |
| +    }
 | |
| +
 | |
| +    num_reqs = conf.ifc_len / sizeof(struct ifreq);
 | |
| +    for (i = 0; i < num_reqs; i++) {
 | |
| +        const char *name = reqs[i].ifr_name;
 | |
| +
 | |
| +        if (!ucs_netif_is_active(name)) {
 | |
| +            continue;
 | |
| +        }
 | |
| +
 | |
| +        tmp = ucs_realloc(resources, sizeof(*resources) * (num_resources + 1),
 | |
| +                          "tcp resources");
 | |
| +        if (tmp == NULL) {
 | |
| +            ucs_free(resources);
 | |
| +            status = UCS_ERR_NO_MEMORY;
 | |
| +            goto out;
 | |
| +        }
 | |
| +        resources = tmp;
 | |
| +
 | |
| +        /* Index by NUM_RESOURCES: inactive interfaces are skipped above.  */
 | |
| +        rsc = &resources[num_resources++];
 | |
| +        ucs_snprintf_zero(rsc->tl_name, sizeof(rsc->tl_name),
 | |
| +                          "%s", UCT_TCP_NAME);
 | |
| +        ucs_snprintf_zero(rsc->dev_name, sizeof(rsc->dev_name), "%s", name);
 | |
| +        rsc->dev_type = UCT_DEVICE_TYPE_NET;
 | |
| +    }
 | |
| +
 | |
| +    *num_resources_p = num_resources;
 | |
| +    *resource_p      = resources;
 | |
| +    status           = UCS_OK;
 | |
| +
 | |
| +out:
 | |
| +    close(sock);
 | |
| +    return status;
 | |
| +}
 | |
| +
 | |
|  static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md,
 | |
|                                                 uct_tl_resource_desc_t **resource_p,
 | |
|                                                 unsigned *num_resources_p)
 | |
| @@ -496,9 +562,9 @@ static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md,
 | |
|  
 | |
|      dir = opendir(netdev_dir);
 | |
|      if (dir == NULL) {
 | |
| -        ucs_error("opendir(%s) failed: %m", netdev_dir);
 | |
| -        status = UCS_ERR_IO_ERROR;
 | |
| -        goto out;
 | |
| +	/* When /sys is unavailable, as can be the case in a container,
 | |
| +	 * resort to a good old 'ioctl'.  */
 | |
| +	return query_devices_ioctl(md, resource_p, num_resources_p);
 | |
|      }
 | |
|  
 | |
|      resources     = NULL;
 | |
| @@ -543,6 +609,5 @@ static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md,
 | |
|  
 | |
|  out_closedir:
 | |
|      closedir(dir);
 | |
| -out:
 | |
|      return status;
 | |
|  }
 |