1 // SPDX-License-Identifier: GPL-2.0 2 #include <test_progs.h> 3 #include <linux/genetlink.h> 4 #include "network_helpers.h" 5 #include "bpf_smc.skel.h" 6 7 #ifndef IPPROTO_SMC 8 #define IPPROTO_SMC 256 9 #endif 10 11 #define CLIENT_IP "127.0.0.1" 12 #define SERVER_IP "127.0.1.0" 13 #define SERVER_IP_VIA_RISK_PATH "127.0.2.0" 14 15 #define SERVICE_1 80 16 #define SERVICE_2 443 17 #define SERVICE_3 8443 18 19 #define TEST_NS "bpf_smc_netns" 20 21 static struct netns_obj *test_netns; 22 23 struct smc_policy_ip_key { 24 __u32 sip; 25 __u32 dip; 26 }; 27 28 struct smc_policy_ip_value { 29 __u8 mode; 30 }; 31 32 #if defined(__s390x__) 33 /* s390x has default seid */ 34 static bool setup_ueid(void) { return true; } 35 static void cleanup_ueid(void) {} 36 #else 37 enum { 38 SMC_NETLINK_ADD_UEID = 10, 39 SMC_NETLINK_REMOVE_UEID 40 }; 41 42 enum { 43 SMC_NLA_EID_TABLE_UNSPEC, 44 SMC_NLA_EID_TABLE_ENTRY, /* string */ 45 }; 46 47 struct msgtemplate { 48 struct nlmsghdr n; 49 struct genlmsghdr g; 50 char buf[1024]; 51 }; 52 53 #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) 54 #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) 55 #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) 56 #define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) 57 58 #define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK" 59 #define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID" 60 61 static uint16_t smc_nl_family_id = -1; 62 63 static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, 64 __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type, 65 void *nla_data, int nla_len) 66 { 67 struct nlattr *na; 68 struct sockaddr_nl nladdr; 69 int r, buflen; 70 char *buf; 71 72 struct msgtemplate msg = {0}; 73 74 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); 75 msg.n.nlmsg_type = nlmsg_type; 76 msg.n.nlmsg_flags = nlmsg_flags; 77 msg.n.nlmsg_seq = 0; 78 msg.n.nlmsg_pid = nlmsg_pid; 79 msg.g.cmd = genl_cmd; 80 msg.g.version = 1; 81 na = (struct nlattr *)GENLMSG_DATA(&msg); 82 na->nla_type = nla_type; 83 na->nla_len = nla_len + 1 + NLA_HDRLEN; 84 memcpy(NLA_DATA(na), nla_data, nla_len); 85 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); 86 87 buf = (char *)&msg; 88 buflen = msg.n.nlmsg_len; 89 memset(&nladdr, 0, sizeof(nladdr)); 90 nladdr.nl_family = AF_NETLINK; 91 92 while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, 93 sizeof(nladdr))) < buflen) { 94 if (r > 0) { 95 buf += r; 96 buflen -= r; 97 } else if (errno != EAGAIN) { 98 return -1; 99 } 100 } 101 return 0; 102 } 103 104 static bool get_smc_nl_family_id(void) 105 { 106 struct sockaddr_nl nl_src; 107 struct msgtemplate msg; 108 struct nlattr *nl; 109 int fd, ret; 110 pid_t pid; 111 112 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 113 if (!ASSERT_OK_FD(fd, "nl_family socket")) 114 return false; 115 116 pid = getpid(); 117 118 memset(&nl_src, 0, sizeof(nl_src)); 119 nl_src.nl_family = AF_NETLINK; 120 nl_src.nl_pid = pid; 121 122 ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); 123 if (!ASSERT_OK(ret, "nl_family bind")) 124 goto fail; 125 126 ret = send_cmd(fd, GENL_ID_CTRL, pid, 127 NLM_F_REQUEST, CTRL_CMD_GETFAMILY, 128 CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME, 129 strlen(SMC_GENL_FAMILY_NAME)); 130 if (!ASSERT_OK(ret, "nl_family query")) 131 goto fail; 132 133 ret = recv(fd, &msg, sizeof(msg), 0); 134 if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 || 135 !NLMSG_OK(&msg.n, ret), "nl_family response")) 136 goto fail; 137 138 nl = (struct nlattr *)GENLMSG_DATA(&msg); 139 nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len)); 140 if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type")) 141 goto fail; 142 143 smc_nl_family_id = *(uint16_t *)NLA_DATA(nl); 144 close(fd); 145 return true; 146 fail: 147 close(fd); 148 return false; 149 } 150 151 static bool smc_ueid(int op) 152 { 153 struct sockaddr_nl nl_src; 154 struct msgtemplate msg; 155 struct nlmsgerr *err; 156 char test_ueid[32]; 157 int fd, ret; 158 pid_t pid; 159 160 /* UEID required */ 161 memset(test_ueid, '\x20', sizeof(test_ueid)); 162 memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID)); 163 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 164 if (!ASSERT_OK_FD(fd, "ueid socket")) 165 return false; 166 167 pid = getpid(); 168 memset(&nl_src, 0, sizeof(nl_src)); 169 nl_src.nl_family = AF_NETLINK; 170 nl_src.nl_pid = pid; 171 172 ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); 173 if (!ASSERT_OK(ret, "ueid bind")) 174 goto fail; 175 176 ret = send_cmd(fd, smc_nl_family_id, pid, 177 NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY, 178 (void *)test_ueid, sizeof(test_ueid)); 179 if (!ASSERT_OK(ret, "ueid cmd")) 180 goto fail; 181 182 ret = recv(fd, &msg, sizeof(msg), 0); 183 if (!ASSERT_FALSE(ret < 0 || 184 !NLMSG_OK(&msg.n, ret), "ueid response")) 185 goto fail; 186 187 if (msg.n.nlmsg_type == NLMSG_ERROR) { 188 err = NLMSG_DATA(&msg); 189 switch (op) { 190 case SMC_NETLINK_REMOVE_UEID: 191 if (!ASSERT_FALSE((err->error && err->error != -ENOENT), 192 "ueid remove")) 193 goto fail; 194 break; 195 case SMC_NETLINK_ADD_UEID: 196 if (!ASSERT_OK(err->error, "ueid add")) 197 goto fail; 198 break; 199 default: 200 break; 201 } 202 } 203 close(fd); 204 return true; 205 fail: 206 close(fd); 207 return false; 208 } 209 210 static bool setup_ueid(void) 211 { 212 /* get smc nl id */ 213 if (!get_smc_nl_family_id()) 214 return false; 215 /* clear old ueid for bpftest */ 216 smc_ueid(SMC_NETLINK_REMOVE_UEID); 217 /* smc-loopback required ueid */ 218 return smc_ueid(SMC_NETLINK_ADD_UEID); 219 } 220 221 static void cleanup_ueid(void) 222 { 223 smc_ueid(SMC_NETLINK_REMOVE_UEID); 224 } 225 #endif /* __s390x__ */ 226 227 static bool setup_netns(void) 228 { 229 test_netns = netns_new(TEST_NS, true); 230 if (!ASSERT_OK_PTR(test_netns, "open net namespace")) 231 goto fail_netns; 232 233 SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo"); 234 SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo"); 235 236 return true; 237 fail_ip: 238 netns_free(test_netns); 239 fail_netns: 240 return false; 241 } 242 243 static void cleanup_netns(void) 244 { 245 netns_free(test_netns); 246 } 247 248 static bool setup_smc(void) 249 { 250 if (!setup_ueid()) 251 return false; 252 253 if (!setup_netns()) 254 goto fail_netns; 255 256 return true; 257 fail_netns: 258 cleanup_ueid(); 259 return false; 260 } 261 262 static int set_client_addr_cb(int fd, void *opts) 263 { 264 const char *src = (const char *)opts; 265 struct sockaddr_in localaddr; 266 267 localaddr.sin_family = AF_INET; 268 localaddr.sin_port = htons(0); 269 localaddr.sin_addr.s_addr = inet_addr(src); 270 return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind"); 271 } 272 273 static void run_link(const char *src, const char *dst, int port) 274 { 275 struct network_helper_opts opts = {0}; 276 int server, client; 277 278 server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL); 279 if (!ASSERT_OK_FD(server, "start service_1")) 280 return; 281 282 opts.proto = IPPROTO_TCP; 283 opts.post_socket_cb = set_client_addr_cb; 284 opts.cb_opts = (void *)src; 285 286 client = connect_to_fd_opts(server, &opts); 287 if (!ASSERT_OK_FD(client, "start connect")) 288 goto fail_client; 289 290 close(client); 291 fail_client: 292 close(server); 293 } 294 295 static void block_link(int map_fd, const char *src, const char *dst) 296 { 297 struct smc_policy_ip_value val = { .mode = /* block */ 0 }; 298 struct smc_policy_ip_key key = { 299 .sip = inet_addr(src), 300 .dip = inet_addr(dst), 301 }; 302 303 bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); 304 } 305 306 /* 307 * This test describes a real-life service topology as follows: 308 * 309 * +-------------> service_1 310 * link 1 | | 311 * +--------------------> server | link 2 312 * | | V 313 * | +-------------> service_2 314 * | link 3 315 * client -------------------> server_via_unsafe_path -> service_3 316 * 317 * Among them, 318 * 1. link-1 is very suitable for using SMC. 319 * 2. link-2 is not suitable for using SMC, because the mode of this link is 320 * kind of short-link services. 321 * 3. link-3 is also not suitable for using SMC, because the RDMA link is 322 * unavailable and needs to go through a long timeout before it can fallback 323 * to TCP. 324 * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl. 325 */ 326 static void test_topo(void) 327 { 328 struct bpf_smc *skel; 329 int rc, map_fd; 330 331 skel = bpf_smc__open_and_load(); 332 if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load")) 333 return; 334 335 rc = bpf_smc__attach(skel); 336 if (!ASSERT_OK(rc, "bpf_smc__attach")) 337 goto fail; 338 339 map_fd = bpf_map__fd(skel->maps.smc_policy_ip); 340 if (!ASSERT_OK_FD(map_fd, "bpf_map__fd")) 341 goto fail; 342 343 /* Mock the process of transparent replacement, since we will modify 344 * protocol to ipproto_smc accropding to it via 345 * fmod_ret/update_socket_protocol. 346 */ 347 write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck"); 348 349 /* Configure ip strat */ 350 block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH); 351 block_link(map_fd, SERVER_IP, SERVER_IP); 352 353 /* should go with smc */ 354 run_link(CLIENT_IP, SERVER_IP, SERVICE_1); 355 /* should go with smc fallback */ 356 run_link(SERVER_IP, SERVER_IP, SERVICE_2); 357 358 ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count"); 359 ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); 360 361 /* should go with smc */ 362 run_link(CLIENT_IP, SERVER_IP, SERVICE_2); 363 364 ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count"); 365 ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); 366 367 /* should go with smc fallback */ 368 run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3); 369 370 ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count"); 371 ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count"); 372 373 fail: 374 bpf_smc__destroy(skel); 375 } 376 377 void test_bpf_smc(void) 378 { 379 if (!setup_smc()) { 380 printf("setup for smc test failed, test SKIP:\n"); 381 test__skip(); 382 return; 383 } 384 385 if (test__start_subtest("topo")) 386 test_topo(); 387 388 cleanup_ueid(); 389 cleanup_netns(); 390 } 391