1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * smc_sysctl.c: sysctl interface to SMC subsystem. 6 * 7 * Copyright (c) 2022, Alibaba Inc. 8 * 9 * Author: Tony Lu <tonylu@linux.alibaba.com> 10 * 11 */ 12 13 #include <linux/init.h> 14 #include <linux/sysctl.h> 15 #include <linux/bpf.h> 16 #include <net/net_namespace.h> 17 18 #include "smc.h" 19 #include "smc_core.h" 20 #include "smc_llc.h" 21 #include "smc_sysctl.h" 22 #include "smc_hs_bpf.h" 23 24 static int min_sndbuf = SMC_BUF_MIN_SIZE; 25 static int min_rcvbuf = SMC_BUF_MIN_SIZE; 26 static int max_sndbuf = INT_MAX / 2; 27 static int max_rcvbuf = INT_MAX / 2; 28 static const int net_smc_wmem_init = (64 * 1024); 29 static const int net_smc_rmem_init = (64 * 1024); 30 static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; 31 static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; 32 static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; 33 static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; 34 static unsigned int smcr_max_wr_min = 2; 35 static unsigned int smcr_max_wr_max = 2048; 36 37 #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) 38 static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name) 39 { 40 struct smc_hs_ctrl *ctrl = NULL; 41 42 rcu_read_lock(); 43 /* null or empty name ask to clear current ctrl */ 44 if (name && name[0]) { 45 ctrl = smc_hs_ctrl_find_by_name(name); 46 if (!ctrl) { 47 rcu_read_unlock(); 48 return -EINVAL; 49 } 50 /* no change, just return */ 51 if (ctrl == rcu_dereference(net->smc.hs_ctrl)) { 52 rcu_read_unlock(); 53 return 0; 54 } 55 if (!bpf_try_module_get(ctrl, ctrl->owner)) { 56 rcu_read_unlock(); 57 return -EBUSY; 58 } 59 } 60 /* xhcg old ctrl with the new one atomically */ 61 ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl))); 62 /* release old ctrl */ 63 if (ctrl) 64 bpf_module_put(ctrl, ctrl->owner); 65 66 rcu_read_unlock(); 67 return 0; 68 } 69 70 static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write, 71 void *buffer, size_t *lenp, loff_t *ppos) 72 { 73 struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl); 74 char val[SMC_HS_CTRL_NAME_MAX]; 75 const struct ctl_table tbl = { 76 .data = val, 77 .maxlen = SMC_HS_CTRL_NAME_MAX, 78 }; 79 struct smc_hs_ctrl *ctrl; 80 int ret; 81 82 rcu_read_lock(); 83 ctrl = rcu_dereference(net->smc.hs_ctrl); 84 if (ctrl) 85 memcpy(val, ctrl->name, sizeof(ctrl->name)); 86 else 87 val[0] = '\0'; 88 rcu_read_unlock(); 89 90 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 91 if (ret) 92 return ret; 93 94 if (write) 95 ret = smc_net_replace_smc_hs_ctrl(net, val); 96 return ret; 97 } 98 #endif /* CONFIG_SMC_HS_CTRL_BPF */ 99 100 static struct ctl_table smc_table[] = { 101 { 102 .procname = "autocorking_size", 103 .data = &init_net.smc.sysctl_autocorking_size, 104 .maxlen = sizeof(unsigned int), 105 .mode = 0644, 106 .proc_handler = proc_douintvec, 107 }, 108 { 109 .procname = "smcr_buf_type", 110 .data = &init_net.smc.sysctl_smcr_buf_type, 111 .maxlen = sizeof(unsigned int), 112 .mode = 0644, 113 .proc_handler = proc_douintvec_minmax, 114 .extra1 = SYSCTL_ZERO, 115 .extra2 = SYSCTL_TWO, 116 }, 117 { 118 .procname = "smcr_testlink_time", 119 .data = &init_net.smc.sysctl_smcr_testlink_time, 120 .maxlen = sizeof(int), 121 .mode = 0644, 122 .proc_handler = proc_dointvec_jiffies, 123 }, 124 { 125 .procname = "wmem", 126 .data = &init_net.smc.sysctl_wmem, 127 .maxlen = sizeof(int), 128 .mode = 0644, 129 .proc_handler = proc_dointvec_minmax, 130 .extra1 = &min_sndbuf, 131 .extra2 = &max_sndbuf, 132 }, 133 { 134 .procname = "rmem", 135 .data = &init_net.smc.sysctl_rmem, 136 .maxlen = sizeof(int), 137 .mode = 0644, 138 .proc_handler = proc_dointvec_minmax, 139 .extra1 = &min_rcvbuf, 140 .extra2 = &max_rcvbuf, 141 }, 142 { 143 .procname = "smcr_max_links_per_lgr", 144 .data = &init_net.smc.sysctl_max_links_per_lgr, 145 .maxlen = sizeof(int), 146 .mode = 0644, 147 .proc_handler = proc_dointvec_minmax, 148 .extra1 = &links_per_lgr_min, 149 .extra2 = &links_per_lgr_max, 150 }, 151 { 152 .procname = "smcr_max_conns_per_lgr", 153 .data = &init_net.smc.sysctl_max_conns_per_lgr, 154 .maxlen = sizeof(int), 155 .mode = 0644, 156 .proc_handler = proc_dointvec_minmax, 157 .extra1 = &conns_per_lgr_min, 158 .extra2 = &conns_per_lgr_max, 159 }, 160 { 161 .procname = "limit_smc_hs", 162 .data = &init_net.smc.limit_smc_hs, 163 .maxlen = sizeof(int), 164 .mode = 0644, 165 .proc_handler = proc_dointvec_minmax, 166 .extra1 = SYSCTL_ZERO, 167 .extra2 = SYSCTL_ONE, 168 }, 169 { 170 .procname = "smcr_max_send_wr", 171 .data = &init_net.smc.sysctl_smcr_max_send_wr, 172 .maxlen = sizeof(int), 173 .mode = 0644, 174 .proc_handler = proc_dointvec_minmax, 175 .extra1 = &smcr_max_wr_min, 176 .extra2 = &smcr_max_wr_max, 177 }, 178 { 179 .procname = "smcr_max_recv_wr", 180 .data = &init_net.smc.sysctl_smcr_max_recv_wr, 181 .maxlen = sizeof(int), 182 .mode = 0644, 183 .proc_handler = proc_dointvec_minmax, 184 .extra1 = &smcr_max_wr_min, 185 .extra2 = &smcr_max_wr_max, 186 }, 187 #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) 188 { 189 .procname = "hs_ctrl", 190 .data = &init_net.smc.hs_ctrl, 191 .mode = 0644, 192 .maxlen = SMC_HS_CTRL_NAME_MAX, 193 .proc_handler = proc_smc_hs_ctrl, 194 }, 195 #endif /* CONFIG_SMC_HS_CTRL_BPF */ 196 }; 197 198 int __net_init smc_sysctl_net_init(struct net *net) 199 { 200 size_t table_size = ARRAY_SIZE(smc_table); 201 struct ctl_table *table; 202 203 table = smc_table; 204 if (!net_eq(net, &init_net)) { 205 int i; 206 #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) 207 struct smc_hs_ctrl *ctrl; 208 209 rcu_read_lock(); 210 ctrl = rcu_dereference(init_net.smc.hs_ctrl); 211 if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE && 212 bpf_try_module_get(ctrl, ctrl->owner)) 213 rcu_assign_pointer(net->smc.hs_ctrl, ctrl); 214 rcu_read_unlock(); 215 #endif /* CONFIG_SMC_HS_CTRL_BPF */ 216 217 table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); 218 if (!table) 219 goto err_alloc; 220 221 for (i = 0; i < table_size; i++) 222 table[i].data += (void *)net - (void *)&init_net; 223 } 224 225 net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table, 226 table_size); 227 if (!net->smc.smc_hdr) 228 goto err_reg; 229 230 net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; 231 net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; 232 net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; 233 WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); 234 WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); 235 net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; 236 net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; 237 net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF; 238 net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF; 239 /* disable handshake limitation by default */ 240 net->smc.limit_smc_hs = 0; 241 242 return 0; 243 244 err_reg: 245 if (!net_eq(net, &init_net)) 246 kfree(table); 247 err_alloc: 248 #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) 249 smc_net_replace_smc_hs_ctrl(net, NULL); 250 #endif /* CONFIG_SMC_HS_CTRL_BPF */ 251 return -ENOMEM; 252 } 253 254 void __net_exit smc_sysctl_net_exit(struct net *net) 255 { 256 const struct ctl_table *table; 257 258 table = net->smc.smc_hdr->ctl_table_arg; 259 unregister_net_sysctl_table(net->smc.smc_hdr); 260 #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) 261 smc_net_replace_smc_hs_ctrl(net, NULL); 262 #endif /* CONFIG_SMC_HS_CTRL_BPF */ 263 264 if (!net_eq(net, &init_net)) 265 kfree(table); 266 } 267