/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Standard module for handling DLPI Style 2 attach/detach
 */
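/*
 * For context, a minimal sketch of the user-level DLPI style-2 traffic
 * this module intercepts (the device path and ppa below are hypothetical,
 * chosen only for illustration):
 *
 *	int fd = open("/dev/le", O_RDWR);
 *	dl_attach_req_t req = { DL_ATTACH_REQ, 0 };
 *	struct strbuf ctl = { 0, sizeof (req), (char *)&req };
 *	(void) putmsg(fd, &ctl, NULL, 0);
 *
 * The DL_ATTACH_REQ is intercepted by drwput() on its way down; the
 * driver's DL_OK_ACK or DL_ERROR_ACK passes through drrput() on its
 * way back up.
 */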

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/strsubr.h>
#include <sys/ddi.h>
#include <sys/dlpi.h>
#include <sys/strsun.h>
#include <sys/policy.h>

static struct streamtab drstab;

static struct fmodsw fsw = {
        DRMODNAME,
        &drstab,
        D_MP
};


/*
 * Module linkage information for the kernel.
 */

static struct modlstrmod modlstrmod = {
        &mod_strmodops, "dr compatibility for DLPI style 2 drivers", &fsw
};


static struct modlinkage modlinkage = {
        MODREV_1, &modlstrmod, NULL
};


int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}


static int dropen(queue_t *, dev_t *, int, int, cred_t *);
static int drclose(queue_t *, int, cred_t *);
static int drrput(queue_t *, mblk_t *);
static int drwput(queue_t *, mblk_t *);

static struct module_info drinfo = {
        0,              /* mi_idnum */
        DRMODNAME,      /* mi_idname */
        0,              /* mi_minpsz */
        INFPSZ,         /* mi_maxpsz */
        1,              /* mi_hiwat */
        0               /* mi_lowat */
};

static struct qinit drrinit = {
        (int (*)())drrput,      /* qi_putp */
        NULL,                   /* qi_srvp */
        dropen,                 /* qi_qopen */
        drclose,                /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &drinfo                 /* qi_minfo */
};

static struct qinit drwinit = {
        (int (*)())drwput,      /* qi_putp */
        NULL,                   /* qi_srvp */
        NULL,                   /* qi_qopen */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &drinfo                 /* qi_minfo */
};

static struct streamtab drstab = {
        &drrinit,               /* st_rdinit */
        &drwinit,               /* st_wrinit */
        NULL,                   /* st_muxrinit */
        NULL                    /* st_muxwinit */
};

/*
 * This module is pushed directly on top of the bottom driver
 * in a DLPI style-2 stream by stropen(). It intercepts
 * DL_ATTACH_REQ/DL_DETACH_REQ messages on the write side
 * and acks on the read side, and calls ddi_assoc_queue_with_devi()
 * where needed. The primary purpose is to work around a DR race
 * condition affecting non-DDI-compliant DLPI style 2 drivers, which
 * may cause the system to panic.
 *
 * The following action is taken:
 * Write side (drwput):
 *	attach request: hold driver instance assuming ppa == instance.
 *	This way, the instance cannot be detached while the
 *	driver is processing DL_ATTACH_REQ.
 *
 *	On a successful hold, store the dip in a ring buffer
 *	to be processed later by the read side.
 *	If the hold fails (most likely because ppa != instance), we
 *	store NULL in the ring buffer and the read side won't take
 *	any action on the ack.
 *
 * Read side (drrput):
 *	attach success: if (dip held on write side) associate queue with dip
 *	attach failure: if (dip held on write side) release hold on dip
 *	detach success: associate queue with NULL
 *	detach failure: do nothing
 *
 * The module assumes that incoming DL_ATTACH_REQ/DL_DETACH_REQ
 * messages are ordered (non-concurrent) and that the bottom
 * driver processes them and sends acknowledgements in the same
 * order. This assumption is reasonable because concurrent
 * association results in non-deterministic queue behavior.
 * The module is coded carefully such that unordered messages
 * do not result in a system panic.
 *
 * The module handles multiple outstanding messages queued
 * in the bottom driver. Messages processed on the write side
 * but not yet arrived at the read side are placed in the ring buffer
 * dr_dip[], between dr_nfirst and dr_nlast. The write side is the
 * producer and the read side is the consumer. The buffer is full
 * when dr_nfirst == dr_nlast.
 *
 * The current size of the ring buffer is 64 (MAX_DLREQS) per stream.
 * During normal testing, we have not seen more than 10 outstanding
 * messages.
 */
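
/*
 * For illustration (an assumed sequence, not a trace): starting from an
 * empty buffer (dr_nfirst == dr_nlast == 0) with two attach requests
 * queued in the bottom driver before either ack arrives:
 *
 *	drwput: DL_ATTACH_REQ	dr_dip[0] = dip, dr_nfirst -> 1
 *	drwput: DL_ATTACH_REQ	dr_dip[1] = dip, dr_nfirst -> 2
 *	drrput: ack		consumes dr_dip[0], dr_nlast -> 1
 *	drrput: ack		consumes dr_dip[1], dr_nlast -> 2
 *
 * Both indices only ever move forward (modulo MAX_DLREQS), so an ack is
 * always matched with the oldest outstanding attach request.
 */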

#define	MAX_DLREQS	64
#define	INCR(x)		{(x)++; if ((x) >= MAX_DLREQS) (x) = 0; }

struct drstate {
        kmutex_t dr_lock;               /* protects the ring buffer */
        major_t dr_major;               /* major number of bottom driver */
        int dr_nfirst;                  /* producer index (write side) */
        int dr_nlast;                   /* consumer index (read side) */
        dev_info_t *dr_dip[MAX_DLREQS]; /* dips held by the write side */
};

/* ARGSUSED1 */
static int
dropen(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *crp)
{
        struct drstate *dsp;

        if (sflag != MODOPEN) { /* must be a pushed module */
                return (EINVAL);
        }

        if (secpolicy_net_rawaccess(crp) != 0) {
                return (EPERM);
        }

        if (q->q_ptr != NULL) {
                return (0);     /* already open */
        }

        dsp = kmem_zalloc(sizeof (*dsp), KM_SLEEP);
        dsp->dr_major = getmajor(*devp);
        mutex_init(&dsp->dr_lock, NULL, MUTEX_DEFAULT, NULL);
        q->q_ptr = OTHERQ(q)->q_ptr = dsp;
        qprocson(q);
        ddi_assoc_queue_with_devi(q, NULL);
        return (0);
}

/* ARGSUSED1 */
static int
drclose(queue_t *q, int cflag, cred_t *crp)
{
        struct drstate *dsp = q->q_ptr;

        ASSERT(dsp);
        ddi_assoc_queue_with_devi(q, NULL);
        qprocsoff(q);

        mutex_destroy(&dsp->dr_lock);
        kmem_free(dsp, sizeof (*dsp));
        q->q_ptr = NULL;

        return (0);
}

static int
drrput(queue_t *q, mblk_t *mp)
{
        struct drstate *dsp;
        union DL_primitives *dlp;
        dev_info_t *dip;

        switch (DB_TYPE(mp)) {
        case M_PROTO:
        case M_PCPROTO:
                break;
        default:
                putnext(q, mp);
                return (0);
        }

        /* make sure size is sufficient for dl_primitive */
        if (MBLKL(mp) < sizeof (t_uscalar_t)) {
                putnext(q, mp);
                return (0);
        }

        dlp = (union DL_primitives *)mp->b_rptr;
        switch (dlp->dl_primitive) {
        case DL_OK_ACK: {
                /* check for proper size, let upper layer deal with error */
                if (MBLKL(mp) < DL_OK_ACK_SIZE) {
                        putnext(q, mp);
                        return (0);
                }

                dsp = q->q_ptr;
                switch (dlp->ok_ack.dl_correct_primitive) {
                case DL_ATTACH_REQ:
                        /*
                         * ddi_assoc_queue_with_devi() will hold the dip,
                         * so release our hold after the association.
                         *
                         * A NULL dip means we didn't hold the dip on the
                         * write side (unlikely, but possible), so we do
                         * nothing.
                         */
                        mutex_enter(&dsp->dr_lock);
                        dip = dsp->dr_dip[dsp->dr_nlast];
                        dsp->dr_dip[dsp->dr_nlast] = NULL;
                        INCR(dsp->dr_nlast);
                        mutex_exit(&dsp->dr_lock);
                        if (dip) {
                                ddi_assoc_queue_with_devi(q, dip);
                                ddi_release_devi(dip);
                        }
                        break;

                case DL_DETACH_REQ:
                        ddi_assoc_queue_with_devi(q, NULL);
                        break;
                default:
                        break;
                }
                break;
        }
        case DL_ERROR_ACK:
                if (dlp->error_ack.dl_error_primitive != DL_ATTACH_REQ)
                        break;

                dsp = q->q_ptr;
                mutex_enter(&dsp->dr_lock);
                dip = dsp->dr_dip[dsp->dr_nlast];
                dsp->dr_dip[dsp->dr_nlast] = NULL;
                INCR(dsp->dr_nlast);
                mutex_exit(&dsp->dr_lock);
                /*
                 * Release the dip on attach failure
                 */
                if (dip) {
                        ddi_release_devi(dip);
                }
                break;
        default:
                break;
        }

        putnext(q, mp);
        return (0);
}

/*
 * Detect DL_ATTACH_REQ and hold the dip to prevent it from detaching
 */
static int
drwput(queue_t *q, mblk_t *mp)
{
        struct drstate *dsp;
        union DL_primitives *dlp;
        dev_info_t *dip;

        switch (DB_TYPE(mp)) {
        case M_PROTO:
        case M_PCPROTO:
                break;
        default:
                putnext(q, mp);
                return (0);
        }

        /* make sure size is sufficient for dl_primitive */
        if (MBLKL(mp) < sizeof (t_uscalar_t)) {
                putnext(q, mp);
                return (0);
        }

        dlp = (union DL_primitives *)mp->b_rptr;
        switch (dlp->dl_primitive) {
        case DL_ATTACH_REQ:
                /*
                 * Check for proper size of the message.
                 *
                 * If the size is correct, get the ppa and attempt to
                 * hold the device, assuming ppa is the instance.
                 *
                 * If the size is wrong, we can't get the ppa, but we
                 * still increment dr_nfirst because the read side will
                 * get an error ack on the DL_ATTACH_REQ.
                 */
                dip = NULL;
                dsp = q->q_ptr;
                if (MBLKL(mp) >= DL_ATTACH_REQ_SIZE) {
                        dip = ddi_hold_devi_by_instance(dsp->dr_major,
                            dlp->attach_req.dl_ppa, E_DDI_HOLD_DEVI_NOATTACH);
                }

                mutex_enter(&dsp->dr_lock);
                dsp->dr_dip[dsp->dr_nfirst] = dip;
                INCR(dsp->dr_nfirst);
                /*
                 * Check if ring buffer is full. If so, assert in debug
                 * kernel and produce a warning in non-debug kernel.
                 */
                ASSERT(dsp->dr_nfirst != dsp->dr_nlast);
                if (dsp->dr_nfirst == dsp->dr_nlast) {
                        cmn_err(CE_WARN, "drcompat: internal buffer full");
                }
                mutex_exit(&dsp->dr_lock);
                break;
        default:
                break;
        }

        putnext(q, mp);
        return (0);
}