1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <mdb/mdb_modapi.h>
27
28 #include <pthread.h>
29 #include <stddef.h>
30 #include <dlfcn.h>
31 #include <link.h>
32 #include <libproc.h>
33
34 #include <Python.h>
35 #include <frameobject.h>
36
37 /*
38 * Decoding Python Stack Frames
39 * ============================
40 *
41 * Python2 uses a variety of objects to construct its call chain. An address
42 * space may have one or more PyInterpreterState objects, which are the base
43 * object in the interpreter's state. These objects are kept in a linked list
44 * with a head pointer named interp_head. This makes it possible for the
45 * debugger to get a toehold on data structures necessary to understand the
46 * interpreter. Since most of these structures are linked out of the
47 * InterpreterState, traversals generally start here.
48 *
49 * In order to decode a frame, the debugger needs to walk from
 * PyInterpreterState down to a PyCodeObject.  The diagram below shows the
 * objects that must be examined in order to reach a leaf PyCodeObject.
52 *
 *                +--------------------+ next  +--------------------+ next
 * interp_head -> | PyInterpreterState | ----> | PyInterpreterState | ---> ...
 *                +--------------------+       +--------------------+
 *                    |                                  | tstate_head
 *                    | tstate_head                      V
 *                    |                          +---------------+ frame
 *                    V                          | PyThreadState | -----> ...
 *            +---------------+ frame            +---------------+
 *            | PyThreadState | ---> ...
 *            +---------------+
 *                    | next
 *                    V
 *            +---------------+ frame +---------------+ f_back +---------------+
 *            | PyThreadState | ----> | PyFrameObject | -----> | PyFrameObject |
 *            +---------------+       +---------------+        +---------------+
 *                                            |                        |
 *                                            | f_code                 | f_code
 *                                            V                        V
 *                                     +--------------+               ...
 *                                     | PyCodeObject |
 *                                     +--------------+
 *                           co_filename |    |     | co_lnotab
 *                         +-------------+    |     +-------------+
 *                         |          co_name |                   |
 *                         V                  V                   V
 *                 +----------------+ +----------------+  +----------------+
 *                 | PyStringObject | | PyStringObject |  | PyStringObject |
 *                 +----------------+ +----------------+  +----------------+
81 *
82 * The interp_head pointer is a list of one or more PyInterpreterState
83 * objects. Each of these objects can contain one or more PyThreadState
84 * objects. The PyInterpreterState object keeps a pointer to the head of the
85 * list of PyThreadState objects as tstate_head.
86 *
87 * Each thread keeps ahold of its stack frames. The PyThreadState object
88 * has a pointer to the topmost PyFrameObject, kept in frame. The
89 * successive frames on the stack are kept linked in the PyFrameObject's
90 * f_back pointer, with each frame pointing to its caller.
91 *
92 * In order to decode each call frame, our code needs to look at the
93 * PyCodeObject for each frame. Essentially, this is the code that is
94 * being executed in the frame. The PyFrameObject keeps a pointer to this
95 * code object in f_code. In order to print meaningful debug information,
96 * it's necessary to extract the Python filename (co_filename), the
97 * function name (co_name), and the line number within the file
98 * (co_lnotab). The filename and function are stored as strings, but the
99 * line number is a mapping of bytecode offsets to line numbers. The
100 * description of the lnotab algorithm lives here:
101 *
102 * http://svn.python.org/projects/python/trunk/Objects/lnotab_notes.txt
103 *
104 * In order to decode the frame, the debugger needs to walk each
105 * InterpreterState object. For each InterpreterState, every PyThreadState
106 * must be traversed. The PyThreadState objects point to the
107 * PyFrameObjects. For every thread, we must walk the frames backwards and
108 * decode the strings that are in the PyCodeObjects.
109 */
110
111 /*
112 * The Python-dependent debugging functionality lives in its own helper
113 * library. The helper agent is provided by libpython2.[67]_db.so, which
114 * is also used by pstack(1) for debugging Python processes.
115 *
116 * Define needed prototypes here.
117 */
118
119 #define PYDB_VERSION 1
120 typedef struct pydb_agent pydb_agent_t;
121 typedef struct pydb_iter pydb_iter_t;
122
123 typedef pydb_agent_t *(*pydb_agent_create_f)(struct ps_prochandle *P, int vers);
124 typedef void (*pydb_agent_destroy_f)(pydb_agent_t *py);
125 typedef int (*pydb_get_frameinfo_f)(pydb_agent_t *py, uintptr_t frame_addr,
126 char *fbuf, size_t bufsz, int verbose);
127 typedef pydb_iter_t *(*pydb_iter_init_f)(pydb_agent_t *py, uintptr_t addr);
128 typedef uintptr_t (*pydb_iter_next_f)(pydb_iter_t *iter);
129 typedef void (*pydb_iter_fini_f)(pydb_iter_t *iter);
130
131 static pydb_agent_create_f pydb_agent_create;
132 static pydb_agent_destroy_f pydb_agent_destroy;
133 static pydb_get_frameinfo_f pydb_get_frameinfo;
134 static pydb_iter_init_f pydb_frame_iter_init;
135 static pydb_iter_init_f pydb_interp_iter_init;
136 static pydb_iter_init_f pydb_thread_iter_init;
137 static pydb_iter_next_f pydb_iter_next;
138 static pydb_iter_fini_f pydb_iter_fini;
139
140 static pydb_agent_t *pydb_hdl = NULL;
141 static void *pydb_dlhdl = NULL;
142
143 /*ARGSUSED*/
144 static int
py_frame(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)145 py_frame(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
146 {
147 char buf[1024];
148 int verbose = FALSE;
149
150 if (mdb_getopts(argc, argv,
151 'v', MDB_OPT_SETBITS, TRUE, &verbose,
152 NULL) != argc) {
153 return (DCMD_USAGE);
154 }
155
156 if (flags & DCMD_PIPE_OUT) {
157 mdb_warn("py_stack cannot output into a pipe\n");
158 return (DCMD_ERR);
159 }
160
161 if (!(flags & DCMD_ADDRSPEC)) {
162 mdb_warn("no address");
163 return (DCMD_USAGE);
164 }
165
166 if (pydb_get_frameinfo(pydb_hdl, addr, buf, sizeof (buf),
167 verbose) < 0) {
168 mdb_warn("Unable to find frame at address %p\n", addr);
169 return (DCMD_ERR);
170 }
171
172 mdb_printf("%s", buf);
173
174 return (DCMD_OK);
175 }
176
177 int
py_interp_walk_init(mdb_walk_state_t * wsp)178 py_interp_walk_init(mdb_walk_state_t *wsp)
179 {
180 pydb_iter_t *pdi;
181
182 pdi = pydb_interp_iter_init(pydb_hdl, wsp->walk_addr);
183
184 if (pdi == NULL) {
185 mdb_warn("unable to create interpreter iterator\n");
186 return (DCMD_ERR);
187 }
188
189 wsp->walk_data = pdi;
190
191 return (WALK_NEXT);
192 }
193
194 int
py_walk_step(mdb_walk_state_t * wsp)195 py_walk_step(mdb_walk_state_t *wsp)
196 {
197 pydb_iter_t *pdi = wsp->walk_data;
198 uintptr_t addr;
199 int status;
200
201 addr = pydb_iter_next(pdi);
202
203 if (addr == NULL) {
204 return (WALK_DONE);
205 }
206
207 status = wsp->walk_callback(addr, 0, wsp->walk_cbdata);
208
209 return (status);
210 }
211
212 void
py_walk_fini(mdb_walk_state_t * wsp)213 py_walk_fini(mdb_walk_state_t *wsp)
214 {
215 pydb_iter_t *pdi = wsp->walk_data;
216 pydb_iter_fini(pdi);
217 }
218
219 int
py_thread_walk_init(mdb_walk_state_t * wsp)220 py_thread_walk_init(mdb_walk_state_t *wsp)
221 {
222 pydb_iter_t *pdi;
223
224 pdi = pydb_thread_iter_init(pydb_hdl, wsp->walk_addr);
225 if (pdi == NULL) {
226 mdb_warn("unable to create thread iterator\n");
227 return (DCMD_ERR);
228 }
229
230 wsp->walk_data = pdi;
231
232 return (WALK_NEXT);
233 }
234
235 int
py_frame_walk_init(mdb_walk_state_t * wsp)236 py_frame_walk_init(mdb_walk_state_t *wsp)
237 {
238 pydb_iter_t *pdi;
239
240 pdi = pydb_frame_iter_init(pydb_hdl, wsp->walk_addr);
241 if (pdi == NULL) {
242 mdb_warn("unable to create frame iterator\n");
243 return (DCMD_ERR);
244 }
245
246 wsp->walk_data = pdi;
247
248 return (WALK_NEXT);
249 }
250
251 /*ARGSUSED*/
252 static int
python_stack(uintptr_t addr,const PyThreadState * ts,uint_t * verbose)253 python_stack(uintptr_t addr, const PyThreadState *ts, uint_t *verbose)
254 {
255 mdb_arg_t nargv;
256 uint_t nargc = (verbose != NULL && *verbose) ? 1 : 0;
257 /*
258 * Pass the ThreadState to the frame walker. Have frame walker
259 * call frame dcmd.
260 */
261 mdb_printf("PyThreadState: %0?p\n", addr);
262
263 nargv.a_type = MDB_TYPE_STRING;
264 nargv.a_un.a_str = "-v";
265
266 if (mdb_pwalk_dcmd("pyframe", "pyframe", nargc, &nargv, addr) == -1) {
267 mdb_warn("can't walk 'pyframe'");
268 return (WALK_ERR);
269 }
270
271 return (WALK_NEXT);
272 }
273
274 /*ARGSUSED*/
275 static int
python_thread(uintptr_t addr,const PyInterpreterState * is,uint_t * verbose)276 python_thread(uintptr_t addr, const PyInterpreterState *is, uint_t *verbose)
277 {
278 /*
279 * Pass the InterpreterState to the threadstate walker.
280 */
281 if (mdb_pwalk("pythread", (mdb_walk_cb_t)python_stack, verbose,
282 addr) == -1) {
283 mdb_warn("can't walk 'pythread'");
284 return (WALK_ERR);
285 }
286
287 return (WALK_NEXT);
288 }
289
290 /*ARGSUSED*/
291 static int
py_stack(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)292 py_stack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
293 {
294 uint_t verbose = FALSE;
295
296 if (mdb_getopts(argc, argv,
297 'v', MDB_OPT_SETBITS, TRUE, &verbose,
298 NULL) != argc)
299 return (DCMD_USAGE);
300
301 if (flags & DCMD_PIPE_OUT) {
302 mdb_warn("py_stack cannot output into a pipe\n");
303 return (DCMD_ERR);
304 }
305
306 if (flags & DCMD_ADDRSPEC) {
307 mdb_arg_t nargv;
308 uint_t nargc = verbose ? 1 : 0;
309
310 nargv.a_type = MDB_TYPE_STRING;
311 nargv.a_un.a_str = "-v";
312
313 if (mdb_pwalk_dcmd("pyframe", "pyframe", nargc, &nargv, addr)
314 == -1) {
315 mdb_warn("can't walk 'pyframe'");
316 return (DCMD_ERR);
317 }
318 return (DCMD_OK);
319 }
320
321 if (mdb_walk("pyinterp", (mdb_walk_cb_t)python_thread,
322 &verbose) == -1) {
323 mdb_warn("can't walk 'pyinterp'");
324 return (DCMD_ERR);
325 }
326
327 return (DCMD_OK);
328 }
329
330 static const mdb_dcmd_t dcmds[] = {
331 { "pystack", "[-v]", "print python stacks", py_stack },
332 { "pyframe", "[-v]", "print python frames", py_frame },
333 { NULL }
334 };
335
336 static const mdb_walker_t walkers[] = {
337 { "pyinterp", "walk python interpreter structures",
338 py_interp_walk_init, py_walk_step, py_walk_fini },
339 { "pythread", "given an interpreter, walk the list of python threads",
340 py_thread_walk_init, py_walk_step, py_walk_fini },
341 { "pyframe", "given a thread state, walk the list of frame objects",
342 py_frame_walk_init, py_walk_step, py_walk_fini },
343 { NULL }
344 };
345
346 static const mdb_modinfo_t modinfo = {
347 MDB_API_VERSION, dcmds, walkers
348 };
349
350 /*ARGSUSED*/
351 static int
python_object_iter(void * cd,const prmap_t * pmp,const char * obj)352 python_object_iter(void *cd, const prmap_t *pmp, const char *obj)
353 {
354 char path[PATH_MAX];
355 char *name;
356 char *s1, *s2;
357 struct ps_prochandle *Pr = cd;
358
359 name = strstr(obj, "/libpython");
360
361 if (name) {
362 (void) strcpy(path, obj);
363 if (Pstatus(Pr)->pr_dmodel != PR_MODEL_NATIVE) {
364 s1 = name;
365 s2 = path + (s1 - obj);
366 (void) strcpy(s2, "/64");
367 s2 += 3;
368 (void) strcpy(s2, s1);
369 }
370
371 s1 = strstr(obj, ".so");
372 s2 = strstr(path, ".so");
373 (void) strcpy(s2, "_db");
374 s2 += 3;
375 (void) strcpy(s2, s1);
376
377 if ((pydb_dlhdl = dlopen(path, RTLD_LAZY|RTLD_GLOBAL)) != NULL)
378 return (1);
379 }
380
381 return (0);
382 }
383
384 static int
python_db_init(void)385 python_db_init(void)
386 {
387 struct ps_prochandle *Ph;
388
389 if (mdb_get_xdata("pshandle", &Ph, sizeof (Ph)) == -1) {
390 mdb_warn("couldn't read pshandle xdata\n");
391 dlclose(pydb_dlhdl);
392 pydb_dlhdl = NULL;
393 return (-1);
394 }
395
396 (void) Pobject_iter(Ph, python_object_iter, Ph);
397
398 pydb_agent_create = (pydb_agent_create_f)
399 dlsym(pydb_dlhdl, "pydb_agent_create");
400 pydb_agent_destroy = (pydb_agent_destroy_f)
401 dlsym(pydb_dlhdl, "pydb_agent_destroy");
402 pydb_get_frameinfo = (pydb_get_frameinfo_f)
403 dlsym(pydb_dlhdl, "pydb_get_frameinfo");
404
405 pydb_frame_iter_init = (pydb_iter_init_f)
406 dlsym(pydb_dlhdl, "pydb_frame_iter_init");
407 pydb_interp_iter_init = (pydb_iter_init_f)
408 dlsym(pydb_dlhdl, "pydb_interp_iter_init");
409 pydb_thread_iter_init = (pydb_iter_init_f)
410 dlsym(pydb_dlhdl, "pydb_thread_iter_init");
411 pydb_iter_next = (pydb_iter_next_f)dlsym(pydb_dlhdl, "pydb_iter_next");
412 pydb_iter_fini = (pydb_iter_fini_f)dlsym(pydb_dlhdl, "pydb_iter_fini");
413
414
415 if (pydb_agent_create == NULL || pydb_agent_destroy == NULL ||
416 pydb_get_frameinfo == NULL || pydb_frame_iter_init == NULL ||
417 pydb_interp_iter_init == NULL || pydb_thread_iter_init == NULL ||
418 pydb_iter_next == NULL || pydb_iter_fini == NULL) {
419 mdb_warn("couldn't load pydb functions");
420 dlclose(pydb_dlhdl);
421 pydb_dlhdl = NULL;
422 return (-1);
423 }
424
425 pydb_hdl = pydb_agent_create(Ph, PYDB_VERSION);
426 if (pydb_hdl == NULL) {
427 mdb_warn("unable to create pydb_agent");
428 dlclose(pydb_dlhdl);
429 pydb_dlhdl = NULL;
430 return (-1);
431 }
432
433 return (0);
434 }
435
436 static void
python_db_fini(void)437 python_db_fini(void)
438 {
439 if (pydb_dlhdl) {
440 pydb_agent_destroy(pydb_hdl);
441 pydb_hdl = NULL;
442
443 dlclose(pydb_dlhdl);
444 pydb_dlhdl = NULL;
445 }
446 }
447
448 const mdb_modinfo_t *
_mdb_init(void)449 _mdb_init(void)
450 {
451 if (python_db_init() != 0)
452 return (NULL);
453
454 return (&modinfo);
455 }
456
457 void
_mdb_fini(void)458 _mdb_fini(void)
459 {
460 python_db_fini();
461 }
462