xref: /illumos-gate/usr/src/cmd/mdb/common/modules/libpython/libpython.c (revision 78801af7286cd73dbc996d470f789e75993cf15d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 
28 #include <pthread.h>
29 #include <stddef.h>
30 #include <dlfcn.h>
31 #include <link.h>
32 #include <libproc.h>
33 
34 #include <Python.h>
35 #include <frameobject.h>
36 
37 /*
38  * Decoding Python Stack Frames
39  * ============================
40  *
41  * Python2 uses a variety of objects to construct its call chain.  An address
42  * space may have one or more PyInterpreterState objects, which are the base
43  * object in the interpreter's state.  These objects are kept in a linked list
44  * with a head pointer named interp_head.  This makes it possible for the
45  * debugger to get a toehold on data structures necessary to understand the
46  * interpreter.  Since most of these structures are linked out of the
47  * InterpreterState, traversals generally start here.
48  *
49  * In order to decode a frame, the debugger needs to walk from
50  * PyInterpreterState down to a PyCodeObject.  The diagram below shows the
 * objects that must be examined in order to reach a leaf PyCodeObject.
52  *
53  *                +--------------------+ next   +--------------------+ next
54  * interp_head -> | PyInterpreterState | ---->  | PyInterpreterState | ---> ...
55  *                +--------------------+        +--------------------+
56  *                  |                            | tstate_head
57  *                  | tstate_head                V
58  *                  |                 +---------------+  frame
59  *                  V                 | PyThreadState | -----> ...
60  *  +---------------+  frame          +---------------+
61  *  | PyThreadState |  ---> ...
62  *  +---------------+
63  *          | next
64  *          V
65  *  +---------------+  frame    +---------------+ f_back +---------------+
66  *  | PyThreadState |  ------>  | PyFrameObject | -----> | PyFrameObject |
67  *  +---------------+           +---------------+        +---------------+
68  *                                      |                       |
69  *                                      | f_code                | f_code
70  *                                      V                       V
71  *                              +--------------+               ...
72  *                              | PyCodeObject |
73  *                              +--------------+
74  *                 co_filename   |      |     | co_lnotab
75  *                 +-------------+      |     +-------------+
76  *                 |           co_name  |                   |
77  *                 V                    V                   V
78  * +----------------+          +----------------+         +----------------+
79  * | PyStringObject |          | PyStringObject |         | PyStringObject |
80  * +----------------+          +----------------+         +----------------+
81  *
82  * The interp_head pointer is a list of one or more PyInterpreterState
83  * objects.  Each of these objects can contain one or more PyThreadState
84  * objects.  The PyInterpreterState object keeps a pointer to the head of the
85  * list of PyThreadState objects as tstate_head.
86  *
87  * Each thread keeps ahold of its stack frames.  The PyThreadState object
88  * has a pointer to the topmost PyFrameObject, kept in frame.  The
89  * successive frames on the stack are kept linked in the PyFrameObject's
90  * f_back pointer, with each frame pointing to its caller.
91  *
92  * In order to decode each call frame, our code needs to look at the
93  * PyCodeObject for each frame.  Essentially, this is the code that is
94  * being executed in the frame.  The PyFrameObject keeps a pointer to this
95  * code object in f_code.  In order to print meaningful debug information,
96  * it's necessary to extract the Python filename (co_filename), the
97  * function name (co_name), and the line number within the file
98  * (co_lnotab).  The filename and function are stored as strings, but the
99  * line number is a mapping of bytecode offsets to line numbers.  The
100  * description of the lnotab algorithm lives here:
101  *
102  * http://svn.python.org/projects/python/trunk/Objects/lnotab_notes.txt
103  *
104  * In order to decode the frame, the debugger needs to walk each
105  * InterpreterState object.  For each InterpreterState, every PyThreadState
106  * must be traversed.  The PyThreadState objects point to the
107  * PyFrameObjects.  For every thread, we must walk the frames backwards and
108  * decode the strings that are in the PyCodeObjects.
109  */
110 
111 /*
112  * The Python-dependent debugging functionality lives in its own helper
113  * library.  The helper agent is provided by libpython2.[67]_db.so, which
114  * is also used by pstack(1) for debugging Python processes.
115  *
116  * Define needed prototypes here.
117  */
118 
/* Interface version handed to pydb_agent_create(). */
#define	PYDB_VERSION	1

/* Only ever used through a pointer here; the definition lives in libproc. */
struct ps_prochandle;

/* Opaque types owned by the helper library. */
typedef struct pydb_agent pydb_agent_t;
typedef struct pydb_iter pydb_iter_t;

/* Agent construction and teardown. */
typedef pydb_agent_t *(*pydb_agent_create_f)(struct ps_prochandle *proc,
    int version);
typedef void (*pydb_agent_destroy_f)(pydb_agent_t *agent);

/* Render the frame at frame_addr as text into fbuf (bufsz bytes). */
typedef int (*pydb_get_frameinfo_f)(pydb_agent_t *agent, uintptr_t frame_addr,
    char *fbuf, size_t bufsz, int verbose);

/* Iterator life cycle: init, repeated next, then fini. */
typedef pydb_iter_t *(*pydb_iter_init_f)(pydb_agent_t *agent, uintptr_t addr);
typedef uintptr_t (*pydb_iter_next_f)(pydb_iter_t *it);
typedef void (*pydb_iter_fini_f)(pydb_iter_t *it);

/*
 * Entry points into the helper library.  They are filled in with dlsym()
 * by python_db_init().
 */
static pydb_agent_create_f pydb_agent_create;
static pydb_agent_destroy_f pydb_agent_destroy;
static pydb_get_frameinfo_f pydb_get_frameinfo;
static pydb_iter_init_f pydb_interp_iter_init;
static pydb_iter_init_f pydb_thread_iter_init;
static pydb_iter_init_f pydb_frame_iter_init;
static pydb_iter_next_f pydb_iter_next;
static pydb_iter_fini_f pydb_iter_fini;

/* dlopen() handle of the helper library and the agent created from it. */
static void *pydb_dlhdl = NULL;
static pydb_agent_t *pydb_hdl = NULL;
142 
143 /*ARGSUSED*/
144 static int
145 py_frame(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
146 {
147 	char buf[1024];
148 	int verbose = FALSE;
149 
150 	if (mdb_getopts(argc, argv,
151 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
152 	    NULL) != argc) {
153 		return (DCMD_USAGE);
154 	}
155 
156 	if (flags & DCMD_PIPE_OUT) {
157 		mdb_warn("py_stack cannot output into a pipe\n");
158 		return (DCMD_ERR);
159 	}
160 
161 	if (!(flags & DCMD_ADDRSPEC)) {
162 		mdb_warn("no address");
163 		return (DCMD_USAGE);
164 	}
165 
166 	if (pydb_get_frameinfo(pydb_hdl, addr, buf, sizeof (buf),
167 	    verbose) < 0) {
168 		mdb_warn("Unable to find frame at address %p\n", addr);
169 		return (DCMD_ERR);
170 	}
171 
172 	mdb_printf("%s", buf);
173 
174 	return (DCMD_OK);
175 }
176 
177 int
178 py_interp_walk_init(mdb_walk_state_t *wsp)
179 {
180 	pydb_iter_t *pdi;
181 
182 	pdi = pydb_interp_iter_init(pydb_hdl, wsp->walk_addr);
183 
184 	if (pdi == NULL) {
185 		mdb_warn("unable to create interpreter iterator\n");
186 		return (DCMD_ERR);
187 	}
188 
189 	wsp->walk_data = pdi;
190 
191 	return (WALK_NEXT);
192 }
193 
194 int
195 py_walk_step(mdb_walk_state_t *wsp)
196 {
197 	pydb_iter_t *pdi = wsp->walk_data;
198 	uintptr_t addr;
199 	int status;
200 
201 	addr = pydb_iter_next(pdi);
202 
203 	if (addr == 0) {
204 		return (WALK_DONE);
205 	}
206 
207 	status = wsp->walk_callback(addr, 0, wsp->walk_cbdata);
208 
209 	return (status);
210 }
211 
212 void
213 py_walk_fini(mdb_walk_state_t *wsp)
214 {
215 	pydb_iter_t *pdi = wsp->walk_data;
216 	pydb_iter_fini(pdi);
217 }
218 
219 int
220 py_thread_walk_init(mdb_walk_state_t *wsp)
221 {
222 	pydb_iter_t *pdi;
223 
224 	pdi = pydb_thread_iter_init(pydb_hdl, wsp->walk_addr);
225 	if (pdi == NULL) {
226 		mdb_warn("unable to create thread iterator\n");
227 		return (DCMD_ERR);
228 	}
229 
230 	wsp->walk_data = pdi;
231 
232 	return (WALK_NEXT);
233 }
234 
235 int
236 py_frame_walk_init(mdb_walk_state_t *wsp)
237 {
238 	pydb_iter_t *pdi;
239 
240 	pdi = pydb_frame_iter_init(pydb_hdl, wsp->walk_addr);
241 	if (pdi == NULL) {
242 		mdb_warn("unable to create frame iterator\n");
243 		return (DCMD_ERR);
244 	}
245 
246 	wsp->walk_data = pdi;
247 
248 	return (WALK_NEXT);
249 }
250 
251 /*ARGSUSED*/
252 static int
253 python_stack(uintptr_t addr, const PyThreadState *ts, uint_t *verbose)
254 {
255 	mdb_arg_t nargv;
256 	uint_t nargc = (verbose != NULL && *verbose) ? 1 : 0;
257 	/*
258 	 * Pass the ThreadState to the frame walker. Have frame walker
259 	 * call frame dcmd.
260 	 */
261 	mdb_printf("PyThreadState: %0?p\n", addr);
262 
263 	nargv.a_type = MDB_TYPE_STRING;
264 	nargv.a_un.a_str = "-v";
265 
266 	if (mdb_pwalk_dcmd("pyframe", "pyframe", nargc, &nargv, addr) == -1) {
267 		mdb_warn("can't walk 'pyframe'");
268 		return (WALK_ERR);
269 	}
270 
271 	return (WALK_NEXT);
272 }
273 
274 /*ARGSUSED*/
275 static int
276 python_thread(uintptr_t addr, const PyInterpreterState *is, uint_t *verbose)
277 {
278 	/*
279 	 * Pass the InterpreterState to the threadstate walker.
280 	 */
281 	if (mdb_pwalk("pythread", (mdb_walk_cb_t)python_stack, verbose,
282 	    addr) == -1) {
283 		mdb_warn("can't walk 'pythread'");
284 		return (WALK_ERR);
285 	}
286 
287 	return (WALK_NEXT);
288 }
289 
290 /*ARGSUSED*/
291 static int
292 py_stack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
293 {
294 	uint_t verbose = FALSE;
295 
296 	if (mdb_getopts(argc, argv,
297 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
298 	    NULL) != argc)
299 		return (DCMD_USAGE);
300 
301 	if (flags & DCMD_PIPE_OUT) {
302 		mdb_warn("py_stack cannot output into a pipe\n");
303 		return (DCMD_ERR);
304 	}
305 
306 	if (flags & DCMD_ADDRSPEC) {
307 		mdb_arg_t nargv;
308 		uint_t nargc = verbose ? 1 : 0;
309 
310 		nargv.a_type = MDB_TYPE_STRING;
311 		nargv.a_un.a_str = "-v";
312 
313 		if (mdb_pwalk_dcmd("pyframe", "pyframe", nargc, &nargv, addr)
314 		    == -1) {
315 			mdb_warn("can't walk 'pyframe'");
316 			return (DCMD_ERR);
317 		}
318 		return (DCMD_OK);
319 	}
320 
321 	if (mdb_walk("pyinterp", (mdb_walk_cb_t)python_thread,
322 	    &verbose) == -1) {
323 		mdb_warn("can't walk 'pyinterp'");
324 		return (DCMD_ERR);
325 	}
326 
327 	return (DCMD_OK);
328 }
329 
330 static const mdb_dcmd_t dcmds[] = {
331 	{ "pystack", "[-v]", "print python stacks", py_stack },
332 	{ "pyframe", "[-v]", "print python frames", py_frame },
333 	{ NULL }
334 };
335 
336 static const mdb_walker_t walkers[] = {
337 	{ "pyinterp", "walk python interpreter structures",
338 		py_interp_walk_init, py_walk_step, py_walk_fini },
339 	{ "pythread", "given an interpreter, walk the list of python threads",
340 		py_thread_walk_init, py_walk_step, py_walk_fini },
341 	{ "pyframe", "given a thread state, walk the list of frame objects",
342 		py_frame_walk_init, py_walk_step, py_walk_fini },
343 	{ NULL }
344 };
345 
346 static const mdb_modinfo_t modinfo = {
347 	MDB_API_VERSION, dcmds, walkers
348 };
349 
350 /*ARGSUSED*/
351 static int
352 python_object_iter(void *cd, const prmap_t *pmp, const char *obj)
353 {
354 	char path[PATH_MAX];
355 	char *name;
356 	char *s1, *s2;
357 	struct ps_prochandle *Pr = cd;
358 
359 	name = strstr(obj, "/libpython");
360 
361 	if (name) {
362 		(void) strcpy(path, obj);
363 		if (Pstatus(Pr)->pr_dmodel != PR_MODEL_NATIVE) {
364 			s1 = name;
365 			s2 = path + (s1 - obj);
366 			(void) strcpy(s2, "/64");
367 			s2 += 3;
368 			(void) strcpy(s2, s1);
369 		}
370 
371 		s1 = strstr(obj, ".so");
372 		s2 = strstr(path, ".so");
373 		(void) strcpy(s2, "_db");
374 		s2 += 3;
375 		(void) strcpy(s2, s1);
376 
377 		if ((pydb_dlhdl = dlopen(path, RTLD_LAZY|RTLD_GLOBAL)) != NULL)
378 			return (1);
379 	}
380 
381 	return (0);
382 }
383 
384 static int
385 python_db_init(void)
386 {
387 	struct ps_prochandle *Ph;
388 
389 	if (mdb_get_xdata("pshandle", &Ph, sizeof (Ph)) == -1) {
390 		mdb_warn("couldn't read pshandle xdata\n");
391 		dlclose(pydb_dlhdl);
392 		pydb_dlhdl = NULL;
393 		return (-1);
394 	}
395 
396 	(void) Pobject_iter(Ph, python_object_iter, Ph);
397 
398 	pydb_agent_create = (pydb_agent_create_f)
399 	    dlsym(pydb_dlhdl, "pydb_agent_create");
400 	pydb_agent_destroy = (pydb_agent_destroy_f)
401 	    dlsym(pydb_dlhdl, "pydb_agent_destroy");
402 	pydb_get_frameinfo = (pydb_get_frameinfo_f)
403 	    dlsym(pydb_dlhdl, "pydb_get_frameinfo");
404 
405 	pydb_frame_iter_init = (pydb_iter_init_f)
406 	    dlsym(pydb_dlhdl, "pydb_frame_iter_init");
407 	pydb_interp_iter_init = (pydb_iter_init_f)
408 	    dlsym(pydb_dlhdl, "pydb_interp_iter_init");
409 	pydb_thread_iter_init = (pydb_iter_init_f)
410 	    dlsym(pydb_dlhdl, "pydb_thread_iter_init");
411 	pydb_iter_next = (pydb_iter_next_f)dlsym(pydb_dlhdl, "pydb_iter_next");
412 	pydb_iter_fini = (pydb_iter_fini_f)dlsym(pydb_dlhdl, "pydb_iter_fini");
413 
414 
415 	if (pydb_agent_create == NULL || pydb_agent_destroy == NULL ||
416 	    pydb_get_frameinfo == NULL || pydb_frame_iter_init == NULL ||
417 	    pydb_interp_iter_init == NULL || pydb_thread_iter_init == NULL ||
418 	    pydb_iter_next == NULL || pydb_iter_fini == NULL) {
419 		mdb_warn("couldn't load pydb functions");
420 		dlclose(pydb_dlhdl);
421 		pydb_dlhdl = NULL;
422 		return (-1);
423 	}
424 
425 	pydb_hdl = pydb_agent_create(Ph, PYDB_VERSION);
426 	if (pydb_hdl == NULL) {
427 		mdb_warn("unable to create pydb_agent");
428 		dlclose(pydb_dlhdl);
429 		pydb_dlhdl = NULL;
430 		return (-1);
431 	}
432 
433 	return (0);
434 }
435 
436 static void
437 python_db_fini(void)
438 {
439 	if (pydb_dlhdl) {
440 		pydb_agent_destroy(pydb_hdl);
441 		pydb_hdl = NULL;
442 
443 		dlclose(pydb_dlhdl);
444 		pydb_dlhdl = NULL;
445 	}
446 }
447 
448 const mdb_modinfo_t *
449 _mdb_init(void)
450 {
451 	if (python_db_init() != 0)
452 		return (NULL);
453 
454 	return (&modinfo);
455 }
456 
/*
 * Module unload entry point.  All teardown is done by python_db_fini().
 */
void
_mdb_fini(void)
{
	python_db_fini();
}
462