xref: /linux/tools/perf/scripts/python/export-to-postgresql.py (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1# export-to-postgresql.py: export perf data to a postgresql database
2# Copyright (c) 2014, Intel Corporation.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms and conditions of the GNU General Public License,
6# version 2, as published by the Free Software Foundation.
7#
8# This program is distributed in the hope it will be useful, but WITHOUT
9# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11# more details.
12
13import os
14import sys
15import struct
16import datetime
17
18# To use this script you will need to have installed package python-pyside which
19# provides LGPL-licensed Python bindings for Qt.  You will also need the package
20# libqt4-sql-psql for Qt postgresql support.
21#
22# The script assumes postgresql is running on the local machine and that the
23# user has postgresql permissions to create databases. Examples of installing
24# postgresql and adding such a user are:
25#
26# fedora:
27#
28#	$ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
29#	$ sudo su - postgres -c initdb
30#	$ sudo service postgresql start
31#	$ sudo su - postgres
32#	$ createuser <your user id here>
33#	Shall the new role be a superuser? (y/n) y
34#
35# ubuntu:
36#
37#	$ sudo apt-get install postgresql
38#	$ sudo su - postgres
39#	$ createuser <your user id here>
40#	Shall the new role be a superuser? (y/n) y
41#
42# An example of using this script with Intel PT:
43#
44#	$ perf record -e intel_pt//u ls
45#	$ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls
46#	2015-05-29 12:49:23.464364 Creating database...
47#	2015-05-29 12:49:26.281717 Writing to intermediate files...
48#	2015-05-29 12:49:27.190383 Copying to database...
49#	2015-05-29 12:49:28.140451 Removing intermediate files...
50#	2015-05-29 12:49:28.147451 Adding primary keys
51#	2015-05-29 12:49:28.655683 Adding foreign keys
52#	2015-05-29 12:49:29.365350 Done
53#
54# To browse the database, psql can be used e.g.
55#
56#	$ psql pt_example
57#	pt_example=# select * from samples_view where id < 100;
58#	pt_example=# \d+
59#	pt_example=# \d+ samples_view
60#	pt_example=# \q
61#
62# An example of using the database is provided by the script
63# call-graph-from-postgresql.py.  Refer to that script for details.
64
65from PySide.QtSql import *
66
67# Need to access PostgreSQL C library directly to use COPY FROM STDIN
68from ctypes import *
69libpq = CDLL("libpq.so.5")
70PQconnectdb = libpq.PQconnectdb
71PQconnectdb.restype = c_void_p
72PQfinish = libpq.PQfinish
73PQstatus = libpq.PQstatus
74PQexec = libpq.PQexec
75PQexec.restype = c_void_p
76PQresultStatus = libpq.PQresultStatus
77PQputCopyData = libpq.PQputCopyData
78PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
79PQputCopyEnd = libpq.PQputCopyEnd
80PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
81
82sys.path.append(os.environ['PERF_EXEC_PATH'] + \
83	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
84
85# These perf imports are not used at present
86#from perf_trace_context import *
87#from Core import *
88
89perf_db_export_mode = True
90perf_db_export_calls = False
91
92def usage():
93	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
94	print >> sys.stderr, "where:	columns		'all' or 'branches'"
95	print >> sys.stderr, "		calls		'calls' => create calls table"
96	raise Exception("Too few arguments")
97
98if (len(sys.argv) < 2):
99	usage()
100
101dbname = sys.argv[1]
102
103if (len(sys.argv) >= 3):
104	columns = sys.argv[2]
105else:
106	columns = "all"
107
108if columns not in ("all", "branches"):
109	usage()
110
111branches = (columns == "branches")
112
113if (len(sys.argv) >= 4):
114	if (sys.argv[3] == "calls"):
115		perf_db_export_calls = True
116	else:
117		usage()
118
119output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
120os.mkdir(output_dir_name)
121
122def do_query(q, s):
123	if (q.exec_(s)):
124		return
125	raise Exception("Query failed: " + q.lastError().text())
126
127print datetime.datetime.today(), "Creating database..."
128
129db = QSqlDatabase.addDatabase('QPSQL')
130query = QSqlQuery(db)
131db.setDatabaseName('postgres')
132db.open()
133try:
134	do_query(query, 'CREATE DATABASE ' + dbname)
135except:
136	os.rmdir(output_dir_name)
137	raise
138query.finish()
139query.clear()
140db.close()
141
142db.setDatabaseName(dbname)
143db.open()
144
145query = QSqlQuery(db)
146do_query(query, 'SET client_min_messages TO WARNING')
147
148do_query(query, 'CREATE TABLE selected_events ('
149		'id		bigint		NOT NULL,'
150		'name		varchar(80))')
151do_query(query, 'CREATE TABLE machines ('
152		'id		bigint		NOT NULL,'
153		'pid		integer,'
154		'root_dir 	varchar(4096))')
155do_query(query, 'CREATE TABLE threads ('
156		'id		bigint		NOT NULL,'
157		'machine_id	bigint,'
158		'process_id	bigint,'
159		'pid		integer,'
160		'tid		integer)')
161do_query(query, 'CREATE TABLE comms ('
162		'id		bigint		NOT NULL,'
163		'comm		varchar(16))')
164do_query(query, 'CREATE TABLE comm_threads ('
165		'id		bigint		NOT NULL,'
166		'comm_id	bigint,'
167		'thread_id	bigint)')
168do_query(query, 'CREATE TABLE dsos ('
169		'id		bigint		NOT NULL,'
170		'machine_id	bigint,'
171		'short_name	varchar(256),'
172		'long_name	varchar(4096),'
173		'build_id	varchar(64))')
174do_query(query, 'CREATE TABLE symbols ('
175		'id		bigint		NOT NULL,'
176		'dso_id		bigint,'
177		'sym_start	bigint,'
178		'sym_end	bigint,'
179		'binding	integer,'
180		'name		varchar(2048))')
181do_query(query, 'CREATE TABLE branch_types ('
182		'id		integer		NOT NULL,'
183		'name		varchar(80))')
184
185if branches:
186	do_query(query, 'CREATE TABLE samples ('
187		'id		bigint		NOT NULL,'
188		'evsel_id	bigint,'
189		'machine_id	bigint,'
190		'thread_id	bigint,'
191		'comm_id	bigint,'
192		'dso_id		bigint,'
193		'symbol_id	bigint,'
194		'sym_offset	bigint,'
195		'ip		bigint,'
196		'time		bigint,'
197		'cpu		integer,'
198		'to_dso_id	bigint,'
199		'to_symbol_id	bigint,'
200		'to_sym_offset	bigint,'
201		'to_ip		bigint,'
202		'branch_type	integer,'
203		'in_tx		boolean)')
204else:
205	do_query(query, 'CREATE TABLE samples ('
206		'id		bigint		NOT NULL,'
207		'evsel_id	bigint,'
208		'machine_id	bigint,'
209		'thread_id	bigint,'
210		'comm_id	bigint,'
211		'dso_id		bigint,'
212		'symbol_id	bigint,'
213		'sym_offset	bigint,'
214		'ip		bigint,'
215		'time		bigint,'
216		'cpu		integer,'
217		'to_dso_id	bigint,'
218		'to_symbol_id	bigint,'
219		'to_sym_offset	bigint,'
220		'to_ip		bigint,'
221		'period		bigint,'
222		'weight		bigint,'
223		'transaction	bigint,'
224		'data_src	bigint,'
225		'branch_type	integer,'
226		'in_tx		boolean)')
227
228if perf_db_export_calls:
229	do_query(query, 'CREATE TABLE call_paths ('
230		'id		bigint		NOT NULL,'
231		'parent_id	bigint,'
232		'symbol_id	bigint,'
233		'ip		bigint)')
234	do_query(query, 'CREATE TABLE calls ('
235		'id		bigint		NOT NULL,'
236		'thread_id	bigint,'
237		'comm_id	bigint,'
238		'call_path_id	bigint,'
239		'call_time	bigint,'
240		'return_time	bigint,'
241		'branch_count	bigint,'
242		'call_id	bigint,'
243		'return_id	bigint,'
244		'parent_call_path_id	bigint,'
245		'flags		integer)')
246
247do_query(query, 'CREATE VIEW samples_view AS '
248	'SELECT '
249		'id,'
250		'time,'
251		'cpu,'
252		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
253		'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
254		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
255		'(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
256		'to_hex(ip) AS ip_hex,'
257		'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
258		'sym_offset,'
259		'(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
260		'to_hex(to_ip) AS to_ip_hex,'
261		'(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
262		'to_sym_offset,'
263		'(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
264		'(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
265		'in_tx'
266	' FROM samples')
267
268
269file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
270file_trailer = "\377\377"
271
272def open_output_file(file_name):
273	path_name = output_dir_name + "/" + file_name
274	file = open(path_name, "w+")
275	file.write(file_header)
276	return file
277
278def close_output_file(file):
279	file.write(file_trailer)
280	file.close()
281
282def copy_output_file_direct(file, table_name):
283	close_output_file(file)
284	sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')"
285	do_query(query, sql)
286
287# Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
288def copy_output_file(file, table_name):
289	conn = PQconnectdb("dbname = " + dbname)
290	if (PQstatus(conn)):
291		raise Exception("COPY FROM STDIN PQconnectdb failed")
292	file.write(file_trailer)
293	file.seek(0)
294	sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
295	res = PQexec(conn, sql)
296	if (PQresultStatus(res) != 4):
297		raise Exception("COPY FROM STDIN PQexec failed")
298	data = file.read(65536)
299	while (len(data)):
300		ret = PQputCopyData(conn, data, len(data))
301		if (ret != 1):
302			raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
303		data = file.read(65536)
304	ret = PQputCopyEnd(conn, None)
305	if (ret != 1):
306		raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
307	PQfinish(conn)
308
309def remove_output_file(file):
310	name = file.name
311	file.close()
312	os.unlink(name)
313
314evsel_file		= open_output_file("evsel_table.bin")
315machine_file		= open_output_file("machine_table.bin")
316thread_file		= open_output_file("thread_table.bin")
317comm_file		= open_output_file("comm_table.bin")
318comm_thread_file	= open_output_file("comm_thread_table.bin")
319dso_file		= open_output_file("dso_table.bin")
320symbol_file		= open_output_file("symbol_table.bin")
321branch_type_file	= open_output_file("branch_type_table.bin")
322sample_file		= open_output_file("sample_table.bin")
323if perf_db_export_calls:
324	call_path_file		= open_output_file("call_path_table.bin")
325	call_file		= open_output_file("call_table.bin")
326
327def trace_begin():
328	print datetime.datetime.today(), "Writing to intermediate files..."
329	# id == 0 means unknown.  It is easier to create records for them than replace the zeroes with NULLs
330	evsel_table(0, "unknown")
331	machine_table(0, 0, "unknown")
332	thread_table(0, 0, 0, -1, -1)
333	comm_table(0, "unknown")
334	dso_table(0, 0, "unknown", "unknown", "")
335	symbol_table(0, 0, 0, 0, 0, "unknown")
336	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
337	if perf_db_export_calls:
338		call_path_table(0, 0, 0, 0)
339
340unhandled_count = 0
341
342def trace_end():
343	print datetime.datetime.today(), "Copying to database..."
344	copy_output_file(evsel_file,		"selected_events")
345	copy_output_file(machine_file,		"machines")
346	copy_output_file(thread_file,		"threads")
347	copy_output_file(comm_file,		"comms")
348	copy_output_file(comm_thread_file,	"comm_threads")
349	copy_output_file(dso_file,		"dsos")
350	copy_output_file(symbol_file,		"symbols")
351	copy_output_file(branch_type_file,	"branch_types")
352	copy_output_file(sample_file,		"samples")
353	if perf_db_export_calls:
354		copy_output_file(call_path_file,	"call_paths")
355		copy_output_file(call_file,		"calls")
356
357	print datetime.datetime.today(), "Removing intermediate files..."
358	remove_output_file(evsel_file)
359	remove_output_file(machine_file)
360	remove_output_file(thread_file)
361	remove_output_file(comm_file)
362	remove_output_file(comm_thread_file)
363	remove_output_file(dso_file)
364	remove_output_file(symbol_file)
365	remove_output_file(branch_type_file)
366	remove_output_file(sample_file)
367	if perf_db_export_calls:
368		remove_output_file(call_path_file)
369		remove_output_file(call_file)
370	os.rmdir(output_dir_name)
371	print datetime.datetime.today(), "Adding primary keys"
372	do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
373	do_query(query, 'ALTER TABLE machines        ADD PRIMARY KEY (id)')
374	do_query(query, 'ALTER TABLE threads         ADD PRIMARY KEY (id)')
375	do_query(query, 'ALTER TABLE comms           ADD PRIMARY KEY (id)')
376	do_query(query, 'ALTER TABLE comm_threads    ADD PRIMARY KEY (id)')
377	do_query(query, 'ALTER TABLE dsos            ADD PRIMARY KEY (id)')
378	do_query(query, 'ALTER TABLE symbols         ADD PRIMARY KEY (id)')
379	do_query(query, 'ALTER TABLE branch_types    ADD PRIMARY KEY (id)')
380	do_query(query, 'ALTER TABLE samples         ADD PRIMARY KEY (id)')
381	if perf_db_export_calls:
382		do_query(query, 'ALTER TABLE call_paths      ADD PRIMARY KEY (id)')
383		do_query(query, 'ALTER TABLE calls           ADD PRIMARY KEY (id)')
384
385	print datetime.datetime.today(), "Adding foreign keys"
386	do_query(query, 'ALTER TABLE threads '
387					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
388					'ADD CONSTRAINT processfk  FOREIGN KEY (process_id)   REFERENCES threads    (id)')
389	do_query(query, 'ALTER TABLE comm_threads '
390					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
391					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id)')
392	do_query(query, 'ALTER TABLE dsos '
393					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id)')
394	do_query(query, 'ALTER TABLE symbols '
395					'ADD CONSTRAINT dsofk      FOREIGN KEY (dso_id)       REFERENCES dsos       (id)')
396	do_query(query, 'ALTER TABLE samples '
397					'ADD CONSTRAINT evselfk    FOREIGN KEY (evsel_id)     REFERENCES selected_events (id),'
398					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
399					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
400					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
401					'ADD CONSTRAINT dsofk      FOREIGN KEY (dso_id)       REFERENCES dsos       (id),'
402					'ADD CONSTRAINT symbolfk   FOREIGN KEY (symbol_id)    REFERENCES symbols    (id),'
403					'ADD CONSTRAINT todsofk    FOREIGN KEY (to_dso_id)    REFERENCES dsos       (id),'
404					'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols    (id)')
405	if perf_db_export_calls:
406		do_query(query, 'ALTER TABLE call_paths '
407					'ADD CONSTRAINT parentfk    FOREIGN KEY (parent_id)    REFERENCES call_paths (id),'
408					'ADD CONSTRAINT symbolfk    FOREIGN KEY (symbol_id)    REFERENCES symbols    (id)')
409		do_query(query, 'ALTER TABLE calls '
410					'ADD CONSTRAINT threadfk    FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
411					'ADD CONSTRAINT commfk      FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
412					'ADD CONSTRAINT call_pathfk FOREIGN KEY (call_path_id) REFERENCES call_paths (id),'
413					'ADD CONSTRAINT callfk      FOREIGN KEY (call_id)      REFERENCES samples    (id),'
414					'ADD CONSTRAINT returnfk    FOREIGN KEY (return_id)    REFERENCES samples    (id),'
415					'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
416		do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
417
418	if (unhandled_count):
419		print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
420	print datetime.datetime.today(), "Done"
421
422def trace_unhandled(event_name, context, event_fields_dict):
423	global unhandled_count
424	unhandled_count += 1
425
426def sched__sched_switch(*x):
427	pass
428
429def evsel_table(evsel_id, evsel_name, *x):
430	n = len(evsel_name)
431	fmt = "!hiqi" + str(n) + "s"
432	value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name)
433	evsel_file.write(value)
434
435def machine_table(machine_id, pid, root_dir, *x):
436	n = len(root_dir)
437	fmt = "!hiqiii" + str(n) + "s"
438	value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
439	machine_file.write(value)
440
441def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
442	value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
443	thread_file.write(value)
444
445def comm_table(comm_id, comm_str, *x):
446	n = len(comm_str)
447	fmt = "!hiqi" + str(n) + "s"
448	value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
449	comm_file.write(value)
450
451def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
452	fmt = "!hiqiqiq"
453	value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
454	comm_thread_file.write(value)
455
456def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
457	n1 = len(short_name)
458	n2 = len(long_name)
459	n3 = len(build_id)
460	fmt = "!hiqiqi" + str(n1) + "si"  + str(n2) + "si" + str(n3) + "s"
461	value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id)
462	dso_file.write(value)
463
464def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
465	n = len(symbol_name)
466	fmt = "!hiqiqiqiqiii" + str(n) + "s"
467	value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name)
468	symbol_file.write(value)
469
470def branch_type_table(branch_type, name, *x):
471	n = len(name)
472	fmt = "!hiii" + str(n) + "s"
473	value = struct.pack(fmt, 2, 4, branch_type, n, name)
474	branch_type_file.write(value)
475
476def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
477	if branches:
478		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
479	else:
480		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
481	sample_file.write(value)
482
483def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
484	fmt = "!hiqiqiqiq"
485	value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
486	call_path_file.write(value)
487
488def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x):
489	fmt = "!hiqiqiqiqiqiqiqiqiqiqii"
490	value = struct.pack(fmt, 11, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags)
491	call_file.write(value)
492