// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Perf interface to expose Dispatch Trace Log counters.
 *
 * Copyright (C) 2024 Kajol Jain, IBM Corporation
 */

#ifdef CONFIG_PPC_SPLPAR
#define pr_fmt(fmt) "vpa_dtl: " fmt

#include <asm/dtl.h>
#include <linux/perf_event.h>
#include <asm/plpar_wrappers.h>
#include <linux/vmalloc.h>

#define EVENT(_name, _code)     enum{_name = _code}

/*
 * Based on the Power Architecture Platform Reference (PAPR) documentation,
 * Table 14.14. Per Virtual Processor Area, the Dispatch Trace Log (DTL)
 * Enable Mask below selects which virtual processor dispatch and preempt
 * events are traced:
 *   DTL_CEDE(0x1): Trace voluntary (OS initiated) virtual
 *   processor waits
 *   DTL_PREEMPT(0x2): Trace time slice preempts
 *   DTL_FAULT(0x4): Trace virtual partition memory page faults
 *   DTL_ALL(0x7): Trace all (DTL_CEDE | DTL_PREEMPT | DTL_FAULT)
 *
 * Event codes are based on the Dispatch Trace Log Enable Mask.
 */
EVENT(DTL_CEDE,         0x1);
EVENT(DTL_PREEMPT,      0x2);
EVENT(DTL_FAULT,        0x4);
EVENT(DTL_ALL,          0x7);

GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);

PMU_FORMAT_ATTR(event, "config:0-7");

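/*
 * Example usage (a sketch; exact options depend on the perf tool version),
 * using the event names exposed via the "events" attribute group below:
 *   perf record -a -e vpa_dtl/dtl_all/ -- sleep 1
 * The captured DTL entries land in the AUX area of perf.data and are
 * processed later by perf report/perf script.
 */
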
static struct attribute *events_attr[] = {
	GENERIC_EVENT_PTR(DTL_CEDE),
	GENERIC_EVENT_PTR(DTL_PREEMPT),
	GENERIC_EVENT_PTR(DTL_FAULT),
	GENERIC_EVENT_PTR(DTL_ALL),
	NULL
};

static struct attribute_group event_group = {
	.name = "events",
	.attrs = events_attr,
};

static struct attribute *format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static const struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	NULL,
};

struct vpa_dtl {
	struct dtl_entry	*buf;
	u64			last_idx;
};

struct vpa_pmu_ctx {
	struct perf_output_handle handle;
};

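/*
 * Driver-private AUX buffer state. base/head/size describe the vmap'ed
 * AUX area (head is an offset in units of struct dtl_entry), threshold
 * is the buffer capacity in dtl_entry records, head_size tracks the
 * bytes committed to the AUX handle since the last wrap, and full marks
 * that a capture hit the threshold so the head must wrap to the start.
 */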
struct vpa_pmu_buf {
	int     nr_pages;
	bool    snapshot;
	u64     *base;
	u64     size;
	u64     head;
	u64	head_size;
	/* boot timebase and frequency need to be saved only once */
	int	boottb_freq_saved;
	u64	threshold;
	bool	full;
};

/*
 * To correlate each DTL entry with other events across CPUs,
 * we need to map the timebase from "struct dtl_entry", which the
 * hypervisor (phyp) provides, to the boot timebase. This also needs
 * the timebase frequency. The formula is:
 * ((timebase from DTL entry - boot timebase) / frequency)
 *
 * The structure is padded by 24 bytes to match the size of
 * "struct dtl_entry", which eases post-processing.
 */
struct boottb_freq {
	u64	boot_tb;
	u64	tb_freq;
	u64	timebase;
	u64	padded[3];
};

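/* Per-cpu AUX output handle and per-cpu dispatch trace log buffer state */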
static DEFINE_PER_CPU(struct vpa_pmu_ctx, vpa_pmu_ctx);
static DEFINE_PER_CPU(struct vpa_dtl, vpa_dtl_cpu);

/* variable to capture reference count for the active dtl threads */
static int dtl_global_refc;
static spinlock_t dtl_global_lock = __SPIN_LOCK_UNLOCKED(dtl_global_lock);

/*
 * Capture DTL data in AUX buffer
 */
static void vpa_dtl_capture_aux(long *n_entries, struct vpa_pmu_buf *buf,
		struct vpa_dtl *dtl, int index)
{
	struct dtl_entry *aux_copy_buf = (struct dtl_entry *)buf->base;

	/*
	 * Check whether there is enough space to hold the new
	 * DTL data. If not, copy only as many entries as fit
	 * and mark the buffer as full.
	 */
	if (buf->head + *n_entries >= buf->threshold) {
		*n_entries = buf->threshold - buf->head;
		buf->full = 1;
	}

	/*
	 * Copy to AUX buffer from the per-thread address
	 */
	memcpy(aux_copy_buf + buf->head, &dtl->buf[index], *n_entries * sizeof(struct dtl_entry));

	if (buf->full) {
		/*
		 * Set the head of the private aux buffer to zero when the
		 * buffer is full so that the next data is copied to the
		 * beginning of the buffer
		 */
		buf->head = 0;
		return;
	}

	buf->head += *n_entries;
}

/*
 * Function to dump the dispatch trace log buffer data to the
 * perf data.
 *
 * perf_aux_output_begin: This function is called before writing
 * to the AUX area. It returns the pointer to the AUX area private
 * structure, i.e. "struct vpa_pmu_buf" here, which is set in the
 * setup_aux() function. The function obtains the output handle
 * (used in perf_aux_output_end). When the capture completes in
 * vpa_dtl_capture_aux(), perf_aux_output_end() is called to commit
 * the recorded data.
 *
 * perf_aux_output_end: This function commits data by adjusting the
 * aux_head of "struct perf_buffer". aux_tail is moved on the perf
 * tools side when writing the data from the AUX buffer to the
 * perf.data file on disk.
 *
 * Here in the private aux structure, we maintain a head to know where
 * to copy data next time in the PMU driver. vpa_pmu_buf->head is moved
 * to maintain the aux head for the PMU driver. It is the responsibility
 * of the PMU driver to make sure data is copied between
 * perf_aux_output_begin and perf_aux_output_end.
 *
 * After data is copied in the vpa_dtl_capture_aux() function,
 * perf_aux_output_end() is called to move the aux->head of
 * "struct perf_buffer" to indicate the size of data in the AUX buffer.
 * This posts a PERF_RECORD_AUX into the perf buffer. Data is written
 * to disk only when the allocated buffer is full.
 *
 * With this approach, all the DTL data is present as-is in
 * perf.data. The data is processed on the perf tools side when
 * running perf report/perf script, which avoids the time taken to
 * create samples in kernel space.
 */
static void vpa_dtl_dump_sample_data(struct perf_event *event)
{
	u64 cur_idx, last_idx, i;
	u64 boot_tb;
	struct boottb_freq boottb_freq;

	/* actual number of entries read */
	long n_read = 0, read_size = 0;

	/* number of entries added to dtl buffer */
	long n_req;

	struct vpa_pmu_ctx *vpa_ctx = this_cpu_ptr(&vpa_pmu_ctx);

	struct vpa_pmu_buf *aux_buf;

	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
	u64 size;

	cur_idx = be64_to_cpu(lppaca_of(event->cpu).dtl_idx);
	last_idx = dtl->last_idx;

	if (last_idx + N_DISPATCH_LOG <= cur_idx)
		last_idx = cur_idx - N_DISPATCH_LOG + 1;

	n_req = cur_idx - last_idx;

	/* no new entry added to the buffer, return */
	if (n_req <= 0)
		return;

	dtl->last_idx = last_idx + n_req;
	boot_tb = get_boot_tb();

	i = last_idx % N_DISPATCH_LOG;

	aux_buf = perf_aux_output_begin(&vpa_ctx->handle, event);
	if (!aux_buf) {
		pr_debug("returning. no aux\n");
		return;
	}

	if (!aux_buf->boottb_freq_saved) {
		pr_debug("Copying boot tb to aux buffer: %lld\n", boot_tb);
		/* Save boot_tb to convert the raw timebase to time relative to system boot */
		boottb_freq.boot_tb = boot_tb;
		/* Save tb_ticks_per_sec to convert timebase to seconds */
		boottb_freq.tb_freq = tb_ticks_per_sec;
		boottb_freq.timebase = 0;
		memcpy(aux_buf->base, &boottb_freq, sizeof(boottb_freq));
		aux_buf->head += 1;
		aux_buf->boottb_freq_saved = 1;
		n_read += 1;
	}

	/* read the tail of the buffer if we've wrapped */
	if (i + n_req > N_DISPATCH_LOG) {
		read_size = N_DISPATCH_LOG - i;
		vpa_dtl_capture_aux(&read_size, aux_buf, dtl, i);
		n_req -= read_size;
		n_read += read_size;
		i = 0;
		if (aux_buf->full) {
			size = (n_read * sizeof(struct dtl_entry));
			if ((size + aux_buf->head_size) > aux_buf->size) {
				size = aux_buf->size - aux_buf->head_size;
				perf_aux_output_end(&vpa_ctx->handle, size);
				aux_buf->head = 0;
				aux_buf->head_size = 0;
			} else {
				aux_buf->head_size += (n_read * sizeof(struct dtl_entry));
				perf_aux_output_end(&vpa_ctx->handle, n_read * sizeof(struct dtl_entry));
			}
			goto out;
		}
	}

	/* .. and now the head */
	vpa_dtl_capture_aux(&n_req, aux_buf, dtl, i);

	size = ((n_req + n_read) * sizeof(struct dtl_entry));
	if ((size + aux_buf->head_size) > aux_buf->size) {
		size = aux_buf->size - aux_buf->head_size;
		perf_aux_output_end(&vpa_ctx->handle, size);
		aux_buf->head = 0;
		aux_buf->head_size = 0;
	} else {
		aux_buf->head_size += ((n_req + n_read) * sizeof(struct dtl_entry));
		/* Move the aux->head to indicate size of data in aux buffer */
		perf_aux_output_end(&vpa_ctx->handle, (n_req + n_read) * sizeof(struct dtl_entry));
	}
out:
	aux_buf->full = 0;
}

/*
 * The VPA Dispatch Trace Log counters do not interrupt on overflow.
 * Therefore, the kernel needs to poll the counters using an hrtimer
 * to avoid missing data. The timer interval is based on the
 * sample_period count provided by the user; the minimum interval is
 * 1 millisecond.
 */
static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer)
{
	struct perf_event *event;
	u64 period;

	event = container_of(hrtimer, struct perf_event, hw.hrtimer);

	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return HRTIMER_NORESTART;

	vpa_dtl_dump_sample_data(event);
	period = max_t(u64, NSEC_PER_MSEC, event->hw.sample_period);
	hrtimer_forward_now(hrtimer, ns_to_ktime(period));

	return HRTIMER_RESTART;
}

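/* Start the polling timer; the period is at least one millisecond */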
static void vpa_dtl_start_hrtimer(struct perf_event *event)
{
	u64 period;
	struct hw_perf_event *hwc = &event->hw;

	period = max_t(u64, NSEC_PER_MSEC, hwc->sample_period);
	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED);
}

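/* Cancel the polling timer for this event */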
static void vpa_dtl_stop_hrtimer(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	hrtimer_cancel(&hwc->hrtimer);
}

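/*
 * Called via event->destroy: drop a reference for this event and release
 * dtl_access_lock once the last vpa_dtl user goes away.
 */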
static void vpa_dtl_reset_global_refc(struct perf_event *event)
{
	spin_lock(&dtl_global_lock);
	dtl_global_refc--;
	if (dtl_global_refc <= 0) {
		dtl_global_refc = 0;
		up_write(&dtl_access_lock);
	}
	spin_unlock(&dtl_global_lock);
}

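/* Allocate the per-cpu dispatch trace log buffer from dtl_cache */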
static int vpa_dtl_mem_alloc(int cpu)
{
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, cpu);
	struct dtl_entry *buf = NULL;

	/* Check for dispatch trace log buffer cache */
	if (!dtl_cache)
		return -ENOMEM;

	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL | GFP_ATOMIC, cpu_to_node(cpu));
	if (!buf) {
		pr_warn("buffer allocation failed for cpu %d\n", cpu);
		return -ENOMEM;
	}
	dtl->buf = buf;
	return 0;
}

static int vpa_dtl_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* test the event attr type for PMU enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (!perfmon_capable())
		return -EACCES;

	/* Return if this is a counting event */
	if (!is_sampling_event(event))
		return -EOPNOTSUPP;

	/* no branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	/* Invalid event code */
	switch (event->attr.config) {
	case DTL_LOG_CEDE:
	case DTL_LOG_PREEMPT:
	case DTL_LOG_FAULT:
	case DTL_LOG_ALL:
		break;
	default:
		return -EINVAL;
	}

	spin_lock(&dtl_global_lock);

	/*
	 * To ensure there are no other conflicting dtl users
	 * (example: /proc/powerpc/vcpudispatch_stats or debugfs dtl),
	 * the code below tries to take the dtl_access_lock.
	 * The dtl_access_lock is a rwlock defined in dtl.h, which is used
	 * to ensure there are no conflicting dtl users.
	 * The vpa_dtl pmu takes the write access lock and also checks
	 * dtl_global_refc, to make sure that the dtl_access_lock is
	 * held by the vpa_dtl pmu interface.
	 */
	if (dtl_global_refc == 0 && !down_write_trylock(&dtl_access_lock)) {
		spin_unlock(&dtl_global_lock);
		return -EBUSY;
	}

	/* Allocate dtl buffer memory */
	if (vpa_dtl_mem_alloc(event->cpu)) {
		spin_unlock(&dtl_global_lock);
		return -ENOMEM;
	}

	/*
	 * Increment the number of active vpa_dtl pmu threads. The
	 * dtl_global_refc is used to keep count of cpu threads that
	 * are currently capturing dtl data using the vpa_dtl pmu
	 * interface.
	 */
	dtl_global_refc++;

	spin_unlock(&dtl_global_lock);

	hrtimer_setup(&hwc->hrtimer, vpa_dtl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

	/*
	 * Since hrtimers have a fixed rate, we can do a static freq->period
	 * mapping and avoid the whole period adjust feedback stuff.
	 */
	if (event->attr.freq) {
		long freq = event->attr.sample_freq;

		event->attr.sample_period = NSEC_PER_SEC / freq;
		hwc->sample_period = event->attr.sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
		hwc->last_period = hwc->sample_period;
		event->attr.freq = 0;
	}

	event->destroy = vpa_dtl_reset_global_refc;
	return 0;
}

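/*
 * Register the per-cpu DTL buffer with the hypervisor, enable the
 * requested dispatch trace events and start the polling timer.
 */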
static int vpa_dtl_event_add(struct perf_event *event, int flags)
{
	int ret, hwcpu;
	unsigned long addr;
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);

	/*
	 * Register our dtl buffer with the hypervisor. The
	 * HV expects the buffer size to be passed in the second
	 * word of the buffer. Refer to section '14.11.3.2. H_REGISTER_VPA'
	 * of PAPR for more information.
	 */
	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
	dtl->last_idx = 0;

	hwcpu = get_hard_smp_processor_id(event->cpu);
	addr = __pa(dtl->buf);

	ret = register_dtl(hwcpu, addr);
	if (ret) {
		pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
			event->cpu, hwcpu, ret);
		return ret;
	}

	/* set our initial buffer indices */
	lppaca_of(event->cpu).dtl_idx = 0;

	/*
	 * Ensure that our updates to the lppaca fields have
	 * occurred before we actually enable the logging
	 */
	smp_wmb();

	/* enable event logging */
	lppaca_of(event->cpu).dtl_enable_mask = event->attr.config;

	vpa_dtl_start_hrtimer(event);

	return 0;
}

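/*
 * Stop the polling timer, unregister the DTL buffer with the hypervisor,
 * free the per-cpu buffer and disable DTL logging for this cpu.
 */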
static void vpa_dtl_event_del(struct perf_event *event, int flags)
{
	int hwcpu = get_hard_smp_processor_id(event->cpu);
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);

	vpa_dtl_stop_hrtimer(event);
	unregister_dtl(hwcpu);
	kmem_cache_free(dtl_cache, dtl->buf);
	dtl->buf = NULL;
	lppaca_of(event->cpu).dtl_enable_mask = 0x0;
}

/*
 * This function definition is empty, as vpa_dtl_dump_sample_data()
 * is used to parse and dump the dispatch trace log data to the
 * perf data.
 */
static void vpa_dtl_event_read(struct perf_event *event)
{
}

/*
 * Set up pmu-private data structures for an AUX area.
 * **pages contains the aux buffer allocated for this event
 * on the corresponding cpu. rb_alloc_aux() uses "alloc_pages_node"
 * and returns a pointer to each page address. Map these pages to
 * contiguous space using vmap and use that as the base address.
 *
 * The aux private data structure, i.e. "struct vpa_pmu_buf", mainly
 * saves:
 * - buf->base: aux buffer base address
 * - buf->head: offset from the base address where data will be written
 * - buf->size: size of the allocated memory
 */
static void *vpa_dtl_setup_aux(struct perf_event *event, void **pages,
		int nr_pages, bool snapshot)
{
	int i, cpu = event->cpu;
	struct vpa_pmu_buf *buf __free(kfree) = NULL;
	struct page **pglist __free(kfree) = NULL;

	/* We need at least one page for this to work. */
	if (!nr_pages)
		return NULL;

	if (cpu == -1)
		cpu = raw_smp_processor_id();

	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
	if (!buf)
		return NULL;

	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
	if (!pglist)
		return NULL;

	for (i = 0; i < nr_pages; ++i)
		pglist[i] = virt_to_page(pages[i]);

	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!buf->base)
		return NULL;

	buf->nr_pages = nr_pages;
	buf->snapshot = false;

	buf->size = nr_pages << PAGE_SHIFT;
	buf->head = 0;
	buf->head_size = 0;
	buf->boottb_freq_saved = 0;
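	/* Buffer capacity in dtl_entry records, keeping 32 bytes in reserve */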
	buf->threshold = ((buf->size - 32) / sizeof(struct dtl_entry));
	return no_free_ptr(buf);
}

/*
 * free pmu-private AUX data structures
 */
static void vpa_dtl_free_aux(void *aux)
{
	struct vpa_pmu_buf *buf = aux;

	vunmap(buf->base);
	kfree(buf);
}

static struct pmu vpa_dtl_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "vpa_dtl",
	.attr_groups = attr_groups,
	.event_init  = vpa_dtl_event_init,
	.add         = vpa_dtl_event_add,
	.del         = vpa_dtl_event_del,
	.read        = vpa_dtl_event_read,
	.setup_aux   = vpa_dtl_setup_aux,
	.free_aux    = vpa_dtl_free_aux,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
};

static int vpa_dtl_init(void)
{
	int r;

	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
		pr_debug("not a shared virtualized system, not enabling\n");
		return -ENODEV;
	}

	/* This driver is intended only for L1 host. */
	if (is_kvm_guest()) {
		pr_debug("Only supported for L1 host system\n");
		return -ENODEV;
	}

	r = perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1);
	if (r)
		return r;

	return 0;
}

device_initcall(vpa_dtl_init);
#endif //CONFIG_PPC_SPLPAR