14708fba1SKajol Jain // SPDX-License-Identifier: GPL-2.0-or-later
24708fba1SKajol Jain /*
34708fba1SKajol Jain * Perf interface to expose Dispatch Trace Log counters.
44708fba1SKajol Jain *
54708fba1SKajol Jain * Copyright (C) 2024 Kajol Jain, IBM Corporation
64708fba1SKajol Jain */
74708fba1SKajol Jain
84708fba1SKajol Jain #ifdef CONFIG_PPC_SPLPAR
94708fba1SKajol Jain #define pr_fmt(fmt) "vpa_dtl: " fmt
104708fba1SKajol Jain
114708fba1SKajol Jain #include <asm/dtl.h>
124708fba1SKajol Jain #include <linux/perf_event.h>
134708fba1SKajol Jain #include <asm/plpar_wrappers.h>
145d75aed8SAthira Rajeev #include <linux/vmalloc.h>
154708fba1SKajol Jain
164708fba1SKajol Jain #define EVENT(_name, _code) enum{_name = _code}
174708fba1SKajol Jain
184708fba1SKajol Jain /*
194708fba1SKajol Jain * Based on Power Architecture Platform Reference(PAPR) documentation,
204708fba1SKajol Jain * Table 14.14. Per Virtual Processor Area, below Dispatch Trace Log(DTL)
214708fba1SKajol Jain * Enable Mask used to get corresponding virtual processor dispatch
224708fba1SKajol Jain * to preempt traces:
234708fba1SKajol Jain * DTL_CEDE(0x1): Trace voluntary (OS initiated) virtual
244708fba1SKajol Jain * processor waits
254708fba1SKajol Jain * DTL_PREEMPT(0x2): Trace time slice preempts
264708fba1SKajol Jain * DTL_FAULT(0x4): Trace virtual partition memory page
274708fba1SKajol Jain faults.
284708fba1SKajol Jain * DTL_ALL(0x7): Trace all (DTL_CEDE | DTL_PREEMPT | DTL_FAULT)
294708fba1SKajol Jain *
304708fba1SKajol Jain * Event codes based on Dispatch Trace Log Enable Mask.
314708fba1SKajol Jain */
324708fba1SKajol Jain EVENT(DTL_CEDE, 0x1);
334708fba1SKajol Jain EVENT(DTL_PREEMPT, 0x2);
344708fba1SKajol Jain EVENT(DTL_FAULT, 0x4);
354708fba1SKajol Jain EVENT(DTL_ALL, 0x7);
364708fba1SKajol Jain
374708fba1SKajol Jain GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
384708fba1SKajol Jain GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
394708fba1SKajol Jain GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
404708fba1SKajol Jain GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);
414708fba1SKajol Jain
424708fba1SKajol Jain PMU_FORMAT_ATTR(event, "config:0-7");
434708fba1SKajol Jain
444708fba1SKajol Jain static struct attribute *events_attr[] = {
454708fba1SKajol Jain GENERIC_EVENT_PTR(DTL_CEDE),
464708fba1SKajol Jain GENERIC_EVENT_PTR(DTL_PREEMPT),
474708fba1SKajol Jain GENERIC_EVENT_PTR(DTL_FAULT),
484708fba1SKajol Jain GENERIC_EVENT_PTR(DTL_ALL),
494708fba1SKajol Jain NULL
504708fba1SKajol Jain };
514708fba1SKajol Jain
524708fba1SKajol Jain static struct attribute_group event_group = {
534708fba1SKajol Jain .name = "events",
544708fba1SKajol Jain .attrs = events_attr,
554708fba1SKajol Jain };
564708fba1SKajol Jain
574708fba1SKajol Jain static struct attribute *format_attrs[] = {
584708fba1SKajol Jain &format_attr_event.attr,
594708fba1SKajol Jain NULL,
604708fba1SKajol Jain };
614708fba1SKajol Jain
624708fba1SKajol Jain static const struct attribute_group format_group = {
634708fba1SKajol Jain .name = "format",
644708fba1SKajol Jain .attrs = format_attrs,
654708fba1SKajol Jain };
664708fba1SKajol Jain
674708fba1SKajol Jain static const struct attribute_group *attr_groups[] = {
684708fba1SKajol Jain &format_group,
694708fba1SKajol Jain &event_group,
704708fba1SKajol Jain NULL,
714708fba1SKajol Jain };
724708fba1SKajol Jain
734708fba1SKajol Jain struct vpa_dtl {
744708fba1SKajol Jain struct dtl_entry *buf;
754708fba1SKajol Jain u64 last_idx;
764708fba1SKajol Jain };
774708fba1SKajol Jain
785d75aed8SAthira Rajeev struct vpa_pmu_ctx {
795d75aed8SAthira Rajeev struct perf_output_handle handle;
805d75aed8SAthira Rajeev };
815d75aed8SAthira Rajeev
825d75aed8SAthira Rajeev struct vpa_pmu_buf {
835d75aed8SAthira Rajeev int nr_pages;
845d75aed8SAthira Rajeev bool snapshot;
855d75aed8SAthira Rajeev u64 *base;
865d75aed8SAthira Rajeev u64 size;
875d75aed8SAthira Rajeev u64 head;
88*b5e71cafSAthira Rajeev u64 head_size;
892de8b6ddSAthira Rajeev /* boot timebase and frequency needs to be saved only at once */
902de8b6ddSAthira Rajeev int boottb_freq_saved;
91*b5e71cafSAthira Rajeev u64 threshold;
92*b5e71cafSAthira Rajeev bool full;
932de8b6ddSAthira Rajeev };
942de8b6ddSAthira Rajeev
952de8b6ddSAthira Rajeev /*
962de8b6ddSAthira Rajeev * To corelate each DTL entry with other events across CPU's,
972de8b6ddSAthira Rajeev * we need to map timebase from "struct dtl_entry" which phyp
982de8b6ddSAthira Rajeev * provides with boot timebase. This also needs timebase frequency.
992de8b6ddSAthira Rajeev * Formula is: ((timbase from DTL entry - boot time) / frequency)
1002de8b6ddSAthira Rajeev *
1012de8b6ddSAthira Rajeev * To match with size of "struct dtl_entry" to ease post processing,
1022de8b6ddSAthira Rajeev * padded 24 bytes to the structure.
1032de8b6ddSAthira Rajeev */
1042de8b6ddSAthira Rajeev struct boottb_freq {
1052de8b6ddSAthira Rajeev u64 boot_tb;
1062de8b6ddSAthira Rajeev u64 tb_freq;
1072de8b6ddSAthira Rajeev u64 timebase;
1082de8b6ddSAthira Rajeev u64 padded[3];
1095d75aed8SAthira Rajeev };
1105d75aed8SAthira Rajeev
1115d75aed8SAthira Rajeev static DEFINE_PER_CPU(struct vpa_pmu_ctx, vpa_pmu_ctx);
1124708fba1SKajol Jain static DEFINE_PER_CPU(struct vpa_dtl, vpa_dtl_cpu);
1134708fba1SKajol Jain
1144708fba1SKajol Jain /* variable to capture reference count for the active dtl threads */
1154708fba1SKajol Jain static int dtl_global_refc;
1164708fba1SKajol Jain static spinlock_t dtl_global_lock = __SPIN_LOCK_UNLOCKED(dtl_global_lock);
1174708fba1SKajol Jain
1184708fba1SKajol Jain /*
1192de8b6ddSAthira Rajeev * Capture DTL data in AUX buffer
1202de8b6ddSAthira Rajeev */
vpa_dtl_capture_aux(long * n_entries,struct vpa_pmu_buf * buf,struct vpa_dtl * dtl,int index)1212de8b6ddSAthira Rajeev static void vpa_dtl_capture_aux(long *n_entries, struct vpa_pmu_buf *buf,
1222de8b6ddSAthira Rajeev struct vpa_dtl *dtl, int index)
1232de8b6ddSAthira Rajeev {
1242de8b6ddSAthira Rajeev struct dtl_entry *aux_copy_buf = (struct dtl_entry *)buf->base;
1252de8b6ddSAthira Rajeev
1262de8b6ddSAthira Rajeev /*
127*b5e71cafSAthira Rajeev * check if there is enough space to contain the
128*b5e71cafSAthira Rajeev * DTL data. If not, save the data for available
129*b5e71cafSAthira Rajeev * memory and set full to true.
130*b5e71cafSAthira Rajeev */
131*b5e71cafSAthira Rajeev if (buf->head + *n_entries >= buf->threshold) {
132*b5e71cafSAthira Rajeev *n_entries = buf->threshold - buf->head;
133*b5e71cafSAthira Rajeev buf->full = 1;
134*b5e71cafSAthira Rajeev }
135*b5e71cafSAthira Rajeev
136*b5e71cafSAthira Rajeev /*
1372de8b6ddSAthira Rajeev * Copy to AUX buffer from per-thread address
1382de8b6ddSAthira Rajeev */
1392de8b6ddSAthira Rajeev memcpy(aux_copy_buf + buf->head, &dtl->buf[index], *n_entries * sizeof(struct dtl_entry));
1402de8b6ddSAthira Rajeev
141*b5e71cafSAthira Rajeev if (buf->full) {
142*b5e71cafSAthira Rajeev /*
143*b5e71cafSAthira Rajeev * Set head of private aux to zero when buffer is full
144*b5e71cafSAthira Rajeev * so that next data will be copied to beginning of the
145*b5e71cafSAthira Rajeev * buffer
146*b5e71cafSAthira Rajeev */
147*b5e71cafSAthira Rajeev buf->head = 0;
148*b5e71cafSAthira Rajeev return;
149*b5e71cafSAthira Rajeev }
150*b5e71cafSAthira Rajeev
1512de8b6ddSAthira Rajeev buf->head += *n_entries;
1522de8b6ddSAthira Rajeev
1532de8b6ddSAthira Rajeev return;
1542de8b6ddSAthira Rajeev }
1552de8b6ddSAthira Rajeev
1562de8b6ddSAthira Rajeev /*
1574708fba1SKajol Jain * Function to dump the dispatch trace log buffer data to the
1584708fba1SKajol Jain * perf data.
1592de8b6ddSAthira Rajeev *
1602de8b6ddSAthira Rajeev * perf_aux_output_begin: This function is called before writing
1612de8b6ddSAthira Rajeev * to AUX area. This returns the pointer to aux area private structure,
1622de8b6ddSAthira Rajeev * ie "struct vpa_pmu_buf" here which is set in setup_aux() function.
1632de8b6ddSAthira Rajeev * The function obtains the output handle (used in perf_aux_output_end).
1642de8b6ddSAthira Rajeev * when capture completes in vpa_dtl_capture_aux(), call perf_aux_output_end()
1652de8b6ddSAthira Rajeev * to commit the recorded data.
1662de8b6ddSAthira Rajeev *
1672de8b6ddSAthira Rajeev * perf_aux_output_end: This function commits data by adjusting the
1682de8b6ddSAthira Rajeev * aux_head of "struct perf_buffer". aux_tail will be moved in perf tools
1692de8b6ddSAthira Rajeev * side when writing the data from aux buffer to perf.data file in disk.
1702de8b6ddSAthira Rajeev *
1712de8b6ddSAthira Rajeev * Here in the private aux structure, we maintain head to know where
1722de8b6ddSAthira Rajeev * to copy data next time in the PMU driver. vpa_pmu_buf->head is moved to
1732de8b6ddSAthira Rajeev * maintain the aux head for PMU driver. It is responsiblity of PMU
1742de8b6ddSAthira Rajeev * driver to make sure data is copied between perf_aux_output_begin and
1752de8b6ddSAthira Rajeev * perf_aux_output_end.
1762de8b6ddSAthira Rajeev *
1772de8b6ddSAthira Rajeev * After data is copied in vpa_dtl_capture_aux() function, perf_aux_output_end()
1782de8b6ddSAthira Rajeev * is called to move the aux->head of "struct perf_buffer" to indicate size of
1792de8b6ddSAthira Rajeev * data in aux buffer. This will post a PERF_RECORD_AUX into the perf buffer.
1802de8b6ddSAthira Rajeev * Data will be written to disk only when the allocated buffer is full.
1812de8b6ddSAthira Rajeev *
1822de8b6ddSAthira Rajeev * By this approach, all the DTL data will be present as-is in the
1832de8b6ddSAthira Rajeev * perf.data. The data will be pre-processed in perf tools side when doing
1842de8b6ddSAthira Rajeev * perf report/perf script and this will avoid time taken to create samples
1852de8b6ddSAthira Rajeev * in the kernel space.
1864708fba1SKajol Jain */
vpa_dtl_dump_sample_data(struct perf_event * event)1874708fba1SKajol Jain static void vpa_dtl_dump_sample_data(struct perf_event *event)
1884708fba1SKajol Jain {
1892de8b6ddSAthira Rajeev u64 cur_idx, last_idx, i;
1902de8b6ddSAthira Rajeev u64 boot_tb;
1912de8b6ddSAthira Rajeev struct boottb_freq boottb_freq;
1922de8b6ddSAthira Rajeev
1932de8b6ddSAthira Rajeev /* actual number of entries read */
1942de8b6ddSAthira Rajeev long n_read = 0, read_size = 0;
1952de8b6ddSAthira Rajeev
1962de8b6ddSAthira Rajeev /* number of entries added to dtl buffer */
1972de8b6ddSAthira Rajeev long n_req;
1982de8b6ddSAthira Rajeev
1992de8b6ddSAthira Rajeev struct vpa_pmu_ctx *vpa_ctx = this_cpu_ptr(&vpa_pmu_ctx);
2002de8b6ddSAthira Rajeev
2012de8b6ddSAthira Rajeev struct vpa_pmu_buf *aux_buf;
2022de8b6ddSAthira Rajeev
2032de8b6ddSAthira Rajeev struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
204*b5e71cafSAthira Rajeev u64 size;
2052de8b6ddSAthira Rajeev
2062de8b6ddSAthira Rajeev cur_idx = be64_to_cpu(lppaca_of(event->cpu).dtl_idx);
2072de8b6ddSAthira Rajeev last_idx = dtl->last_idx;
2082de8b6ddSAthira Rajeev
2092de8b6ddSAthira Rajeev if (last_idx + N_DISPATCH_LOG <= cur_idx)
2102de8b6ddSAthira Rajeev last_idx = cur_idx - N_DISPATCH_LOG + 1;
2112de8b6ddSAthira Rajeev
2122de8b6ddSAthira Rajeev n_req = cur_idx - last_idx;
2132de8b6ddSAthira Rajeev
2142de8b6ddSAthira Rajeev /* no new entry added to the buffer, return */
2152de8b6ddSAthira Rajeev if (n_req <= 0)
2164708fba1SKajol Jain return;
2172de8b6ddSAthira Rajeev
2182de8b6ddSAthira Rajeev dtl->last_idx = last_idx + n_req;
2192de8b6ddSAthira Rajeev boot_tb = get_boot_tb();
2202de8b6ddSAthira Rajeev
2212de8b6ddSAthira Rajeev i = last_idx % N_DISPATCH_LOG;
2222de8b6ddSAthira Rajeev
2232de8b6ddSAthira Rajeev aux_buf = perf_aux_output_begin(&vpa_ctx->handle, event);
2242de8b6ddSAthira Rajeev if (!aux_buf) {
2252de8b6ddSAthira Rajeev pr_debug("returning. no aux\n");
2262de8b6ddSAthira Rajeev return;
2272de8b6ddSAthira Rajeev }
2282de8b6ddSAthira Rajeev
2292de8b6ddSAthira Rajeev if (!aux_buf->boottb_freq_saved) {
2302de8b6ddSAthira Rajeev pr_debug("Copying boot tb to aux buffer: %lld\n", boot_tb);
2312de8b6ddSAthira Rajeev /* Save boot_tb to convert raw timebase to it's relative system boot time */
2322de8b6ddSAthira Rajeev boottb_freq.boot_tb = boot_tb;
2332de8b6ddSAthira Rajeev /* Save tb_ticks_per_sec to convert timebase to sec */
2342de8b6ddSAthira Rajeev boottb_freq.tb_freq = tb_ticks_per_sec;
2352de8b6ddSAthira Rajeev boottb_freq.timebase = 0;
2362de8b6ddSAthira Rajeev memcpy(aux_buf->base, &boottb_freq, sizeof(boottb_freq));
2372de8b6ddSAthira Rajeev aux_buf->head += 1;
2382de8b6ddSAthira Rajeev aux_buf->boottb_freq_saved = 1;
2392de8b6ddSAthira Rajeev n_read += 1;
2402de8b6ddSAthira Rajeev }
2412de8b6ddSAthira Rajeev
2422de8b6ddSAthira Rajeev /* read the tail of the buffer if we've wrapped */
2432de8b6ddSAthira Rajeev if (i + n_req > N_DISPATCH_LOG) {
2442de8b6ddSAthira Rajeev read_size = N_DISPATCH_LOG - i;
2452de8b6ddSAthira Rajeev vpa_dtl_capture_aux(&read_size, aux_buf, dtl, i);
2462de8b6ddSAthira Rajeev n_req -= read_size;
2472de8b6ddSAthira Rajeev n_read += read_size;
2482de8b6ddSAthira Rajeev i = 0;
249*b5e71cafSAthira Rajeev if (aux_buf->full) {
250*b5e71cafSAthira Rajeev size = (n_read * sizeof(struct dtl_entry));
251*b5e71cafSAthira Rajeev if ((size + aux_buf->head_size) > aux_buf->size) {
252*b5e71cafSAthira Rajeev size = aux_buf->size - aux_buf->head_size;
253*b5e71cafSAthira Rajeev perf_aux_output_end(&vpa_ctx->handle, size);
254*b5e71cafSAthira Rajeev aux_buf->head = 0;
255*b5e71cafSAthira Rajeev aux_buf->head_size = 0;
256*b5e71cafSAthira Rajeev } else {
257*b5e71cafSAthira Rajeev aux_buf->head_size += (n_read * sizeof(struct dtl_entry));
258*b5e71cafSAthira Rajeev perf_aux_output_end(&vpa_ctx->handle, n_read * sizeof(struct dtl_entry));
259*b5e71cafSAthira Rajeev }
260*b5e71cafSAthira Rajeev goto out;
261*b5e71cafSAthira Rajeev }
2622de8b6ddSAthira Rajeev }
2632de8b6ddSAthira Rajeev
2642de8b6ddSAthira Rajeev /* .. and now the head */
2652de8b6ddSAthira Rajeev vpa_dtl_capture_aux(&n_req, aux_buf, dtl, i);
2662de8b6ddSAthira Rajeev
267*b5e71cafSAthira Rajeev size = ((n_req + n_read) * sizeof(struct dtl_entry));
268*b5e71cafSAthira Rajeev if ((size + aux_buf->head_size) > aux_buf->size) {
269*b5e71cafSAthira Rajeev size = aux_buf->size - aux_buf->head_size;
270*b5e71cafSAthira Rajeev perf_aux_output_end(&vpa_ctx->handle, size);
271*b5e71cafSAthira Rajeev aux_buf->head = 0;
272*b5e71cafSAthira Rajeev aux_buf->head_size = 0;
273*b5e71cafSAthira Rajeev } else {
274*b5e71cafSAthira Rajeev aux_buf->head_size += ((n_req + n_read) * sizeof(struct dtl_entry));
2752de8b6ddSAthira Rajeev /* Move the aux->head to indicate size of data in aux buffer */
2762de8b6ddSAthira Rajeev perf_aux_output_end(&vpa_ctx->handle, (n_req + n_read) * sizeof(struct dtl_entry));
2774708fba1SKajol Jain }
278*b5e71cafSAthira Rajeev out:
279*b5e71cafSAthira Rajeev aux_buf->full = 0;
280*b5e71cafSAthira Rajeev }
2814708fba1SKajol Jain
2824708fba1SKajol Jain /*
2834708fba1SKajol Jain * The VPA Dispatch Trace log counters do not interrupt on overflow.
2844708fba1SKajol Jain * Therefore, the kernel needs to poll the counters to avoid missing
2854708fba1SKajol Jain * an overflow using hrtimer. The timer interval is based on sample_period
2864708fba1SKajol Jain * count provided by user, and minimum interval is 1 millisecond.
2874708fba1SKajol Jain */
vpa_dtl_hrtimer_handle(struct hrtimer * hrtimer)2884708fba1SKajol Jain static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer)
2894708fba1SKajol Jain {
2904708fba1SKajol Jain struct perf_event *event;
2914708fba1SKajol Jain u64 period;
2924708fba1SKajol Jain
2934708fba1SKajol Jain event = container_of(hrtimer, struct perf_event, hw.hrtimer);
2944708fba1SKajol Jain
2954708fba1SKajol Jain if (event->state != PERF_EVENT_STATE_ACTIVE)
2964708fba1SKajol Jain return HRTIMER_NORESTART;
2974708fba1SKajol Jain
2984708fba1SKajol Jain vpa_dtl_dump_sample_data(event);
2994708fba1SKajol Jain period = max_t(u64, NSEC_PER_MSEC, event->hw.sample_period);
3004708fba1SKajol Jain hrtimer_forward_now(hrtimer, ns_to_ktime(period));
3014708fba1SKajol Jain
3024708fba1SKajol Jain return HRTIMER_RESTART;
3034708fba1SKajol Jain }
3044708fba1SKajol Jain
vpa_dtl_start_hrtimer(struct perf_event * event)3054708fba1SKajol Jain static void vpa_dtl_start_hrtimer(struct perf_event *event)
3064708fba1SKajol Jain {
3074708fba1SKajol Jain u64 period;
3084708fba1SKajol Jain struct hw_perf_event *hwc = &event->hw;
3094708fba1SKajol Jain
3104708fba1SKajol Jain period = max_t(u64, NSEC_PER_MSEC, hwc->sample_period);
3114708fba1SKajol Jain hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED);
3124708fba1SKajol Jain }
3134708fba1SKajol Jain
vpa_dtl_stop_hrtimer(struct perf_event * event)3144708fba1SKajol Jain static void vpa_dtl_stop_hrtimer(struct perf_event *event)
3154708fba1SKajol Jain {
3164708fba1SKajol Jain struct hw_perf_event *hwc = &event->hw;
3174708fba1SKajol Jain
3184708fba1SKajol Jain hrtimer_cancel(&hwc->hrtimer);
3194708fba1SKajol Jain }
3204708fba1SKajol Jain
vpa_dtl_reset_global_refc(struct perf_event * event)3214708fba1SKajol Jain static void vpa_dtl_reset_global_refc(struct perf_event *event)
3224708fba1SKajol Jain {
3234708fba1SKajol Jain spin_lock(&dtl_global_lock);
3244708fba1SKajol Jain dtl_global_refc--;
3254708fba1SKajol Jain if (dtl_global_refc <= 0) {
3264708fba1SKajol Jain dtl_global_refc = 0;
3274708fba1SKajol Jain up_write(&dtl_access_lock);
3284708fba1SKajol Jain }
3294708fba1SKajol Jain spin_unlock(&dtl_global_lock);
3304708fba1SKajol Jain }
3314708fba1SKajol Jain
vpa_dtl_mem_alloc(int cpu)3324708fba1SKajol Jain static int vpa_dtl_mem_alloc(int cpu)
3334708fba1SKajol Jain {
3344708fba1SKajol Jain struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, cpu);
3354708fba1SKajol Jain struct dtl_entry *buf = NULL;
3364708fba1SKajol Jain
3374708fba1SKajol Jain /* Check for dispatch trace log buffer cache */
3384708fba1SKajol Jain if (!dtl_cache)
3394708fba1SKajol Jain return -ENOMEM;
3404708fba1SKajol Jain
3414708fba1SKajol Jain buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL | GFP_ATOMIC, cpu_to_node(cpu));
3424708fba1SKajol Jain if (!buf) {
3434708fba1SKajol Jain pr_warn("buffer allocation failed for cpu %d\n", cpu);
3444708fba1SKajol Jain return -ENOMEM;
3454708fba1SKajol Jain }
3464708fba1SKajol Jain dtl->buf = buf;
3474708fba1SKajol Jain return 0;
3484708fba1SKajol Jain }
3494708fba1SKajol Jain
vpa_dtl_event_init(struct perf_event * event)3504708fba1SKajol Jain static int vpa_dtl_event_init(struct perf_event *event)
3514708fba1SKajol Jain {
3524708fba1SKajol Jain struct hw_perf_event *hwc = &event->hw;
3534708fba1SKajol Jain
3544708fba1SKajol Jain /* test the event attr type for PMU enumeration */
3554708fba1SKajol Jain if (event->attr.type != event->pmu->type)
3564708fba1SKajol Jain return -ENOENT;
3574708fba1SKajol Jain
3584708fba1SKajol Jain if (!perfmon_capable())
3594708fba1SKajol Jain return -EACCES;
3604708fba1SKajol Jain
3614708fba1SKajol Jain /* Return if this is a counting event */
3624708fba1SKajol Jain if (!is_sampling_event(event))
3634708fba1SKajol Jain return -EOPNOTSUPP;
3644708fba1SKajol Jain
3654708fba1SKajol Jain /* no branch sampling */
3664708fba1SKajol Jain if (has_branch_stack(event))
3674708fba1SKajol Jain return -EOPNOTSUPP;
3684708fba1SKajol Jain
3694708fba1SKajol Jain /* Invalid eventcode */
3704708fba1SKajol Jain switch (event->attr.config) {
3714708fba1SKajol Jain case DTL_LOG_CEDE:
3724708fba1SKajol Jain case DTL_LOG_PREEMPT:
3734708fba1SKajol Jain case DTL_LOG_FAULT:
3744708fba1SKajol Jain case DTL_LOG_ALL:
3754708fba1SKajol Jain break;
3764708fba1SKajol Jain default:
3774708fba1SKajol Jain return -EINVAL;
3784708fba1SKajol Jain }
3794708fba1SKajol Jain
3804708fba1SKajol Jain spin_lock(&dtl_global_lock);
3814708fba1SKajol Jain
3824708fba1SKajol Jain /*
3834708fba1SKajol Jain * To ensure there are no other conflicting dtl users
3844708fba1SKajol Jain * (example: /proc/powerpc/vcpudispatch_stats or debugfs dtl),
3854708fba1SKajol Jain * below code try to take the dtl_access_lock.
3864708fba1SKajol Jain * The dtl_access_lock is a rwlock defined in dtl.h, which is used
3874708fba1SKajol Jain * to unsure there is no conflicting dtl users.
3884708fba1SKajol Jain * Based on below code, vpa_dtl pmu tries to take write access lock
3894708fba1SKajol Jain * and also checks for dtl_global_refc, to make sure that the
3904708fba1SKajol Jain * dtl_access_lock is taken by vpa_dtl pmu interface.
3914708fba1SKajol Jain */
3924708fba1SKajol Jain if (dtl_global_refc == 0 && !down_write_trylock(&dtl_access_lock)) {
3934708fba1SKajol Jain spin_unlock(&dtl_global_lock);
3944708fba1SKajol Jain return -EBUSY;
3954708fba1SKajol Jain }
3964708fba1SKajol Jain
3974708fba1SKajol Jain /* Allocate dtl buffer memory */
3984708fba1SKajol Jain if (vpa_dtl_mem_alloc(event->cpu)) {
3994708fba1SKajol Jain spin_unlock(&dtl_global_lock);
4004708fba1SKajol Jain return -ENOMEM;
4014708fba1SKajol Jain }
4024708fba1SKajol Jain
4034708fba1SKajol Jain /*
4044708fba1SKajol Jain * Increment the number of active vpa_dtl pmu threads. The
4054708fba1SKajol Jain * dtl_global_refc is used to keep count of cpu threads that
4064708fba1SKajol Jain * currently capturing dtl data using vpa_dtl pmu interface.
4074708fba1SKajol Jain */
4084708fba1SKajol Jain dtl_global_refc++;
4094708fba1SKajol Jain
4104708fba1SKajol Jain spin_unlock(&dtl_global_lock);
4114708fba1SKajol Jain
4124708fba1SKajol Jain hrtimer_setup(&hwc->hrtimer, vpa_dtl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4134708fba1SKajol Jain
4144708fba1SKajol Jain /*
4154708fba1SKajol Jain * Since hrtimers have a fixed rate, we can do a static freq->period
4164708fba1SKajol Jain * mapping and avoid the whole period adjust feedback stuff.
4174708fba1SKajol Jain */
4184708fba1SKajol Jain if (event->attr.freq) {
4194708fba1SKajol Jain long freq = event->attr.sample_freq;
4204708fba1SKajol Jain
4214708fba1SKajol Jain event->attr.sample_period = NSEC_PER_SEC / freq;
4224708fba1SKajol Jain hwc->sample_period = event->attr.sample_period;
4234708fba1SKajol Jain local64_set(&hwc->period_left, hwc->sample_period);
4244708fba1SKajol Jain hwc->last_period = hwc->sample_period;
4254708fba1SKajol Jain event->attr.freq = 0;
4264708fba1SKajol Jain }
4274708fba1SKajol Jain
4284708fba1SKajol Jain event->destroy = vpa_dtl_reset_global_refc;
4294708fba1SKajol Jain return 0;
4304708fba1SKajol Jain }
4314708fba1SKajol Jain
vpa_dtl_event_add(struct perf_event * event,int flags)4324708fba1SKajol Jain static int vpa_dtl_event_add(struct perf_event *event, int flags)
4334708fba1SKajol Jain {
4344708fba1SKajol Jain int ret, hwcpu;
4354708fba1SKajol Jain unsigned long addr;
4364708fba1SKajol Jain struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
4374708fba1SKajol Jain
4384708fba1SKajol Jain /*
4394708fba1SKajol Jain * Register our dtl buffer with the hypervisor. The
4404708fba1SKajol Jain * HV expects the buffer size to be passed in the second
4414708fba1SKajol Jain * word of the buffer. Refer section '14.11.3.2. H_REGISTER_VPA'
4424708fba1SKajol Jain * from PAPR for more information.
4434708fba1SKajol Jain */
4444708fba1SKajol Jain ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
4454708fba1SKajol Jain dtl->last_idx = 0;
4464708fba1SKajol Jain
4474708fba1SKajol Jain hwcpu = get_hard_smp_processor_id(event->cpu);
4484708fba1SKajol Jain addr = __pa(dtl->buf);
4494708fba1SKajol Jain
4504708fba1SKajol Jain ret = register_dtl(hwcpu, addr);
4514708fba1SKajol Jain if (ret) {
4524708fba1SKajol Jain pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
4534708fba1SKajol Jain event->cpu, hwcpu, ret);
4544708fba1SKajol Jain return ret;
4554708fba1SKajol Jain }
4564708fba1SKajol Jain
4574708fba1SKajol Jain /* set our initial buffer indices */
4584708fba1SKajol Jain lppaca_of(event->cpu).dtl_idx = 0;
4594708fba1SKajol Jain
4604708fba1SKajol Jain /*
4614708fba1SKajol Jain * Ensure that our updates to the lppaca fields have
4624708fba1SKajol Jain * occurred before we actually enable the logging
4634708fba1SKajol Jain */
4644708fba1SKajol Jain smp_wmb();
4654708fba1SKajol Jain
4664708fba1SKajol Jain /* enable event logging */
4674708fba1SKajol Jain lppaca_of(event->cpu).dtl_enable_mask = event->attr.config;
4684708fba1SKajol Jain
4694708fba1SKajol Jain vpa_dtl_start_hrtimer(event);
4704708fba1SKajol Jain
4714708fba1SKajol Jain return 0;
4724708fba1SKajol Jain }
4734708fba1SKajol Jain
vpa_dtl_event_del(struct perf_event * event,int flags)4744708fba1SKajol Jain static void vpa_dtl_event_del(struct perf_event *event, int flags)
4754708fba1SKajol Jain {
4764708fba1SKajol Jain int hwcpu = get_hard_smp_processor_id(event->cpu);
4774708fba1SKajol Jain struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
4784708fba1SKajol Jain
4794708fba1SKajol Jain vpa_dtl_stop_hrtimer(event);
4804708fba1SKajol Jain unregister_dtl(hwcpu);
4814708fba1SKajol Jain kmem_cache_free(dtl_cache, dtl->buf);
4824708fba1SKajol Jain dtl->buf = NULL;
4834708fba1SKajol Jain lppaca_of(event->cpu).dtl_enable_mask = 0x0;
4844708fba1SKajol Jain }
4854708fba1SKajol Jain
/*
 * Intentionally empty: the dispatch trace log data is parsed and dumped
 * to the perf data by vpa_dtl_dump_sample_data(), so there is nothing to
 * do in the read callback.
 */
static void vpa_dtl_event_read(struct perf_event *event)
{
}
4944708fba1SKajol Jain
4955d75aed8SAthira Rajeev /*
4965d75aed8SAthira Rajeev * Set up pmu-private data structures for an AUX area
4975d75aed8SAthira Rajeev * **pages contains the aux buffer allocated for this event
4985d75aed8SAthira Rajeev * for the corresponding cpu. rb_alloc_aux uses "alloc_pages_node"
4995d75aed8SAthira Rajeev * and returns pointer to each page address. Map these pages to
5005d75aed8SAthira Rajeev * contiguous space using vmap and use that as base address.
5015d75aed8SAthira Rajeev *
5025d75aed8SAthira Rajeev * The aux private data structure ie, "struct vpa_pmu_buf" mainly
5035d75aed8SAthira Rajeev * saves
5045d75aed8SAthira Rajeev * - buf->base: aux buffer base address
5055d75aed8SAthira Rajeev * - buf->head: offset from base address where data will be written to.
5065d75aed8SAthira Rajeev * - buf->size: Size of allocated memory
5075d75aed8SAthira Rajeev */
vpa_dtl_setup_aux(struct perf_event * event,void ** pages,int nr_pages,bool snapshot)5085d75aed8SAthira Rajeev static void *vpa_dtl_setup_aux(struct perf_event *event, void **pages,
5095d75aed8SAthira Rajeev int nr_pages, bool snapshot)
5105d75aed8SAthira Rajeev {
5115d75aed8SAthira Rajeev int i, cpu = event->cpu;
5125d75aed8SAthira Rajeev struct vpa_pmu_buf *buf __free(kfree) = NULL;
5135d75aed8SAthira Rajeev struct page **pglist __free(kfree) = NULL;
5145d75aed8SAthira Rajeev
5155d75aed8SAthira Rajeev /* We need at least one page for this to work. */
5165d75aed8SAthira Rajeev if (!nr_pages)
5175d75aed8SAthira Rajeev return NULL;
5185d75aed8SAthira Rajeev
5195d75aed8SAthira Rajeev if (cpu == -1)
5205d75aed8SAthira Rajeev cpu = raw_smp_processor_id();
5215d75aed8SAthira Rajeev
5225d75aed8SAthira Rajeev buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
5235d75aed8SAthira Rajeev if (!buf)
5245d75aed8SAthira Rajeev return NULL;
5255d75aed8SAthira Rajeev
5265d75aed8SAthira Rajeev pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
5275d75aed8SAthira Rajeev if (!pglist)
5285d75aed8SAthira Rajeev return NULL;
5295d75aed8SAthira Rajeev
5305d75aed8SAthira Rajeev for (i = 0; i < nr_pages; ++i)
5315d75aed8SAthira Rajeev pglist[i] = virt_to_page(pages[i]);
5325d75aed8SAthira Rajeev
5335d75aed8SAthira Rajeev buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
5345d75aed8SAthira Rajeev if (!buf->base)
5355d75aed8SAthira Rajeev return NULL;
5365d75aed8SAthira Rajeev
5375d75aed8SAthira Rajeev buf->nr_pages = nr_pages;
5385d75aed8SAthira Rajeev buf->snapshot = false;
5395d75aed8SAthira Rajeev
5405d75aed8SAthira Rajeev buf->size = nr_pages << PAGE_SHIFT;
5415d75aed8SAthira Rajeev buf->head = 0;
542*b5e71cafSAthira Rajeev buf->head_size = 0;
5432de8b6ddSAthira Rajeev buf->boottb_freq_saved = 0;
544*b5e71cafSAthira Rajeev buf->threshold = ((buf->size - 32) / sizeof(struct dtl_entry));
5455d75aed8SAthira Rajeev return no_free_ptr(buf);
5465d75aed8SAthira Rajeev }
5475d75aed8SAthira Rajeev
5485d75aed8SAthira Rajeev /*
5495d75aed8SAthira Rajeev * free pmu-private AUX data structures
5505d75aed8SAthira Rajeev */
vpa_dtl_free_aux(void * aux)5515d75aed8SAthira Rajeev static void vpa_dtl_free_aux(void *aux)
5525d75aed8SAthira Rajeev {
5535d75aed8SAthira Rajeev struct vpa_pmu_buf *buf = aux;
5545d75aed8SAthira Rajeev
5555d75aed8SAthira Rajeev vunmap(buf->base);
5565d75aed8SAthira Rajeev kfree(buf);
5575d75aed8SAthira Rajeev }
5585d75aed8SAthira Rajeev
5594708fba1SKajol Jain static struct pmu vpa_dtl_pmu = {
5604708fba1SKajol Jain .task_ctx_nr = perf_invalid_context,
5614708fba1SKajol Jain
5624708fba1SKajol Jain .name = "vpa_dtl",
5634708fba1SKajol Jain .attr_groups = attr_groups,
5644708fba1SKajol Jain .event_init = vpa_dtl_event_init,
5654708fba1SKajol Jain .add = vpa_dtl_event_add,
5664708fba1SKajol Jain .del = vpa_dtl_event_del,
5674708fba1SKajol Jain .read = vpa_dtl_event_read,
5685d75aed8SAthira Rajeev .setup_aux = vpa_dtl_setup_aux,
5695d75aed8SAthira Rajeev .free_aux = vpa_dtl_free_aux,
5704708fba1SKajol Jain .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
5714708fba1SKajol Jain };
5724708fba1SKajol Jain
vpa_dtl_init(void)5734708fba1SKajol Jain static int vpa_dtl_init(void)
5744708fba1SKajol Jain {
5754708fba1SKajol Jain int r;
5764708fba1SKajol Jain
5774708fba1SKajol Jain if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
5784708fba1SKajol Jain pr_debug("not a shared virtualized system, not enabling\n");
5794708fba1SKajol Jain return -ENODEV;
5804708fba1SKajol Jain }
5814708fba1SKajol Jain
5824708fba1SKajol Jain /* This driver is intended only for L1 host. */
5834708fba1SKajol Jain if (is_kvm_guest()) {
5844708fba1SKajol Jain pr_debug("Only supported for L1 host system\n");
5854708fba1SKajol Jain return -ENODEV;
5864708fba1SKajol Jain }
5874708fba1SKajol Jain
5884708fba1SKajol Jain r = perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1);
5894708fba1SKajol Jain if (r)
5904708fba1SKajol Jain return r;
5914708fba1SKajol Jain
5924708fba1SKajol Jain return 0;
5934708fba1SKajol Jain }
5944708fba1SKajol Jain
5954708fba1SKajol Jain device_initcall(vpa_dtl_init);
5964708fba1SKajol Jain #endif //CONFIG_PPC_SPLPAR
597