1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Virtual Processor Dispatch Trace Log 4 * 5 * (C) Copyright IBM Corporation 2009 6 * 7 * Author: Jeremy Kerr <jk@ozlabs.org> 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <asm/smp.h> 13 #include <linux/uaccess.h> 14 #include <linux/debugfs.h> 15 #include <asm/firmware.h> 16 #include <asm/dtl.h> 17 #include <asm/lppaca.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/machdep.h> 20 21 #ifdef CONFIG_DTL 22 struct dtl { 23 struct dtl_entry *buf; 24 int cpu; 25 int buf_entries; 26 u64 last_idx; 27 spinlock_t lock; 28 }; 29 static DEFINE_PER_CPU(struct dtl, cpu_dtl); 30 31 static u8 dtl_event_mask = DTL_LOG_ALL; 32 33 34 /* 35 * Size of per-cpu log buffers. Firmware requires that the buffer does 36 * not cross a 4k boundary. 37 */ 38 static int dtl_buf_entries = N_DISPATCH_LOG; 39 40 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 41 42 /* 43 * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls 44 * reading from the dispatch trace log. If other code wants to consume 45 * DTL entries, it can set this pointer to a function that will get 46 * called once for each DTL entry that gets processed. 47 */ 48 static void (*dtl_consumer)(struct dtl_entry *entry, u64 index); 49 50 struct dtl_ring { 51 u64 write_index; 52 struct dtl_entry *write_ptr; 53 struct dtl_entry *buf; 54 struct dtl_entry *buf_end; 55 }; 56 57 static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); 58 59 static atomic_t dtl_count; 60 61 /* 62 * The cpu accounting code controls the DTL ring buffer, and we get 63 * given entries as they are processed. 64 */ 65 static void consume_dtle(struct dtl_entry *dtle, u64 index) 66 { 67 struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); 68 struct dtl_entry *wp = dtlr->write_ptr; 69 struct lppaca *vpa = local_paca->lppaca_ptr; 70 71 if (!wp) 72 return; 73 74 *wp = *dtle; 75 barrier(); 76 77 /* check for hypervisor ring buffer overflow, ignore this entry if so */ 78 if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) 79 return; 80 81 ++wp; 82 if (wp == dtlr->buf_end) 83 wp = dtlr->buf; 84 dtlr->write_ptr = wp; 85 86 /* incrementing write_index makes the new entry visible */ 87 smp_wmb(); 88 ++dtlr->write_index; 89 } 90 91 static int dtl_start(struct dtl *dtl) 92 { 93 struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); 94 95 dtlr->buf = dtl->buf; 96 dtlr->buf_end = dtl->buf + dtl->buf_entries; 97 dtlr->write_index = 0; 98 99 /* setting write_ptr enables logging into our buffer */ 100 smp_wmb(); 101 dtlr->write_ptr = dtl->buf; 102 103 /* enable event logging */ 104 lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; 105 106 dtl_consumer = consume_dtle; 107 atomic_inc(&dtl_count); 108 return 0; 109 } 110 111 static void dtl_stop(struct dtl *dtl) 112 { 113 struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); 114 115 dtlr->write_ptr = NULL; 116 smp_wmb(); 117 118 dtlr->buf = NULL; 119 120 /* restore dtl_enable_mask */ 121 lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT; 122 123 if (atomic_dec_and_test(&dtl_count)) 124 dtl_consumer = NULL; 125 } 126 127 static u64 dtl_current_index(struct dtl *dtl) 128 { 129 return per_cpu(dtl_rings, dtl->cpu).write_index; 130 } 131 132 #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 133 134 static int dtl_start(struct dtl *dtl) 135 { 136 unsigned long addr; 137 int ret, hwcpu; 138 139 /* Register our dtl buffer with the hypervisor. The HV expects the 140 * buffer size to be passed in the second word of the buffer */ 141 ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES); 142 143 hwcpu = get_hard_smp_processor_id(dtl->cpu); 144 addr = __pa(dtl->buf); 145 ret = register_dtl(hwcpu, addr); 146 if (ret) { 147 printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " 148 "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); 149 return -EIO; 150 } 151 152 /* set our initial buffer indices */ 153 lppaca_of(dtl->cpu).dtl_idx = 0; 154 155 /* ensure that our updates to the lppaca fields have occurred before 156 * we actually enable the logging */ 157 smp_wmb(); 158 159 /* enable event logging */ 160 lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask; 161 162 return 0; 163 } 164 165 static void dtl_stop(struct dtl *dtl) 166 { 167 int hwcpu = get_hard_smp_processor_id(dtl->cpu); 168 169 lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; 170 171 unregister_dtl(hwcpu); 172 } 173 174 static u64 dtl_current_index(struct dtl *dtl) 175 { 176 return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx); 177 } 178 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 179 180 static int dtl_enable(struct dtl *dtl) 181 { 182 long int n_entries; 183 long int rc; 184 struct dtl_entry *buf = NULL; 185 186 if (!dtl_cache) 187 return -ENOMEM; 188 189 /* only allow one reader */ 190 if (dtl->buf) 191 return -EBUSY; 192 193 /* ensure there are no other conflicting dtl users */ 194 if (!down_read_trylock(&dtl_access_lock)) 195 return -EBUSY; 196 197 n_entries = dtl_buf_entries; 198 buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); 199 if (!buf) { 200 printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", 201 __func__, dtl->cpu); 202 up_read(&dtl_access_lock); 203 return -ENOMEM; 204 } 205 206 spin_lock(&dtl->lock); 207 rc = -EBUSY; 208 if (!dtl->buf) { 209 /* store the original allocation size for use during read */ 210 dtl->buf_entries = n_entries; 211 dtl->buf = buf; 212 dtl->last_idx = 0; 213 rc = dtl_start(dtl); 214 if (rc) 215 dtl->buf = NULL; 216 } 217 spin_unlock(&dtl->lock); 218 219 if (rc) { 220 up_read(&dtl_access_lock); 221 kmem_cache_free(dtl_cache, buf); 222 } 223 224 return rc; 225 } 226 227 static void dtl_disable(struct dtl *dtl) 228 { 229 spin_lock(&dtl->lock); 230 dtl_stop(dtl); 231 kmem_cache_free(dtl_cache, dtl->buf); 232 dtl->buf = NULL; 233 dtl->buf_entries = 0; 234 spin_unlock(&dtl->lock); 235 up_read(&dtl_access_lock); 236 } 237 238 /* file interface */ 239 240 static int dtl_file_open(struct inode *inode, struct file *filp) 241 { 242 struct dtl *dtl = inode->i_private; 243 int rc; 244 245 rc = dtl_enable(dtl); 246 if (rc) 247 return rc; 248 249 filp->private_data = dtl; 250 return 0; 251 } 252 253 static int dtl_file_release(struct inode *inode, struct file *filp) 254 { 255 struct dtl *dtl = inode->i_private; 256 dtl_disable(dtl); 257 return 0; 258 } 259 260 static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, 261 loff_t *pos) 262 { 263 long int rc, n_read, n_req, read_size; 264 struct dtl *dtl; 265 u64 cur_idx, last_idx, i; 266 267 if ((len % sizeof(struct dtl_entry)) != 0) 268 return -EINVAL; 269 270 dtl = filp->private_data; 271 272 /* requested number of entries to read */ 273 n_req = len / sizeof(struct dtl_entry); 274 275 /* actual number of entries read */ 276 n_read = 0; 277 278 spin_lock(&dtl->lock); 279 280 cur_idx = dtl_current_index(dtl); 281 last_idx = dtl->last_idx; 282 283 if (last_idx + dtl->buf_entries <= cur_idx) 284 last_idx = cur_idx - dtl->buf_entries + 1; 285 286 if (last_idx + n_req > cur_idx) 287 n_req = cur_idx - last_idx; 288 289 if (n_req > 0) 290 dtl->last_idx = last_idx + n_req; 291 292 spin_unlock(&dtl->lock); 293 294 if (n_req <= 0) 295 return 0; 296 297 i = last_idx % dtl->buf_entries; 298 299 /* read the tail of the buffer if we've wrapped */ 300 if (i + n_req > dtl->buf_entries) { 301 read_size = dtl->buf_entries - i; 302 303 rc = copy_to_user(buf, &dtl->buf[i], 304 read_size * sizeof(struct dtl_entry)); 305 if (rc) 306 return -EFAULT; 307 308 i = 0; 309 n_req -= read_size; 310 n_read += read_size; 311 buf += read_size * sizeof(struct dtl_entry); 312 } 313 314 /* .. and now the head */ 315 rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry)); 316 if (rc) 317 return -EFAULT; 318 319 n_read += n_req; 320 321 return n_read * sizeof(struct dtl_entry); 322 } 323 324 static const struct file_operations dtl_fops = { 325 .open = dtl_file_open, 326 .release = dtl_file_release, 327 .read = dtl_file_read, 328 }; 329 330 static struct dentry *dtl_dir; 331 332 static void dtl_setup_file(struct dtl *dtl) 333 { 334 char name[10]; 335 336 sprintf(name, "cpu-%d", dtl->cpu); 337 338 debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops); 339 } 340 341 static int dtl_init(void) 342 { 343 int i; 344 345 if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 346 return -ENODEV; 347 348 /* set up common debugfs structure */ 349 350 dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir); 351 352 debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); 353 debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); 354 355 /* set up the per-cpu log structures */ 356 for_each_possible_cpu(i) { 357 struct dtl *dtl = &per_cpu(cpu_dtl, i); 358 spin_lock_init(&dtl->lock); 359 dtl->cpu = i; 360 361 dtl_setup_file(dtl); 362 } 363 364 return 0; 365 } 366 machine_arch_initcall(pseries, dtl_init); 367 #endif /* CONFIG_DTL */ 368 369 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 370 /* 371 * Scan the dispatch trace log and count up the stolen time. 372 * Should be called with interrupts disabled. 373 */ 374 static notrace u64 scan_dispatch_log(u64 stop_tb) 375 { 376 u64 i = local_paca->dtl_ridx; 377 struct dtl_entry *dtl = local_paca->dtl_curr; 378 struct dtl_entry *dtl_end = local_paca->dispatch_log_end; 379 struct lppaca *vpa = local_paca->lppaca_ptr; 380 u64 tb_delta; 381 u64 stolen = 0; 382 u64 dtb; 383 384 if (!dtl) 385 return 0; 386 387 if (i == be64_to_cpu(vpa->dtl_idx)) 388 return 0; 389 while (i < be64_to_cpu(vpa->dtl_idx)) { 390 dtb = be64_to_cpu(dtl->timebase); 391 tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + 392 be32_to_cpu(dtl->ready_to_enqueue_time); 393 barrier(); 394 if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { 395 /* buffer has overflowed */ 396 i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; 397 dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); 398 continue; 399 } 400 if (dtb > stop_tb) 401 break; 402 #ifdef CONFIG_DTL 403 if (dtl_consumer) 404 dtl_consumer(dtl, i); 405 #endif 406 stolen += tb_delta; 407 ++i; 408 ++dtl; 409 if (dtl == dtl_end) 410 dtl = local_paca->dispatch_log; 411 } 412 local_paca->dtl_ridx = i; 413 local_paca->dtl_curr = dtl; 414 return stolen; 415 } 416 417 /* 418 * Accumulate stolen time by scanning the dispatch trace log. 419 * Called on entry from user mode. 420 */ 421 void notrace pseries_accumulate_stolen_time(void) 422 { 423 u64 sst, ust; 424 struct cpu_accounting_data *acct = &local_paca->accounting; 425 426 sst = scan_dispatch_log(acct->starttime_user); 427 ust = scan_dispatch_log(acct->starttime); 428 acct->stime -= sst; 429 acct->utime -= ust; 430 acct->steal_time += ust + sst; 431 } 432 433 u64 pseries_calculate_stolen_time(u64 stop_tb) 434 { 435 if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 436 return 0; 437 438 if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) 439 return scan_dispatch_log(stop_tb); 440 441 return 0; 442 } 443 444 #endif 445