===================
Block io priorities
===================


Intro
-----

With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
priorities are supported for reads on files. This enables users to io nice
processes or process groups, similar to what has been possible with cpu
scheduling for ages. This document mainly details the current possibilities
with cfq; other io schedulers do not support io priorities thus far.

Scheduling classes
------------------

CFQ implements three generic scheduling classes that determine how io is
served for a process.

IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. Thus it needs to be used with some
care: one io RT process can starve the entire system. Within the RT class,
there are 8 levels of class data that determine exactly how much time this
process needs the disk for on each service. In the future this might change
to be more directly mappable to performance, by passing in a wanted data
rate instead.

IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it's directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.

IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.

Tools
-----

See below for a sample ionice tool.
Usage::

	# ionice -c<class> -n<level> -p<pid>

If pid isn't given, the current process is assumed. IO priority settings
are inherited on fork, so you can use ionice to start the process at a given
level::

	# ionice -c2 -n0 /bin/ls

will run ls at the best-effort scheduling class at the highest priority.
For a running process, you can give the pid instead::

	# ionice -c1 -n2 -p100

will change pid 100 to run at the realtime scheduling class, at priority 2.

ionice.c tool::

	#include <stdio.h>
	#include <stdlib.h>
	#include <errno.h>
	#include <getopt.h>
	#include <unistd.h>
	#include <sys/ptrace.h>
	#include <asm/unistd.h>

	/*
	 * ioprio_set()/ioprio_get() are invoked directly through syscall().
	 * Use the syscall numbers from <asm/unistd.h> when it defines them;
	 * the per-architecture values below are only a fallback, so the tool
	 * is not limited to the three architectures listed here.
	 */
	#ifndef __NR_ioprio_set
	#if defined(__i386__)
	#define __NR_ioprio_set		289
	#define __NR_ioprio_get		290
	#elif defined(__ppc__)
	#define __NR_ioprio_set		273
	#define __NR_ioprio_get		274
	#elif defined(__x86_64__)
	#define __NR_ioprio_set		251
	#define __NR_ioprio_get		252
	#else
	#error "Unsupported arch"
	#endif
	#endif

	/*
	 * Set the io priority of the target selected by which/who.
	 * Returns the raw syscall() result: -1 with errno set on failure.
	 */
	static inline int ioprio_set(int which, int who, int ioprio)
	{
		return syscall(__NR_ioprio_set, which, who, ioprio);
	}

	/*
	 * Fetch the io priority of the target selected by which/who.
	 * Returns the packed class/level value, or -1 with errno set.
	 */
	static inline int ioprio_get(int which, int who)
	{
		return syscall(__NR_ioprio_get, which, who);
	}

	/* Scheduling classes; the value is also the index into to_prio[]. */
	enum {
		IOPRIO_CLASS_NONE,
		IOPRIO_CLASS_RT,
		IOPRIO_CLASS_BE,
		IOPRIO_CLASS_IDLE,
	};

	/* Values for the "which" argument of ioprio_set()/ioprio_get(). */
	enum {
		IOPRIO_WHO_PROCESS = 1,
		IOPRIO_WHO_PGRP,
		IOPRIO_WHO_USER,
	};

	/* The class is stored above this bit, the level below it. */
	#define IOPRIO_CLASS_SHIFT	13

	const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };

	/*
	 * ionice: show or set io priorities.
	 *
	 *   -c<class>  scheduling class (1 = realtime, 2 = best-effort, 3 = idle)
	 *   -n<level>  priority level within the class
	 *   -p<pid>    process to act on; 0 (the default) is passed to the
	 *              syscall unchanged
	 *
	 * Without -n/-c the priority of the process is printed (the pid may
	 * also be given as the first non-option argument). With -n/-c the
	 * priority is set and, if a command follows, that command is executed.
	 *
	 * Returns 0 on success and 1 on any failure.
	 */
	int main(int argc, char *argv[])
	{
		int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
		int c, pid = 0;

		/* getopt() signals the end of the options with -1. */
		while ((c = getopt(argc, argv, "+n:c:p:")) != -1) {
			switch (c) {
			case 'n':
				ioprio = strtol(optarg, NULL, 10);
				set = 1;
				break;
			case 'c':
				ioprio_class = strtol(optarg, NULL, 10);
				set = 1;
				break;
			case 'p':
				pid = strtol(optarg, NULL, 10);
				break;
			}
		}

		switch (ioprio_class) {
		case IOPRIO_CLASS_NONE:
			/* "none" is treated as a request for best-effort */
			ioprio_class = IOPRIO_CLASS_BE;
			break;
		case IOPRIO_CLASS_RT:
		case IOPRIO_CLASS_BE:
			break;
		case IOPRIO_CLASS_IDLE:
			/* any level given with -n is overridden for idle */
			ioprio = 7;
			break;
		default:
			printf("bad prio class %d\n", ioprio_class);
			return 1;
		}

		if (!set) {
			if (!pid && argv[optind])
				pid = strtol(argv[optind], NULL, 10);

			ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);

			/*
			 * Report the failure before any other library call
			 * gets a chance to overwrite errno.
			 */
			if (ioprio == -1) {
				perror("ioprio_get");
				return 1;
			}

			printf("pid=%d, %d\n", pid, ioprio);

			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
			ioprio = ioprio & 0xff;

			/* Never index to_prio[] with a class it has no name for. */
			if (ioprio_class < 0 ||
			    ioprio_class >= (int)(sizeof(to_prio) / sizeof(to_prio[0])))
				printf("unknown class %d: prio %d\n", ioprio_class, ioprio);
			else
				printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
		} else {
			if (ioprio_set(IOPRIO_WHO_PROCESS, pid,
				       ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
				perror("ioprio_set");
				return 1;
			}

			if (argv[optind]) {
				execvp(argv[optind], &argv[optind]);
				/* execvp() only returns if the exec failed */
				perror("execvp");
				return 1;
			}
		}

		return 0;
	}


March 11 2005, Jens Axboe <jens.axboe@oracle.com>