xref: /linux/Documentation/block/ioprio.rst (revision bb118e86dfcc096b8a3889c1a5c88f214e1f65fa)
1===================
2Block io priorities
3===================
4
5
6Intro
7-----
8
9With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
10priorities are supported for reads on files.  This enables users to io nice
11processes or process groups, similar to what has been possible with cpu
12scheduling for ages.  This document mainly details the current possibilities
13with cfq; other io schedulers do not support io priorities thus far.
14
15Scheduling classes
16------------------
17
18CFQ implements three generic scheduling classes that determine how io is
19served for a process.
20
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. Thus it needs to be used with some
care, since a single RT io process can starve the entire system. Within the RT
class, there are 8 levels of class data that determine exactly how much time
this process needs the disk for on each service. In the future this might change
to be more directly mappable to performance, by passing in a wanted data
rate instead.
29
IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it is directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.
36
IOPRIO_CLASS_IDLE: This is the idle scheduling class. Processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.
40
41Tools
42-----
43
44See below for a sample ionice tool. Usage::
45
46	# ionice -c<class> -n<level> -p<pid>
47
48If pid isn't given, the current process is assumed. IO priority settings
49are inherited on fork, so you can use ionice to start the process at a given
50level::
51
52	# ionice -c2 -n0 /bin/ls
53
54will run ls at the best-effort scheduling class at the highest priority.
55For a running process, you can give the pid instead::
56
57	# ionice -c1 -n2 -p100
58
59will change pid 100 to run at the realtime scheduling class, at priority 2.
60
61ionice.c tool::
62
63  #include <stdio.h>
64  #include <stdlib.h>
65  #include <errno.h>
66  #include <getopt.h>
67  #include <unistd.h>
68  #include <sys/ptrace.h>
69  #include <asm/unistd.h>
70
71  extern int sys_ioprio_set(int, int, int);
72  extern int sys_ioprio_get(int, int);
73
74  #if defined(__i386__)
75  #define __NR_ioprio_set		289
76  #define __NR_ioprio_get		290
77  #elif defined(__ppc__)
78  #define __NR_ioprio_set		273
79  #define __NR_ioprio_get		274
80  #elif defined(__x86_64__)
81  #define __NR_ioprio_set		251
82  #define __NR_ioprio_get		252
83  #else
84  #error "Unsupported arch"
85  #endif
86
87  static inline int ioprio_set(int which, int who, int ioprio)
88  {
89	return syscall(__NR_ioprio_set, which, who, ioprio);
90  }
91
92  static inline int ioprio_get(int which, int who)
93  {
94	return syscall(__NR_ioprio_get, which, who);
95  }
96
97  enum {
98	IOPRIO_CLASS_NONE,
99	IOPRIO_CLASS_RT,
100	IOPRIO_CLASS_BE,
101	IOPRIO_CLASS_IDLE,
102  };
103
104  enum {
105	IOPRIO_WHO_PROCESS = 1,
106	IOPRIO_WHO_PGRP,
107	IOPRIO_WHO_USER,
108  };
109
110  #define IOPRIO_CLASS_SHIFT	13
111
112  const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
113
114  int main(int argc, char *argv[])
115  {
116	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
117	int c, pid = 0;
118
119	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
120		switch (c) {
121		case 'n':
122			ioprio = strtol(optarg, NULL, 10);
123			set = 1;
124			break;
125		case 'c':
126			ioprio_class = strtol(optarg, NULL, 10);
127			set = 1;
128			break;
129		case 'p':
130			pid = strtol(optarg, NULL, 10);
131			break;
132		}
133	}
134
135	switch (ioprio_class) {
136		case IOPRIO_CLASS_NONE:
137			ioprio_class = IOPRIO_CLASS_BE;
138			break;
139		case IOPRIO_CLASS_RT:
140		case IOPRIO_CLASS_BE:
141			break;
142		case IOPRIO_CLASS_IDLE:
143			ioprio = 7;
144			break;
145		default:
146			printf("bad prio class %d\n", ioprio_class);
147			return 1;
148	}
149
150	if (!set) {
151		if (!pid && argv[optind])
152			pid = strtol(argv[optind], NULL, 10);
153
154		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
155
156		printf("pid=%d, %d\n", pid, ioprio);
157
158		if (ioprio == -1)
159			perror("ioprio_get");
160		else {
161			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
162			ioprio = ioprio & 0xff;
163			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
164		}
165	} else {
166		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
167			perror("ioprio_set");
168			return 1;
169		}
170
171		if (argv[optind])
172			execvp(argv[optind], &argv[optind]);
173	}
174
175	return 0;
176  }
177
178
179March 11 2005, Jens Axboe <jens.axboe@oracle.com>
180