Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | #ifdef CONFIG_SCHEDSTATS /* * bump this up when changing the output format or the meaning of an existing * format, so that tools can adapt (or abort) */ #define SCHEDSTAT_VERSION 15 static int show_schedstat(struct seq_file *seq, void *v) { int cpu; int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; char *mask_str = kmalloc(mask_len, GFP_KERNEL); if (mask_str == NULL) return -ENOMEM; seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); seq_printf(seq, "timestamp %lu\n", jiffies); for_each_online_cpu(cpu) { struct rq *rq = cpu_rq(cpu); #ifdef CONFIG_SMP struct sched_domain *sd; int dcount = 0; #endif /* runqueue-specific stats */ seq_printf(seq, "cpu%d %u %u %u %u %u %u %llu %llu %lu", cpu, rq->yld_count, rq->sched_switch, rq->sched_count, rq->sched_goidle, rq->ttwu_count, rq->ttwu_local, rq->rq_cpu_time, rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); seq_printf(seq, "\n"); #ifdef CONFIG_SMP /* domain-specific stats */ preempt_disable(); for_each_domain(cpu, sd) { enum cpu_idle_type itype; cpumask_scnprintf(mask_str, mask_len, sched_domain_span(sd)); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { seq_printf(seq, " %u %u %u %u %u %u %u %u", sd->lb_count[itype], sd->lb_balanced[itype], sd->lb_failed[itype], sd->lb_imbalance[itype], sd->lb_gained[itype], sd->lb_hot_gained[itype], sd->lb_nobusyq[itype], sd->lb_nobusyg[itype]); } seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u %u\n", sd->alb_count, sd->alb_failed, sd->alb_pushed, sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed, sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed, sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); } preempt_enable(); #endif } kfree(mask_str); return 0; } static int schedstat_open(struct inode *inode, struct file *file) { unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); char *buf = kmalloc(size, GFP_KERNEL); struct seq_file *m; int res; if (!buf) return -ENOMEM; res = single_open(file, show_schedstat, NULL); if (!res) { m = file->private_data; m->buf = buf; m->size = size; } else kfree(buf); return res; } static const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int __init proc_schedstat_init(void) { proc_create("schedstat", 0, NULL, &proc_schedstat_operations); return 0; } module_init(proc_schedstat_init); /* * Expects runqueue lock to be held for atomicity of update */ static inline void rq_sched_info_arrive(struct rq *rq, unsigned long long delta) { if (rq) { rq->rq_sched_info.run_delay += delta; rq->rq_sched_info.pcount++; } } /* * Expects runqueue lock to be held for atomicity of update */ static inline void rq_sched_info_depart(struct rq *rq, unsigned long long delta) { if (rq) rq->rq_cpu_time += delta; } static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { if (rq) rq->rq_sched_info.run_delay += delta; } # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) # define schedstat_set(var, val) do { var = (val); } while (0) #else /* !CONFIG_SCHEDSTATS */ static inline void rq_sched_info_arrive(struct rq *rq, unsigned long long delta) {} static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) {} static inline void rq_sched_info_depart(struct rq *rq, unsigned long long delta) {} # define schedstat_inc(rq, field) do { } while (0) # define schedstat_add(rq, field, amt) do { } while (0) # define schedstat_set(var, val) do { } while (0) #endif #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) static inline void sched_info_reset_dequeued(struct task_struct *t) { t->sched_info.last_queued = 0; } /* * Called when a process is dequeued from the active array and given * the cpu. We should note that with the exception of interactive * tasks, the expired queue will become the active queue after the active * queue is empty, without explicitly dequeuing and requeuing tasks in the * expired queue. (Interactive tasks may be requeued directly to the * active queue, thus delaying tasks in the expired queue from running; * see scheduler_tick()). * * Though we are interested in knowing how long it was from the *first* time a * task was queued to the time that it finally hit a cpu, we call this routine * from dequeue_task() to account for possible rq->clock skew across cpus. The * delta taken on each cpu would annul the skew. */ static inline void sched_info_dequeued(struct task_struct *t) { unsigned long long now = task_rq(t)->clock, delta = 0; if (unlikely(sched_info_on())) if (t->sched_info.last_queued) delta = now - t->sched_info.last_queued; sched_info_reset_dequeued(t); t->sched_info.run_delay += delta; rq_sched_info_dequeued(task_rq(t), delta); } /* * Called when a task finally hits the cpu. We can now calculate how * long it was waiting to run. We also note when it began so that we * can keep stats on how long its timeslice is. */ static void sched_info_arrive(struct task_struct *t) { unsigned long long now = task_rq(t)->clock, delta = 0; if (t->sched_info.last_queued) delta = now - t->sched_info.last_queued; sched_info_reset_dequeued(t); t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; rq_sched_info_arrive(task_rq(t), delta); } /* * Called when a process is queued into either the active or expired * array. The time is noted and later used to determine how long we * had to wait for us to reach the cpu. Since the expired queue will * become the active queue after active queue is empty, without dequeuing * and requeuing any tasks, we are interested in queuing to either. It * is unusual but not impossible for tasks to be dequeued and immediately * requeued in the same or another array: this can happen in sched_yield(), * set_user_nice(), and even load_balance() as it moves tasks from runqueue * to runqueue. * * This function is only called from enqueue_task(), but also only updates * the timestamp if it is already not set. It's assumed that * sched_info_dequeued() will clear that stamp when appropriate. */ static inline void sched_info_queued(struct task_struct *t) { if (unlikely(sched_info_on())) if (!t->sched_info.last_queued) t->sched_info.last_queued = task_rq(t)->clock; } /* * Called when a process ceases being the active-running process, either * voluntarily or involuntarily. Now we can calculate how long we ran. * Also, if the process is still in the TASK_RUNNING state, call * sched_info_queued() to mark that it has now again started waiting on * the runqueue. */ static inline void sched_info_depart(struct task_struct *t) { unsigned long long delta = task_rq(t)->clock - t->sched_info.last_arrival; rq_sched_info_depart(task_rq(t), delta); if (t->state == TASK_RUNNING) sched_info_queued(t); } /* * Called when tasks are switched involuntarily due, typically, to expiring * their time slice. (This may also be called when switching to or from * the idle task.) We are only called when prev != next. */ static inline void __sched_info_switch(struct task_struct *prev, struct task_struct *next) { struct rq *rq = task_rq(prev); /* * prev now departs the cpu. It's not interesting to record * stats about how efficient we were at scheduling the idle * process, however. */ if (prev != rq->idle) sched_info_depart(prev); if (next != rq->idle) sched_info_arrive(next); } static inline void sched_info_switch(struct task_struct *prev, struct task_struct *next) { if (unlikely(sched_info_on())) __sched_info_switch(prev, next); } #else #define sched_info_queued(t) do { } while (0) #define sched_info_reset_dequeued(t) do { } while (0) #define sched_info_dequeued(t) do { } while (0) #define sched_info_switch(t, next) do { } while (0) #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ /* * The following are functions that support scheduler-internal time accounting. * These functions are generally called at the timer tick. None of this depends * on CONFIG_SCHEDSTATS. */ /** * account_group_user_time - Maintain utime for a thread group. * * @tsk: Pointer to task structure. * @cputime: Time value by which to increment the utime field of the * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the utime field there. */ static inline void account_group_user_time(struct task_struct *tsk, cputime_t cputime) { struct thread_group_cputimer *cputimer; /* tsk == current, ensure it is safe to use ->signal */ if (unlikely(tsk->exit_state)) return; cputimer = &tsk->signal->cputimer; if (!cputimer->running) return; spin_lock(&cputimer->lock); cputimer->cputime.utime = cputime_add(cputimer->cputime.utime, cputime); spin_unlock(&cputimer->lock); } /** * account_group_system_time - Maintain stime for a thread group. * * @tsk: Pointer to task structure. * @cputime: Time value by which to increment the stime field of the * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the stime field there. */ static inline void account_group_system_time(struct task_struct *tsk, cputime_t cputime) { struct thread_group_cputimer *cputimer; /* tsk == current, ensure it is safe to use ->signal */ if (unlikely(tsk->exit_state)) return; cputimer = &tsk->signal->cputimer; if (!cputimer->running) return; spin_lock(&cputimer->lock); cputimer->cputime.stime = cputime_add(cputimer->cputime.stime, cputime); spin_unlock(&cputimer->lock); } /** * account_group_exec_runtime - Maintain exec runtime for a thread group. * * @tsk: Pointer to task structure. * @ns: Time value by which to increment the sum_exec_runtime field * of the thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the sum_exec_runtime field there. */ static inline void account_group_exec_runtime(struct task_struct *tsk, unsigned long long ns) { struct thread_group_cputimer *cputimer; struct signal_struct *sig; sig = tsk->signal; /* see __exit_signal()->task_rq_unlock_wait() */ barrier(); if (unlikely(!sig)) return; cputimer = &sig->cputimer; if (!cputimer->running) return; spin_lock(&cputimer->lock); cputimer->cputime.sum_exec_runtime += ns; spin_unlock(&cputimer->lock); } |