Commit 8dfb6e71 authored by Nathaniel Chen, committed by Auke Kok
Browse files

Dynamically allocate bootchart logs

Instead of storing bootchart sample data in arrays, this patch moves
storage to linked lists so that there is no more limit on samples.

This patch also fixes parsing of /proc/<pid>/smaps in kernels > 3.7.
parent 83688684
......@@ -59,14 +59,11 @@
#include "store.h"
#include "svg.h"
#include "bootchart.h"
#include "list.h"
double graph_start;
double log_start;
double sampletime[MAXSAMPLES];
struct ps_struct *ps_first;
struct block_stat_struct blockstat[MAXSAMPLES];
int entropy_avail[MAXSAMPLES];
struct cpu_stat_struct cpustat[MAXCPUS];
int pscount;
int cpus;
double interval;
......@@ -87,6 +84,8 @@ int arg_samples_len = 500; /* we record len+1 (1 start sample) */
double arg_hz = 25.0; /* 20 seconds log time */
double arg_scale_x = 100.0; /* 100px = 1sec */
double arg_scale_y = 20.0; /* 16px = 1 process bar */
static struct list_sample_data *sampledata;
struct list_sample_data *head;
char arg_init_path[PATH_MAX] = "/sbin/init";
char arg_output_path[PATH_MAX] = "/run/log";
......@@ -227,11 +226,6 @@ static int parse_args(int argc, char *argv[]) {
}
}
if (arg_samples_len > MAXSAMPLES) {
fprintf(stderr, "Error: samples exceeds maximum\n");
return -EINVAL;
}
if (arg_hz <= 0.0) {
fprintf(stderr, "Error: Frequency needs to be > 0\n");
return -EINVAL;
......@@ -338,6 +332,8 @@ int main(int argc, char *argv[]) {
log_uptime();
LIST_HEAD_INIT(struct list_sample_data, head);
/* main program loop */
for (samples = 0; !exiting && samples < arg_samples_len; samples++) {
int res;
......@@ -348,7 +344,14 @@ int main(int argc, char *argv[]) {
double elapsed;
double timeleft;
sampletime[samples] = gettime_ns();
sampledata = new0(struct list_sample_data, 1);
if (sampledata == NULL) {
log_error("Failed to allocate memory for a node: %m");
return -1;
}
sampledata->sampletime = gettime_ns();
sampledata->counter = samples;
if (!of && (access(arg_output_path, R_OK|W_OK|X_OK) == 0)) {
t = time(NULL);
......@@ -369,11 +372,11 @@ int main(int argc, char *argv[]) {
if (graph_start <= 0.0)
log_uptime();
else
log_sample(samples);
log_sample(samples, &sampledata);
sample_stop = gettime_ns();
elapsed = (sample_stop - sampletime[samples]) * 1000000000.0;
elapsed = (sample_stop - sampledata->sampletime) * 1000000000.0;
timeleft = interval - elapsed;
newint_s = (time_t)(timeleft / 1000000000.0);
......@@ -403,6 +406,7 @@ int main(int argc, char *argv[]) {
/* calculate how many samples we lost and scrap them */
arg_samples_len -= (int)(newint_ns / interval);
}
LIST_PREPEND(struct list_sample_data, link, head, sampledata);
}
/* do some cleanup, close fd's */
......@@ -443,16 +447,32 @@ int main(int argc, char *argv[]) {
close(sysfd);
/* nitpick cleanups */
ps = ps_first;
ps = ps_first->next_ps;
while (ps->next_ps) {
struct ps_struct *old = ps;
struct ps_struct *old;
old = ps;
old->sample = ps->first;
ps = ps->next_ps;
while (old->sample->next) {
struct ps_sched_struct *oldsample = old->sample;
old->sample = old->sample->next;
free(oldsample);
}
free(old->sample);
free(old);
}
free(ps->sample);
free(ps);
sampledata = head;
while (sampledata->link_prev) {
struct list_sample_data *old_sampledata = sampledata;
sampledata = sampledata->link_prev;
free(old_sampledata);
}
free(sampledata);
/* don't complain when overrun once, happens most commonly on 1st sample */
if (overrun > 1)
fprintf(stderr, "systemd-boochart: Warning: sample time overrun %i times\n", overrun);
......
......@@ -26,6 +26,7 @@
#include <dirent.h>
#include <stdbool.h>
#include "list.h"
#define MAXCPUS 16
#define MAXPIDS 65535
......@@ -54,6 +55,22 @@ struct ps_sched_struct {
double runtime;
double waittime;
int pss;
struct list_sample_data *sampledata;
struct ps_sched_struct *next;
struct ps_sched_struct *prev;
struct ps_sched_struct *cross; /* cross pointer */
struct ps_struct *ps_new;
};
/*
 * One node of system-wide data for a single sample. main() allocates a
 * node on every pass of its sampling loop and prepends it to the list at
 * `head`; log_sample() receives the node and fills in the measurements.
 */
struct list_sample_data {
/*
 * Per-CPU values parsed with atoll() in log_sample(), indexed by the CPU
 * slot `c`.
 * NOTE(review): log_sample() only bails out when `c > MAXCPUS`, which
 * still lets c == MAXCPUS through -- one past the end of these arrays.
 * Confirm whether the guard should be `>=`.
 */
double runtime[MAXCPUS];
double waittime[MAXCPUS];
/* gettime_ns() value taken in main() right after the node is allocated */
double sampletime;
/* atoi() of the text log_sample() reads from its entropy descriptor */
int entropy_avail;
/* .bi / .bo are set from the "pgpgin" / "pgpgout" keys in log_sample() */
struct block_stat_struct blockstat;
/* NOTE(review): no write to this member is visible in the hunks shown; confirm it is still used */
struct cpu_stat_struct cpustat;
LIST_FIELDS(struct list_sample_data, link); /* doubly linked list hooks; the cleanup in main() walks link_prev */
/* value of main()'s loop index `samples` when the node was created */
int counter;
};
/* process info */
......@@ -73,9 +90,9 @@ struct ps_struct {
int schedstat;
FILE *smaps;
/* index to first/last seen timestamps */
int first;
int last;
/* pointers to first/last seen timestamps */
struct ps_sched_struct *first;
struct ps_sched_struct *last;
/* records actual start time, may be way before bootchart runs */
double starttime;
......
......@@ -44,6 +44,7 @@
* read() overhead.
*/
static char smaps_buf[4096];
static int skip = 0;
DIR *proc;
int procfd = -1;
......@@ -111,7 +112,7 @@ static int pid_cmdline_strscpy(char *buffer, size_t buf_len, int pid) {
return 0;
}
void log_sample(int sample) {
void log_sample(int sample, struct list_sample_data **ptr) {
static int vmstat;
static int schedstat;
char buf[4096];
......@@ -128,6 +129,12 @@ void log_sample(int sample) {
ssize_t n;
struct dirent *ent;
int fd;
struct list_sample_data *sampledata;
struct ps_sched_struct *ps_prev = NULL;
sampledata = *ptr;
/* all the per-process stuff goes here */
if (!proc) {
......@@ -161,9 +168,9 @@ void log_sample(int sample) {
if (sscanf(m, "%s %s", key, val) < 2)
goto vmstat_next;
if (streq(key, "pgpgin"))
blockstat[sample].bi = atoi(val);
sampledata->blockstat.bi = atoi(val);
if (streq(key, "pgpgout")) {
blockstat[sample].bo = atoi(val);
sampledata->blockstat.bo = atoi(val);
break;
}
vmstat_next:
......@@ -198,8 +205,8 @@ vmstat_next:
if (c > MAXCPUS)
/* Oops, we only have room for MAXCPUS data */
break;
cpustat[c].sample[sample].runtime = atoll(rt);
cpustat[c].sample[sample].waittime = atoll(wt);
sampledata->runtime[c] = atoll(rt);
sampledata->waittime[c] = atoll(wt);
if (c == cpus)
cpus = c + 1;
......@@ -219,7 +226,7 @@ schedstat_next:
n = pread(e_fd, buf, sizeof(buf) - 1, 0);
if (n > 0) {
buf[n] = '\0';
entropy_avail[sample] = atoi(buf);
sampledata->entropy_avail = atoi(buf);
}
}
}
......@@ -258,16 +265,19 @@ schedstat_next:
ps = ps->next_ps;
ps->pid = pid;
ps->sample = calloc(arg_samples_len + 1, sizeof(struct ps_sched_struct));
ps->sample = calloc(1, sizeof(struct ps_sched_struct));
if (!ps->sample) {
perror("calloc(ps_struct)");
exit (EXIT_FAILURE);
}
ps->sample->sampledata = sampledata;
pscount++;
/* mark our first sample */
ps->first = sample;
ps->first = ps->sample;
ps->sample->runtime = atoll(rt);
ps->sample->waittime = atoll(wt);
/* get name, start time */
if (!ps->sched) {
......@@ -383,16 +393,28 @@ schedstat_next:
if (!sscanf(buf, "%s %s %*s", rt, wt))
continue;
ps->last = sample;
ps->sample[sample].runtime = atoll(rt);
ps->sample[sample].waittime = atoll(wt);
ps->total = (ps->sample[ps->last].runtime
- ps->sample[ps->first].runtime)
/ 1000000000.0;
/* append a fresh, zeroed sample node after the current one for this process */
ps->sample->next = calloc(1, sizeof(struct ps_sched_struct));
/*
 * NOTE(review): the allocation result lives in ps->sample->next, but the
 * test below inspects ps->sample, which was already dereferenced on the
 * line above. A failed calloc() is therefore not caught here, and the
 * following statement dereferences ps->sample->next. The check most
 * likely should be on ps->sample->next -- confirm and fix.
 */
if (!ps->sample) {
perror("calloc(ps_struct)");
exit (EXIT_FAILURE);
}
ps->sample->next->prev = ps->sample;
ps->sample = ps->sample->next;
ps->last = ps->sample;
ps->sample->runtime = atoll(rt);
ps->sample->waittime = atoll(wt);
ps->sample->sampledata = sampledata;
ps->sample->ps_new = ps;
if (ps_prev) {
ps_prev->cross = ps->sample;
}
ps_prev = ps->sample;
ps->total = (ps->last->runtime - ps->first->runtime)
/ 1000000000.0;
if (!arg_pss)
goto catch_rename;
/* Pss */
if (!ps->smaps) {
sprintf(filename, "%d/smaps", pid);
......@@ -401,31 +423,53 @@ schedstat_next:
if (!ps->smaps)
continue;
setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
} else {
}
else {
rewind(ps->smaps);
}
/* test to see if we need to skip another field */
if (skip == 0) {
if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
continue;
}
if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
continue;
}
if (buf[392] == 'V') {
skip = 2;
}
else {
skip = 1;
}
rewind(ps->smaps);
}
while (1) {
int pss_kb;
/* skip one line, this contains the object mapped */
if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
/* skip one line, this contains the object mapped. */
if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
break;
}
/* then there's a 28 char 14 line block */
if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14)
if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
break;
}
pss_kb = atoi(&buf[61]);
ps->sample[sample].pss += pss_kb;
}
ps->sample->pss += pss_kb;
if (ps->sample[sample].pss > ps->pss_max)
ps->pss_max = ps->sample[sample].pss;
/* skip one more line if this is a newer kernel */
if (skip == 2) {
if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
break;
}
}
if (ps->sample->pss > ps->pss_max)
ps->pss_max = ps->sample->pss;
catch_rename:
/* catch process rename, try to randomize time */
mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
if (((samples - ps->first) + pid) % (int)(mod) == 0) {
if (((samples - ps->pid) + pid) % (int)(mod) == 0) {
/* re-fetch name */
/* get name, start time */
......
......@@ -25,10 +25,11 @@
***/
#include <dirent.h>
#include "bootchart.h"
extern DIR *proc;
extern int procfd;
double gettime_ns(void);
void log_uptime(void);
void log_sample(int sample);
void log_sample(int sample, struct list_sample_data **ptr);
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment