// SPDX-License-Identifier: GPL-2.0
/* calibrate.c: default delay calibration
 *
 * Excised from init/main.c
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/smp.h>
#include <linux/percpu.h>

unsigned long lpj_fine;
Randy Dunlap's avatar
Randy Dunlap committed
16
unsigned long preset_lpj;
Linus Torvalds's avatar
Linus Torvalds committed
17 18 19 20 21 22 23 24
static int __init lpj_setup(char *str)
{
	preset_lpj = simple_strtoul(str,NULL,0);
	return 1;
}

__setup("lpj=", lpj_setup);

25 26 27 28 29 30 31 32 33 34
#ifdef ARCH_HAS_READ_CURRENT_TIMER

/* This routine uses the read_current_timer() routine and gets the
 * loops per jiffy directly, instead of guessing it using delay().
 * Also, this code tries to handle non-maskable asynchronous events
 * (like SMIs)
 */
#define DELAY_CALIBRATION_TICKS			((HZ < 100) ? 1 : (HZ/100))
#define MAX_DIRECT_CALIBRATION_RETRIES		5

35
static unsigned long calibrate_delay_direct(void)
36 37 38 39
{
	unsigned long pre_start, start, post_start;
	unsigned long pre_end, end, post_end;
	unsigned long start_jiffies;
40 41 42
	unsigned long timer_rate_min, timer_rate_max;
	unsigned long good_timer_sum = 0;
	unsigned long good_timer_count = 0;
43 44 45
	unsigned long measured_times[MAX_DIRECT_CALIBRATION_RETRIES];
	int max = -1; /* index of measured_times with max/min values or not set */
	int min = -1;
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
	int i;

	if (read_current_timer(&pre_start) < 0 )
		return 0;

	/*
	 * A simple loop like
	 *	while ( jiffies < start_jiffies+1)
	 *		start = read_current_timer();
	 * will not do. As we don't really know whether jiffy switch
	 * happened first or timer_value was read first. And some asynchronous
	 * event can happen between these two events introducing errors in lpj.
	 *
	 * So, we do
	 * 1. pre_start <- When we are sure that jiffy switch hasn't happened
	 * 2. check jiffy switch
	 * 3. start <- timer value before or after jiffy switch
	 * 4. post_start <- When we are sure that jiffy switch has happened
	 *
	 * Note, we don't know anything about order of 2 and 3.
	 * Now, by looking at post_start and pre_start difference, we can
	 * check whether any asynchronous event happened or not
	 */

	for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
		pre_start = 0;
		read_current_timer(&start);
		start_jiffies = jiffies;
74
		while (time_before_eq(jiffies, start_jiffies + 1)) {
75 76 77 78 79 80 81
			pre_start = start;
			read_current_timer(&start);
		}
		read_current_timer(&post_start);

		pre_end = 0;
		end = post_start;
82 83
		while (time_before_eq(jiffies, start_jiffies + 1 +
					       DELAY_CALIBRATION_TICKS)) {
84 85 86 87 88
			pre_end = end;
			read_current_timer(&end);
		}
		read_current_timer(&post_end);

89 90 91 92
		timer_rate_max = (post_end - pre_start) /
					DELAY_CALIBRATION_TICKS;
		timer_rate_min = (pre_end - post_start) /
					DELAY_CALIBRATION_TICKS;
93 94

		/*
95
		 * If the upper limit and lower limit of the timer_rate is
96 97
		 * >= 12.5% apart, redo calibration.
		 */
98 99 100 101 102 103
		if (start >= post_end)
			printk(KERN_NOTICE "calibrate_delay_direct() ignoring "
					"timer_rate as we had a TSC wrap around"
					" start=%lu >=post_end=%lu\n",
				start, post_end);
		if (start < post_end && pre_start != 0 && pre_end != 0 &&
104 105 106
		    (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
			good_timer_count++;
			good_timer_sum += timer_rate_max;
107 108 109 110 111 112 113 114
			measured_times[i] = timer_rate_max;
			if (max < 0 || timer_rate_max > measured_times[max])
				max = i;
			if (min < 0 || timer_rate_max < measured_times[min])
				min = i;
		} else
			measured_times[i] = 0;

115 116
	}

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
	/*
	 * Find the maximum & minimum - if they differ too much throw out the
	 * one with the largest difference from the mean and try again...
	 */
	while (good_timer_count > 1) {
		unsigned long estimate;
		unsigned long maxdiff;

		/* compute the estimate */
		estimate = (good_timer_sum/good_timer_count);
		maxdiff = estimate >> 3;

		/* if range is within 12% let's take it */
		if ((measured_times[max] - measured_times[min]) < maxdiff)
			return estimate;

		/* ok - drop the worse value and try again... */
		good_timer_sum = 0;
		good_timer_count = 0;
		if ((measured_times[max] - estimate) <
				(estimate - measured_times[min])) {
			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
					"min bogoMips estimate %d = %lu\n",
				min, measured_times[min]);
			measured_times[min] = 0;
			min = max;
		} else {
			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
					"max bogoMips estimate %d = %lu\n",
				max, measured_times[max]);
			measured_times[max] = 0;
			max = min;
		}

		for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
			if (measured_times[i] == 0)
				continue;
			good_timer_count++;
			good_timer_sum += measured_times[i];
			if (measured_times[i] < measured_times[min])
				min = i;
			if (measured_times[i] > measured_times[max])
				max = i;
		}

	}
163

164 165 166
	printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good "
	       "estimate for loops_per_jiffy.\nProbably due to long platform "
		"interrupts. Consider using \"lpj=\" boot option.\n");
167 168 169
	return 0;
}
#else
170 171 172 173
/*
 * Variant built when ARCH_HAS_READ_CURRENT_TIMER is not defined: no direct
 * measurement is possible, so report "no estimate" (0) and let
 * calibrate_delay() fall through to its next method.
 */
static unsigned long calibrate_delay_direct(void)
{
	const unsigned long no_estimate = 0;

	return no_estimate;
}
174 175
#endif

Linus Torvalds's avatar
Linus Torvalds committed
176 177
/*
 * This is the number of bits of precision for the loops_per_jiffy.  Each
178 179
 * time we refine our estimate after the first takes 1.5/HZ seconds, so try
 * to start with a good estimate.
180
 * For the boot cpu we can skip the delay calibration and assign it a value
181 182
 * calculated based on the timer frequency.
 * For the rest of the CPUs we cannot assume that the timer frequency is same as
183
 * the cpu frequency, hence do the calibration for those.
Linus Torvalds's avatar
Linus Torvalds committed
184 185 186
 */
#define LPS_PREC 8

187
static unsigned long calibrate_delay_converge(void)
Linus Torvalds's avatar
Linus Torvalds committed
188
{
189
	/* First stage - slowly accelerate to find initial bounds */
190
	unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
191
	int trials = 0, band = 0, trial_in_band = 0;
192 193

	lpj = (1<<12);
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213

	/* wait for "start of" clock tick */
	ticks = jiffies;
	while (ticks == jiffies)
		; /* nothing */
	/* Go .. */
	ticks = jiffies;
	do {
		if (++trial_in_band == (1<<band)) {
			++band;
			trial_in_band = 0;
		}
		__delay(lpj * band);
		trials += band;
	} while (ticks == jiffies);
	/*
	 * We overshot, so retreat to a clear underestimate. Then estimate
	 * the largest likely undershoot. This defines our chop bounds.
	 */
	trials -= band;
214 215 216 217 218 219
	loopadd_base = lpj * band;
	lpj_base = lpj * trials;

recalibrate:
	lpj = lpj_base;
	loopadd = loopadd_base;
220 221 222

	/*
	 * Do a binary approximation to get lpj set to
223
	 * equal one clock (up to LPS_PREC bits)
224
	 */
225
	chop_limit = lpj >> LPS_PREC;
226 227
	while (loopadd > chop_limit) {
		lpj += loopadd;
228 229
		ticks = jiffies;
		while (ticks == jiffies)
230
			; /* nothing */
231 232 233
		ticks = jiffies;
		__delay(lpj);
		if (jiffies != ticks)	/* longer than 1 tick */
234 235
			lpj -= loopadd;
		loopadd >>= 1;
236
	}
237 238 239 240 241 242 243 244 245 246
	/*
	 * If we incremented every single time possible, presume we've
	 * massively underestimated initially, and retry with a higher
	 * start, and larger range. (Only seen on x86_64, due to SMIs)
	 */
	if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
		lpj_base = lpj;
		loopadd_base <<= 2;
		goto recalibrate;
	}
247 248 249 250

	return lpj;
}

251 252
/*
 * Per-CPU record of the loops-per-jiffy value chosen by calibrate_delay().
 * Zero means the CPU has not been calibrated yet; a non-zero value is
 * reused by calibrate_delay() instead of calibrating again.
 */
static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };

253 254 255 256 257 258 259 260
/*
 * Check if cpu calibration delay is already known. For example,
 * some processors with multi-core sockets may have all cores
 * with the same calibration delay.
 *
 * Architectures should override this function if a faster calibration
 * method is available.
 *
 * This weak default returns 0, which calibrate_delay() treats as
 * "not known" and answers by moving on to its next calibration method.
 */
unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
{
	return 0;
}

266 267 268 269 270 271 272 273 274
/*
 * Notification that cpu delay calibration has finished; calibrate_delay()
 * calls it as its last step.  Architectures can override this weak default
 * to stop accepting delay timer registrations from that point on.
 */
void __attribute__((weak)) calibration_delay_done(void)
{
	/* Default: nothing to do. */
}

275
void calibrate_delay(void)
276
{
277
	unsigned long lpj;
278
	static bool printed;
279
	int this_cpu = smp_processor_id();
Linus Torvalds's avatar
Linus Torvalds committed
280

281 282
	if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
		lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
283 284
		if (!printed)
			pr_info("Calibrating delay loop (skipped) "
285 286
				"already calibrated this CPU");
	} else if (preset_lpj) {
287
		lpj = preset_lpj;
288 289 290 291
		if (!printed)
			pr_info("Calibrating delay loop (skipped) "
				"preset value.. ");
	} else if ((!printed) && lpj_fine) {
292
		lpj = lpj_fine;
293
		pr_info("Calibrating delay loop (skipped), "
294
			"value calculated using timer frequency.. ");
295 296
	} else if ((lpj = calibrate_delay_is_known())) {
		;
297
	} else if ((lpj = calibrate_delay_direct()) != 0) {
298 299 300
		if (!printed)
			pr_info("Calibrating delay using timer "
				"specific routine.. ");
Linus Torvalds's avatar
Linus Torvalds committed
301
	} else {
302 303
		if (!printed)
			pr_info("Calibrating delay loop... ");
304
		lpj = calibrate_delay_converge();
Linus Torvalds's avatar
Linus Torvalds committed
305
	}
306
	per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
307 308
	if (!printed)
		pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
309 310
			lpj/(500000/HZ),
			(lpj/(5000/HZ)) % 100, lpj);
311

312
	loops_per_jiffy = lpj;
313
	printed = true;
314 315

	calibration_delay_done();
Linus Torvalds's avatar
Linus Torvalds committed
316
}