/*
 *  linux/mm/swapfile.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/shmem_fs.h>
#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/writeback.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/mutex.h>
#include <linux/capability.h>
#include <linux/syscalls.h>
#include <linux/memcontrol.h>
#include <linux/poll.h>
#include <linux/oom.h>
#include <linux/frontswap.h>
#include <linux/swapfile.h>
#include <linux/export.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
#include <linux/swap_cgroup.h>

static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
				 unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
static sector_t map_swap_entry(swp_entry_t, struct block_device**);

DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
atomic_long_t nr_swap_pages;
/*
 * Some modules use swappable objects and may try to swap them out under
 * memory pressure (via the shrinker). Before doing so, they may wish to
 * check to see if any swap space is available.
 */
EXPORT_SYMBOL_GPL(nr_swap_pages);
/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
long total_swap_pages;
static int least_priority;

static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

/*
 * all active swap_info_structs
 * protected with swap_lock, and ordered by priority.
 */
PLIST_HEAD(swap_active_head);

/*
 * all available (active, not full) swap_info_structs
 * protected with swap_avail_lock, ordered by priority.
 * This is used by get_swap_page() instead of swap_active_head
 * because swap_active_head includes all swap_info_structs,
 * but get_swap_page() doesn't need to look at full ones.
 * This uses its own lock instead of swap_lock because when a
 * swap_info_struct changes between not-full/full, it needs to
 * add/remove itself to/from this list, but the swap_info_struct->lock
 * is held and the locking order requires swap_lock to be taken
 * before any swap_info_struct->lock.
 */
static PLIST_HEAD(swap_avail_head);
static DEFINE_SPINLOCK(swap_avail_lock);

struct swap_info_struct *swap_info[MAX_SWAPFILES];

static DEFINE_MUTEX(swapon_mutex);

static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
/* Activity counter to indicate that a swapon or swapoff has occurred */
static atomic_t proc_poll_event = ATOMIC_INIT(0);

static inline unsigned char swap_count(unsigned char ent)
{
	return ent & ~SWAP_HAS_CACHE;	/* may include SWAP_HAS_CONT flag */
}

/* returns 1 if swap entry is freed */
static int
__try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
{
	swp_entry_t entry = swp_entry(si->type, offset);
	struct page *page;
	int ret = 0;

	page = find_get_page(swap_address_space(entry), swp_offset(entry));
	if (!page)
		return 0;
	/*
	 * This function is called from scan_swap_map() and it's called
	 * by vmscan.c at reclaiming pages. So, we hold a lock on a page, here.
	 * We have to use trylock for avoiding deadlock. This is a special
	 * case and you should use try_to_free_swap() with explicit lock_page()
	 * in usual operations.
	 */
	if (trylock_page(page)) {
		ret = try_to_free_swap(page);
		unlock_page(page);
	}
	put_page(page);
	return ret;
}

/*
 * swapon tells the device that all the old swap contents can be discarded,
 * to allow the swap device to optimize its wear-levelling.
 */
static int discard_swap(struct swap_info_struct *si)
{
	struct swap_extent *se;
	sector_t start_block;
	sector_t nr_blocks;
	int err = 0;

	/* Do not discard the swap header page! */
	se = &si->first_swap_extent;
	start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
	nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
	if (nr_blocks) {
		err = blkdev_issue_discard(si->bdev, start_block,
				nr_blocks, GFP_KERNEL, 0);
		if (err)
			return err;
		cond_resched();
	}

	list_for_each_entry(se, &si->first_swap_extent.list, list) {
		start_block = se->start_block << (PAGE_SHIFT - 9);
		nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);

		err = blkdev_issue_discard(si->bdev, start_block,
				nr_blocks, GFP_KERNEL, 0);
		if (err)
			break;

		cond_resched();
	}
	return err;		/* That will often be -EOPNOTSUPP */
}

/*
 * swap allocation tells the device that a cluster of swap can now be discarded,
 * to allow the swap device to optimize its wear-levelling.
 */
static void discard_swap_cluster(struct swap_info_struct *si,
				 pgoff_t start_page, pgoff_t nr_pages)
{
	struct swap_extent *se = si->curr_swap_extent;
	int found_extent = 0;

	while (nr_pages) {
		if (se->start_page <= start_page &&
		    start_page < se->start_page + se->nr_pages) {
			pgoff_t offset = start_page - se->start_page;
			sector_t start_block = se->start_block + offset;
			sector_t nr_blocks = se->nr_pages - offset;

			if (nr_blocks > nr_pages)
				nr_blocks = nr_pages;
			start_page += nr_blocks;
			nr_pages -= nr_blocks;

			if (!found_extent++)
				si->curr_swap_extent = se;

			start_block <<= PAGE_SHIFT - 9;
			nr_blocks <<= PAGE_SHIFT - 9;
			if (blkdev_issue_discard(si->bdev, start_block,
				    nr_blocks, GFP_NOIO, 0))
				break;
		}

		se = list_next_entry(se, list);
	}
}

#define SWAPFILE_CLUSTER	256
#define LATENCY_LIMIT		256

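/*
 * Accessors for swap_cluster_info: the "data" field holds either the usage
 * count of an allocated cluster or, for a cluster sitting on the free or
 * discard list, the index of the next cluster; "flags" records which.
 */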
static inline void cluster_set_flag(struct swap_cluster_info *info,
	unsigned int flag)
{
	info->flags = flag;
}

static inline unsigned int cluster_count(struct swap_cluster_info *info)
{
	return info->data;
}

static inline void cluster_set_count(struct swap_cluster_info *info,
				     unsigned int c)
{
	info->data = c;
}

static inline void cluster_set_count_flag(struct swap_cluster_info *info,
					 unsigned int c, unsigned int f)
{
	info->flags = f;
	info->data = c;
}

static inline unsigned int cluster_next(struct swap_cluster_info *info)
{
	return info->data;
}

static inline void cluster_set_next(struct swap_cluster_info *info,
				    unsigned int n)
{
	info->data = n;
}

static inline void cluster_set_next_flag(struct swap_cluster_info *info,
					 unsigned int n, unsigned int f)
{
	info->flags = f;
	info->data = n;
}

static inline bool cluster_is_free(struct swap_cluster_info *info)
{
	return info->flags & CLUSTER_FLAG_FREE;
}

static inline bool cluster_is_null(struct swap_cluster_info *info)
{
	return info->flags & CLUSTER_FLAG_NEXT_NULL;
}

static inline void cluster_set_null(struct swap_cluster_info *info)
{
	info->flags = CLUSTER_FLAG_NEXT_NULL;
	info->data = 0;
}

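/*
 * Per-cluster locking: when si->cluster_info exists (the SSD case) the
 * cluster covering an offset has its own spinlock; otherwise the helpers
 * below fall back to taking si->lock.
 */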
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
						     unsigned long offset)
{
	struct swap_cluster_info *ci;

	ci = si->cluster_info;
	if (ci) {
		ci += offset / SWAPFILE_CLUSTER;
		spin_lock(&ci->lock);
	}
	return ci;
}

static inline void unlock_cluster(struct swap_cluster_info *ci)
{
	if (ci)
		spin_unlock(&ci->lock);
}

static inline struct swap_cluster_info *lock_cluster_or_swap_info(
	struct swap_info_struct *si,
	unsigned long offset)
{
	struct swap_cluster_info *ci;

	ci = lock_cluster(si, offset);
	if (!ci)
		spin_lock(&si->lock);

	return ci;
}

static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
					       struct swap_cluster_info *ci)
{
	if (ci)
		unlock_cluster(ci);
	else
		spin_unlock(&si->lock);
}

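/*
 * A swap_cluster_list is a pair of head/tail indexes into the cluster_info
 * array; the list is threaded through the "next" field of each cluster.
 */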
static inline bool cluster_list_empty(struct swap_cluster_list *list)
{
	return cluster_is_null(&list->head);
}

static inline unsigned int cluster_list_first(struct swap_cluster_list *list)
{
	return cluster_next(&list->head);
}

static void cluster_list_init(struct swap_cluster_list *list)
{
	cluster_set_null(&list->head);
	cluster_set_null(&list->tail);
}

static void cluster_list_add_tail(struct swap_cluster_list *list,
				  struct swap_cluster_info *ci,
				  unsigned int idx)
{
	if (cluster_list_empty(list)) {
		cluster_set_next_flag(&list->head, idx, 0);
		cluster_set_next_flag(&list->tail, idx, 0);
	} else {
		struct swap_cluster_info *ci_tail;
		unsigned int tail = cluster_next(&list->tail);

		/*
		 * Nested cluster lock, but both cluster locks are
		 * only acquired when we held swap_info_struct->lock
		 */
		ci_tail = ci + tail;
		spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
		cluster_set_next(ci_tail, idx);
		unlock_cluster(ci_tail);
		cluster_set_next_flag(&list->tail, idx, 0);
	}
}

static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
					   struct swap_cluster_info *ci)
{
	unsigned int idx;

	idx = cluster_next(&list->head);
	if (cluster_next(&list->tail) == idx) {
		cluster_set_null(&list->head);
		cluster_set_null(&list->tail);
	} else
		cluster_set_next_flag(&list->head,
				      cluster_next(&ci[idx]), 0);

	return idx;
}

/* Add a cluster to discard list and schedule it to do discard */
static void swap_cluster_schedule_discard(struct swap_info_struct *si,
		unsigned int idx)
{
	/*
	 * If scan_swap_map() can't find a free cluster, it will check
	 * si->swap_map directly. To make sure the discarding cluster isn't
	 * taken by scan_swap_map(), mark the swap entries bad (occupied). It
	 * will be cleared after discard
	 */
	memset(si->swap_map + idx * SWAPFILE_CLUSTER,
			SWAP_MAP_BAD, SWAPFILE_CLUSTER);

	cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);

	schedule_work(&si->discard_work);
}

/*
 * Do the scheduled discards. After a cluster discard is finished, the
 * cluster will be added to the free cluster list. The caller should hold
 * si->lock.
 */
static void swap_do_scheduled_discard(struct swap_info_struct *si)
{
	struct swap_cluster_info *info, *ci;
	unsigned int idx;

	info = si->cluster_info;

	while (!cluster_list_empty(&si->discard_clusters)) {
		idx = cluster_list_del_first(&si->discard_clusters, info);
		spin_unlock(&si->lock);

		discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
				SWAPFILE_CLUSTER);

		spin_lock(&si->lock);
		ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
		cluster_set_flag(ci, CLUSTER_FLAG_FREE);
		unlock_cluster(ci);
		cluster_list_add_tail(&si->free_clusters, info, idx);
		ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
		memset(si->swap_map + idx * SWAPFILE_CLUSTER,
				0, SWAPFILE_CLUSTER);
		unlock_cluster(ci);
	}
}

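/*
 * Work callback that carries out the cluster discards queued by
 * swap_cluster_schedule_discard().
 */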
static void swap_discard_work(struct work_struct *work)
{
	struct swap_info_struct *si;

	si = container_of(work, struct swap_info_struct, discard_work);

	spin_lock(&si->lock);
	swap_do_scheduled_discard(si);
	spin_unlock(&si->lock);
}

/*
 * The cluster corresponding to page_nr will be used. The cluster will be
 * removed from free cluster list and its usage counter will be increased.
 */
static void inc_cluster_info_page(struct swap_info_struct *p,
	struct swap_cluster_info *cluster_info, unsigned long page_nr)
{
	unsigned long idx = page_nr / SWAPFILE_CLUSTER;

	if (!cluster_info)
		return;
	if (cluster_is_free(&cluster_info[idx])) {
		VM_BUG_ON(cluster_list_first(&p->free_clusters) != idx);
		cluster_list_del_first(&p->free_clusters, cluster_info);
		cluster_set_count_flag(&cluster_info[idx], 0, 0);
	}

	VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
	cluster_set_count(&cluster_info[idx],
		cluster_count(&cluster_info[idx]) + 1);
}

/*
 * The cluster corresponding to page_nr decreases one usage. If the usage
 * counter becomes 0, which means no page in the cluster is in use, we can
 * optionally discard the cluster and add it to free cluster list.
 */
static void dec_cluster_info_page(struct swap_info_struct *p,
	struct swap_cluster_info *cluster_info, unsigned long page_nr)
{
	unsigned long idx = page_nr / SWAPFILE_CLUSTER;

	if (!cluster_info)
		return;

	VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
	cluster_set_count(&cluster_info[idx],
		cluster_count(&cluster_info[idx]) - 1);

	if (cluster_count(&cluster_info[idx]) == 0) {
		/*
		 * If the swap is discardable, schedule a discard of the cluster
		 * instead of freeing it immediately. The cluster will be freed
		 * after the discard completes.
		 */
		if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
				 (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
			swap_cluster_schedule_discard(p, idx);
			return;
		}

		cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
		cluster_list_add_tail(&p->free_clusters, cluster_info, idx);
	}
}

/*
 * It's possible scan_swap_map() uses a free cluster in the middle of free
 * cluster list. Avoid such abuse to prevent list corruption.
 */
static bool
scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
	unsigned long offset)
{
	struct percpu_cluster *percpu_cluster;
	bool conflict;

	offset /= SWAPFILE_CLUSTER;
	conflict = !cluster_list_empty(&si->free_clusters) &&
		offset != cluster_list_first(&si->free_clusters) &&
		cluster_is_free(&si->cluster_info[offset]);

	if (!conflict)
		return false;

	percpu_cluster = this_cpu_ptr(si->percpu_cluster);
	cluster_set_null(&percpu_cluster->index);
	return true;
}

/*
 * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
 * might involve allocating a new cluster for current CPU too.
 */
static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
	unsigned long *offset, unsigned long *scan_base)
{
	struct percpu_cluster *cluster;
	struct swap_cluster_info *ci;
	bool found_free;
	unsigned long tmp, max;

new_cluster:
	cluster = this_cpu_ptr(si->percpu_cluster);
	if (cluster_is_null(&cluster->index)) {
		if (!cluster_list_empty(&si->free_clusters)) {
			cluster->index = si->free_clusters.head;
			cluster->next = cluster_next(&cluster->index) *
					SWAPFILE_CLUSTER;
		} else if (!cluster_list_empty(&si->discard_clusters)) {
			/*
			 * we don't have a free cluster but have some clusters
			 * being discarded; do the discard now and reclaim them
			 */
			swap_do_scheduled_discard(si);
			*scan_base = *offset = si->cluster_next;
			goto new_cluster;
		} else
			return;
	}

	found_free = false;

	/*
	 * Other CPUs can use our cluster if they can't find a free cluster,
	 * check if there is still free entry in the cluster
	 */
	tmp = cluster->next;
	max = min_t(unsigned long, si->max,
		    (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
	if (tmp >= max) {
		cluster_set_null(&cluster->index);
		goto new_cluster;
	}
	ci = lock_cluster(si, tmp);
	while (tmp < max) {
		if (!si->swap_map[tmp]) {
			found_free = true;
			break;
		}
		tmp++;
	}
	unlock_cluster(ci);
	if (!found_free) {
		cluster_set_null(&cluster->index);
		goto new_cluster;
	}
	cluster->next = tmp + 1;
	*offset = tmp;
	*scan_base = tmp;
}

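/*
 * Allocate one swap slot from *si and record "usage" (1 or SWAP_HAS_CACHE)
 * in its swap_map entry.  Returns the offset of the slot, or 0 on failure.
 * Called with si->lock held; the lock may be dropped and retaken while
 * scanning.
 */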
static unsigned long scan_swap_map(struct swap_info_struct *si,
				   unsigned char usage)
{
	struct swap_cluster_info *ci;
	unsigned long offset;
	unsigned long scan_base;
	unsigned long last_in_cluster = 0;
	int latency_ration = LATENCY_LIMIT;

	/*
	 * We try to cluster swap pages by allocating them sequentially
	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
	 * way, however, we resort to first-free allocation, starting
	 * a new cluster.  This prevents us from scattering swap pages
	 * all over the entire swap partition, so that we reduce
	 * overall disk seek times between swap pages.  -- sct
	 * But we do now try to find an empty cluster.  -Andrea
	 * And we let swap pages go all over an SSD partition.  Hugh
	 */

	si->flags += SWP_SCANNING;
	scan_base = offset = si->cluster_next;

	/* SSD algorithm */
	if (si->cluster_info) {
		scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
		goto checks;
	}

	if (unlikely(!si->cluster_nr--)) {
		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
			si->cluster_nr = SWAPFILE_CLUSTER - 1;
			goto checks;
		}

		spin_unlock(&si->lock);

		/*
		 * If seek is expensive, start searching for new cluster from
		 * start of partition, to minimize the span of allocated swap.
		 * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info
		 * case, just handled by scan_swap_map_try_ssd_cluster() above.
		 */
		scan_base = offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		/* Locate the first empty (unaligned) cluster */
		for (; last_in_cluster <= si->highest_bit; offset++) {
			if (si->swap_map[offset])
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&si->lock);
				offset -= SWAPFILE_CLUSTER - 1;
				si->cluster_next = offset;
				si->cluster_nr = SWAPFILE_CLUSTER - 1;
				goto checks;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}

		offset = scan_base;
		spin_lock(&si->lock);
		si->cluster_nr = SWAPFILE_CLUSTER - 1;
	}

checks:
	if (si->cluster_info) {
		while (scan_swap_map_ssd_cluster_conflict(si, offset))
			scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
	}
	if (!(si->flags & SWP_WRITEOK))
		goto no_page;
	if (!si->highest_bit)
		goto no_page;
	if (offset > si->highest_bit)
		scan_base = offset = si->lowest_bit;

	ci = lock_cluster(si, offset);
	/* reuse swap entry of cache-only swap if not busy. */
	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
		int swap_was_freed;
		unlock_cluster(ci);
		spin_unlock(&si->lock);
		swap_was_freed = __try_to_reclaim_swap(si, offset);
		spin_lock(&si->lock);
		/* entry was freed successfully, try to use this again */
		if (swap_was_freed)
			goto checks;
		goto scan; /* check next one */
	}

	if (si->swap_map[offset]) {
		unlock_cluster(ci);
		goto scan;
	}

	if (offset == si->lowest_bit)
		si->lowest_bit++;
	if (offset == si->highest_bit)
		si->highest_bit--;
	si->inuse_pages++;
	if (si->inuse_pages == si->pages) {
		si->lowest_bit = si->max;
		si->highest_bit = 0;
		spin_lock(&swap_avail_lock);
		plist_del(&si->avail_list, &swap_avail_head);
		spin_unlock(&swap_avail_lock);
	}
	si->swap_map[offset] = usage;
	inc_cluster_info_page(si, si->cluster_info, offset);
	unlock_cluster(ci);
	si->cluster_next = offset + 1;
	si->flags -= SWP_SCANNING;

	return offset;

scan:
	spin_unlock(&si->lock);
	while (++offset <= si->highest_bit) {
		if (!si->swap_map[offset]) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	offset = si->lowest_bit;
	while (offset < scan_base) {
		if (!si->swap_map[offset]) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
		offset++;
	}
	spin_lock(&si->lock);

no_page:
	si->flags -= SWP_SCANNING;
	return 0;
}

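/*
 * Allocate a swap entry for the swap cache, trying the not-full swap
 * devices on swap_avail_head in priority order.
 */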
swp_entry_t get_swap_page(void)
{
	struct swap_info_struct *si, *next;
	pgoff_t offset;

	if (atomic_long_read(&nr_swap_pages) <= 0)
		goto noswap;
	atomic_long_dec(&nr_swap_pages);

	spin_lock(&swap_avail_lock);

start_over:
	plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
		/* requeue si to after same-priority siblings */
		plist_requeue(&si->avail_list, &swap_avail_head);
		spin_unlock(&swap_avail_lock);
		spin_lock(&si->lock);
		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
			spin_lock(&swap_avail_lock);
			if (plist_node_empty(&si->avail_list)) {
				spin_unlock(&si->lock);
				goto nextsi;
			}
			WARN(!si->highest_bit,
			     "swap_info %d in list but !highest_bit\n",
			     si->type);
			WARN(!(si->flags & SWP_WRITEOK),
			     "swap_info %d in list but !SWP_WRITEOK\n",
			     si->type);
			plist_del(&si->avail_list, &swap_avail_head);
			spin_unlock(&si->lock);
			goto nextsi;
		}

		/* This is called for allocating swap entry for cache */
		offset = scan_swap_map(si, SWAP_HAS_CACHE);
		spin_unlock(&si->lock);
		if (offset)
			return swp_entry(si->type, offset);
		pr_debug("scan_swap_map of si %d failed to find offset\n",
		       si->type);
		spin_lock(&swap_avail_lock);
nextsi:
		/*
		 * if we got here, it's likely that si was almost full before,
		 * and since scan_swap_map() can drop the si->lock, multiple
		 * callers probably all tried to get a page from the same si
		 * and it filled up before we could get one; or, the si filled
		 * up between us dropping swap_avail_lock and taking si->lock.
		 * Since we dropped the swap_avail_lock, the swap_avail_head
		 * list may have been modified; so if next is still in the
		 * swap_avail_head list then try it, otherwise start over.
		 */
		if (plist_node_empty(&next->avail_list))
			goto start_over;
	}

	spin_unlock(&swap_avail_lock);

	atomic_long_inc(&nr_swap_pages);
noswap:
	return (swp_entry_t) {0};
}

/* The only caller of this function is now suspend routine */
swp_entry_t get_swap_page_of_type(int type)
{
	struct swap_info_struct *si;
	pgoff_t offset;

	si = swap_info[type];
	spin_lock(&si->lock);
	if (si && (si->flags & SWP_WRITEOK)) {
		atomic_long_dec(&nr_swap_pages);
		/* This is called for allocating swap entry, not cache */
		offset = scan_swap_map(si, 1);
		if (offset) {
			spin_unlock(&si->lock);
			return swp_entry(type, offset);
		}
		atomic_long_inc(&nr_swap_pages);
	}
	spin_unlock(&si->lock);
	return (swp_entry_t) {0};
}

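/*
 * Look up and sanity-check the swap_info_struct for a swap entry without
 * taking any lock; returns NULL (after logging the problem) for bad entries.
 */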
static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *p;
	unsigned long offset, type;

	if (!entry.val)
		goto out;
	type = swp_type(entry);
	if (type >= nr_swapfiles)
		goto bad_nofile;
	p = swap_info[type];
	if (!(p->flags & SWP_USED))
		goto bad_device;
	offset = swp_offset(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_free;
	return p;

bad_free:
	pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
	goto out;
bad_offset:
	pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
	goto out;
bad_device:
	pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
	goto out;
bad_nofile:
	pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
out:
	return NULL;
}

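/* Like _swap_info_get(), but returns with p->lock held on success. */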
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *p;

	p = _swap_info_get(entry);
	if (p)
		spin_lock(&p->lock);
	return p;
}

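/*
 * Drop one reference of type "usage" (1 or SWAP_HAS_CACHE) from the swap_map
 * entry and return the remaining value; when it reaches zero the slot itself
 * is freed.  swap_info_locked tells whether the caller already holds p->lock;
 * otherwise it is taken here when needed.
 */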
static unsigned char swap_entry_free(struct swap_info_struct *p,
				     swp_entry_t entry, unsigned char usage,
				     bool swap_info_locked)
{
	struct swap_cluster_info *ci;
	unsigned long offset = swp_offset(entry);
	unsigned char count;
	unsigned char has_cache;
	bool lock_swap_info = false;

	if (!swap_info_locked) {
		count = p->swap_map[offset];
		if (!p->cluster_info || count == usage || count == SWAP_MAP_SHMEM) {
lock_swap_info:
			swap_info_locked = true;
			lock_swap_info = true;
			spin_lock(&p->lock);
		}
	}

	ci = lock_cluster(p, offset);

	count = p->swap_map[offset];

	if (!swap_info_locked && (count == usage || count == SWAP_MAP_SHMEM)) {
		unlock_cluster(ci);
		goto lock_swap_info;
	}

	has_cache = count & SWAP_HAS_CACHE;
	count &= ~SWAP_HAS_CACHE;

	if (usage == SWAP_HAS_CACHE) {
		VM_BUG_ON(!has_cache);
		has_cache = 0;
	} else if (count == SWAP_MAP_SHMEM) {
		/*
		 * Or we could insist on shmem.c using a special
		 * swap_shmem_free() and free_shmem_swap_and_cache()...
		 */
		count = 0;
	} else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
		if (count == COUNT_CONTINUED) {
			if (swap_count_continued(p, offset, count))
				count = SWAP_MAP_MAX | COUNT_CONTINUED;
			else
				count = SWAP_MAP_MAX;
		} else
			count--;
	}

	usage = count | has_cache;
	p->swap_map[offset] = usage;

	unlock_cluster(ci);

	/* free if no reference */
	if (!usage) {
		VM_BUG_ON(!swap_info_locked);
		mem_cgroup_uncharge_swap(entry);
		ci = lock_cluster(p, offset);
		dec_cluster_info_page(p, p->cluster_info, offset);
		unlock_cluster(ci);
		if (offset < p->lowest_bit)
			p->lowest_bit = offset;
		if (offset > p->highest_bit) {
			bool was_full = !p->highest_bit;
			p->highest_bit = offset;
			if (was_full && (p->flags & SWP_WRITEOK)) {
				spin_lock(&swap_avail_lock);
				WARN_ON(!plist_node_empty(&p->avail_list));
				if (plist_node_empty(&p->avail_list))
					plist_add(&p->avail_list,
						  &swap_avail_head);
				spin_unlock(&swap_avail_lock);
			}
		}
		atomic_long_inc(&nr_swap_pages);
		p->inuse_pages--;
		frontswap_invalidate_page(p->type, offset);
		if (p->flags & SWP_BLKDEV) {
			struct gendisk *disk = p->bdev->bd_disk;
			if (disk->fops->swap_slot_free_notify)
				disk->fops->swap_slot_free_notify(p->bdev,
								  offset);
		}
	}

	if (lock_swap_info)
		spin_unlock(&p->lock);

	return usage;
}

/*
 * Caller has made sure that the swap device corresponding to entry
 * is still around or has not been recycled.
 */
void swap_free(swp_entry_t entry)
{
	struct swap_info_struct *p;

	p = _swap_info_get(entry);
	if (p)
		swap_entry_free(p, entry, 1, false);
}

/*
 * Called after dropping swapcache to decrease refcnt to swap entries.
 */
void swapcache_free(swp_entry_t entry)
{
	struct swap_info_struct *p;

	p = _swap_info_get(entry);
	if (p)
		swap_entry_free(p, entry, SWAP_HAS_CACHE, false);
}

/*
 * How many references to page are currently swapped out?
 * This does not give an exact answer when swap count is continued,
 * but does include the high COUNT_CONTINUED flag to allow for that.
 */
int page_swapcount(struct page *page)
{
	int count = 0;
	struct swap_info_struct *p;
	struct swap_cluster_info *ci;
	swp_entry_t entry;
	unsigned long offset;

	entry.val = page_private(page);
	p = _swap_info_get(entry);
	if (p) {
		offset = swp_offset(entry);
		ci = lock_cluster_or_swap_info(p, offset);
		count = swap_count(p->swap_map[offset]);
		unlock_cluster_or_swap_info(p, ci);
	}
	return count;
}

/*
 * How many references to @entry are currently swapped out?
 * This considers COUNT_CONTINUED so it returns exact answer.
 */
int swp_swapcount(swp_entry_t entry)
{
	int count, tmp_count, n;
	struct swap_info_struct *p;
	struct swap_cluster_info *ci;
	struct page *page;
	pgoff_t offset;
	unsigned char *map;

	p = _swap_info_get(entry);
	if (!p)
		return 0;

	offset = swp_offset(entry);

	ci = lock_cluster_or_swap_info(p, offset);

	count = swap_count(p->swap_map[offset]);
	if (!(count & COUNT_CONTINUED))
		goto out;

	count &= ~COUNT_CONTINUED;
	n = SWAP_MAP_MAX + 1;

	page = vmalloc_to_page(p->swap_map + offset);
	offset &= ~PAGE_MASK;
	VM_BUG_ON(page_private(page) != SWP_CONTINUED);

	do {
		page = list_next_entry(page, lru);
		map = kmap_atomic(page);
		tmp_count = map[offset];
		kunmap_atomic(map);

		count += (tmp_count & ~COUNT_CONTINUED) * n;
		n *= (SWAP_CONT_MAX + 1);
	} while (tmp_count & COUNT_CONTINUED);
out:
	unlock_cluster_or_swap_info(p, ci);
	return count;
}

/*
 * We can write to an anon page without COW if there are no other references
 * to it.  And as a side-effect, free up its swap: because the old content
 * on disk will never be read, and seeking back there to write new content
 * later would only waste time away from clustering.
 *
 * NOTE: total_mapcount should not be relied upon by the caller if
 * reuse_swap_page() returns false, but it may be always overwritten
 * (see the other implementation for CONFIG_SWAP=n).
 */
bool reuse_swap_page(struct page *page, int *total_mapcount)
{
	int count;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	if (unlikely(PageKsm(page)))
		return false;
	count = page_trans_huge_mapcount(page, total_mapcount);
	if (count <= 1 && PageSwapCache(page)) {
		count += page_swapcount(page);
		if (count != 1)
			goto out;
		if (!PageWriteback(page)) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		} else {
			swp_entry_t entry;
			struct swap_info_struct *p;

			entry.val = page_private(page);
			p = swap_info_get(entry);
			if (p->flags & SWP_STABLE_WRITES) {
				spin_unlock(&p->lock);
				return false;
			}
			spin_unlock(&p->lock);
		}
	}
out:
	return count <= 1;
}

/*
 * If swap is getting full, or if there are no more mappings of this page,
 * then try_to_free_swap is called to free its swap space.
 */
int try_to_free_swap(struct page *page)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (!PageSwapCache(page))
		return 0;
	if (PageWriteback(page))
		return 0;
	if (page_swapcount(page))
		return 0;

	/*
	 * Once hibernation has begun to create its image of memory,
	 * there's a danger that one of the calls to try_to_free_swap()
	 * - most probably a call from __try_to_reclaim_swap() while
	 * hibernation is allocating its own swap pages for the image,
	 * but conceivably even a call from memory reclaim - will free
	 * the swap from a page which has already been recorded in the
	 * image as a clean swapcache page, and then reuse its swap for
	 * another page of the image.  On waking from hibernation, the
	 * original page might be freed under memory pressure, then
	 * later read back in from swap, now with the wrong data.
	 *
	 * Hibernation suspends storage while it is writing the image
	 * to disk so check that here.
	 */
	if (pm_suspended_storage())
		return 0;

	delete_from_swap_cache(page);
	SetPageDirty(page);
	return 1;
}

/*
 * Free the swap entry like above, but also try to
 * free the page cache entry if it is the last user.
 */
int free_swap_and_cache(swp_entry_t entry)
{
	struct swap_info_struct *p;
	struct page *page = NULL;

	if (non_swap_entry(entry))
		return 1;

	p = swap_info_get(entry);
	if (p) {
		if (swap_entry_free(p, entry, 1, true) == SWAP_HAS_CACHE) {
			page = find_get_page(swap_address_space(entry),
					     swp_offset(entry));
			if (page && !trylock_page(page)) {
				put_page(page);
				page = NULL;
			}
		}
		spin_unlock(&p->lock);
	}
	if (page) {
		/*
		 * Not mapped elsewhere, or swap space full? Free it!
		 * Also recheck PageSwapCache now page is locked (above).
		 */
		if (PageSwapCache(page) && !PageWriteback(page) &&
		    (!page_mapped(page) || mem_cgroup_swap_full(page))) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
		unlock_page(page);
		put_page(page);
	}
	return p != NULL;
}

#ifdef CONFIG_HIBERNATION
/*
 * Find the swap type that corresponds to given device (if any).
 *
 * @offset - number of the PAGE_SIZE-sized block of the device, starting
 * from 0, in which the swap header is expected to be located.
 *
 * This is needed for the suspend to disk (aka swsusp).
 */
int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
{
	struct block_device *bdev = NULL;
	int type;

	if (device)
		bdev = bdget(device);

	spin_lock(&swap_lock);
	for (type = 0; type < nr_swapfiles; type++) {
		struct swap_info_struct *sis = swap_info[type];

		if (!(sis->flags & SWP_WRITEOK))
			continue;

		if (!bdev) {
			if (bdev_p)
				*bdev_p = bdgrab(sis->bdev);

			spin_unlock(&swap_lock);
			return type;
		}
		if (bdev == sis->bdev) {
			struct swap_extent *se = &sis->first_swap_extent;

			if (se->start_block == offset) {
				if (bdev_p)
					*bdev_p = bdgrab(sis->bdev);

				spin_unlock(&swap_lock);
				bdput(bdev);
				return type;