drbd_main.c 112 KB
Newer Older
Philipp Reisner's avatar
Philipp Reisner committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

29
30
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

Philipp Reisner's avatar
Philipp Reisner committed
31
#include <linux/module.h>
32
#include <linux/jiffies.h>
Philipp Reisner's avatar
Philipp Reisner committed
33
#include <linux/drbd.h>
34
#include <linux/uaccess.h>
Philipp Reisner's avatar
Philipp Reisner committed
35
36
37
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
38
#include <linux/mutex.h>
Philipp Reisner's avatar
Philipp Reisner committed
39
40
41
42
43
44
45
46
47
48
49
50
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
51
#include <linux/workqueue.h>
Philipp Reisner's avatar
Philipp Reisner committed
52
53
54
55
56
57
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
58
#include "drbd_protocol.h"
Philipp Reisner's avatar
Philipp Reisner committed
59
60
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
#include "drbd_vli.h"
61
#include "drbd_debugfs.h"
Philipp Reisner's avatar
Philipp Reisner committed
62

63
static DEFINE_MUTEX(drbd_main_mutex);
Philipp Reisner's avatar
Philipp Reisner committed
64
static int drbd_open(struct block_device *bdev, fmode_t mode);
65
static void drbd_release(struct gendisk *gd, fmode_t mode);
Philipp Reisner's avatar
Philipp Reisner committed
66
static void md_sync_timer_fn(unsigned long data);
67
static int w_bitmap_io(struct drbd_work *w, int unused);
Philipp Reisner's avatar
Philipp Reisner committed
68
69
70
71
72
73

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
74
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
75
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
Philipp Reisner's avatar
Philipp Reisner committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
104
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
105
106
bool disable_sendpage;
bool allow_oos;
Philipp Reisner's avatar
Philipp Reisner committed
107
108
109
110
111
112
113
114
115
116
117
int proc_details;       /* Detail level in proc drbd*/

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
118
struct idr drbd_devices;
119
struct list_head drbd_resources;
120
struct mutex resources_mutex;
Philipp Reisner's avatar
Philipp Reisner committed
121
122

struct kmem_cache *drbd_request_cache;
123
struct kmem_cache *drbd_ee_cache;	/* peer requests */
Philipp Reisner's avatar
Philipp Reisner committed
124
125
126
127
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
128
mempool_t *drbd_md_io_page_pool;
129
struct bio_set *drbd_md_io_bio_set;
Philipp Reisner's avatar
Philipp Reisner committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143

/*
 * A standard mempool is not used for the page pool, because:
 *  1) pre-allocated objects shall be handed out first, and
 *  2) a sleeping allocation must be interruptible by a signal.
 *
 * Note: this is a singly linked list; the next pointer is the private
 * member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t   drbd_pp_lock;
int          drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

144
static const struct block_device_operations drbd_ops = {
Philipp Reisner's avatar
Philipp Reisner committed
145
146
147
148
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};
149

150
151
152
/*
 * bio_alloc_drbd() - allocate a bio with room for one bio_vec
 * @gfp_mask:	allocation flags, passed through to the allocator.
 *
 * Allocates from drbd_md_io_bio_set when that bio set exists, and falls
 * back to plain bio_alloc() otherwise.
 *
 * Returns the new bio, or NULL if the allocation failed.
 */
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
	if (drbd_md_io_bio_set)
		return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);

	return bio_alloc(gfp_mask, 1);
}

Philipp Reisner's avatar
Philipp Reisner committed
163
164
165
166
#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real functions sparse works.
 */
167
int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins)
Philipp Reisner's avatar
Philipp Reisner committed
168
169
170
{
	int io_allowed;

171
172
	atomic_inc(&device->local_cnt);
	io_allowed = (device->state.disk >= mins);
Philipp Reisner's avatar
Philipp Reisner committed
173
	if (!io_allowed) {
174
175
		if (atomic_dec_and_test(&device->local_cnt))
			wake_up(&device->misc_wait);
Philipp Reisner's avatar
Philipp Reisner committed
176
177
178
179
180
	}
	return io_allowed;
}

#endif
181

Philipp Reisner's avatar
Philipp Reisner committed
182
/**
183
 * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
184
 * @connection:	DRBD connection.
Philipp Reisner's avatar
Philipp Reisner committed
185
186
187
188
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
189
190
 * epoch of not yet barrier-acked requests, this function will cause a
 * termination of the connection.
Philipp Reisner's avatar
Philipp Reisner committed
191
 */
192
void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
193
		unsigned int set_size)
Philipp Reisner's avatar
Philipp Reisner committed
194
195
{
	struct drbd_request *r;
196
197
198
	struct drbd_request *req = NULL;
	int expect_epoch = 0;
	int expect_size = 0;
Philipp Reisner's avatar
Philipp Reisner committed
199

200
	spin_lock_irq(&connection->resource->req_lock);
Philipp Reisner's avatar
Philipp Reisner committed
201

202
	/* find oldest not yet barrier-acked write request,
203
	 * count writes in its epoch. */
204
	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
205
		const unsigned s = r->rq_state;
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
		if (!req) {
			if (!(s & RQ_WRITE))
				continue;
			if (!(s & RQ_NET_MASK))
				continue;
			if (s & RQ_NET_DONE)
				continue;
			req = r;
			expect_epoch = req->epoch;
			expect_size ++;
		} else {
			if (r->epoch != expect_epoch)
				break;
			if (!(s & RQ_WRITE))
				continue;
			/* if (s & RQ_DONE): not expected */
			/* if (!(s & RQ_NET_MASK)): not expected */
			expect_size++;
224
		}
Philipp Reisner's avatar
Philipp Reisner committed
225
	}
226

Philipp Reisner's avatar
Philipp Reisner committed
227
	/* first some paranoia code */
228
	if (req == NULL) {
229
		drbd_err(connection, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
230
			 barrier_nr);
Philipp Reisner's avatar
Philipp Reisner committed
231
232
		goto bail;
	}
233
	if (expect_epoch != barrier_nr) {
234
		drbd_err(connection, "BAD! BarrierAck #%u received, expected #%u!\n",
235
			 barrier_nr, expect_epoch);
Philipp Reisner's avatar
Philipp Reisner committed
236
		goto bail;
237
238
	}

239
	if (expect_size != set_size) {
240
		drbd_err(connection, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
241
			 barrier_nr, set_size, expect_size);
Philipp Reisner's avatar
Philipp Reisner committed
242
243
244
		goto bail;
	}

245
246
247
248
	/* Clean up list of requests processed during current epoch. */
	/* this extra list walk restart is paranoia,
	 * to catch requests being barrier-acked "unexpectedly".
	 * It usually should find the same req again, or some READ preceding it. */
249
	list_for_each_entry(req, &connection->transfer_log, tl_requests)
250
251
		if (req->epoch == expect_epoch)
			break;
252
	list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
253
254
255
		if (req->epoch != expect_epoch)
			break;
		_req_mod(req, BARRIER_ACKED);
256
	}
257
	spin_unlock_irq(&connection->resource->req_lock);
258

Philipp Reisner's avatar
Philipp Reisner committed
259
260
261
	return;

bail:
262
	spin_unlock_irq(&connection->resource->req_lock);
263
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisner's avatar
Philipp Reisner committed
264
265
266
267
}


/**
268
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
269
 * @connection:	DRBD connection to operate on.
270
 * @what:       The action/event to perform with all request objects
Philipp Reisner's avatar
Philipp Reisner committed
271
 *
272
273
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
Philipp Reisner's avatar
Philipp Reisner committed
274
 */
275
/* must hold resource->req_lock */
276
void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
Philipp Reisner's avatar
Philipp Reisner committed
277
{
278
	struct drbd_request *req, *r;
Philipp Reisner's avatar
Philipp Reisner committed
279

280
	list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests)
281
282
		_req_mod(req, what);
}
283

284
void tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
285
{
286
	spin_lock_irq(&connection->resource->req_lock);
287
	_tl_restart(connection, what);
288
	spin_unlock_irq(&connection->resource->req_lock);
289
}
Philipp Reisner's avatar
Philipp Reisner committed
290
291
292

/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
293
 * @device:	DRBD device.
Philipp Reisner's avatar
Philipp Reisner committed
294
295
296
297
298
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer gets marked as our of sync. Called from the
 * receiver thread and the worker thread.
 */
299
void tl_clear(struct drbd_connection *connection)
Philipp Reisner's avatar
Philipp Reisner committed
300
{
301
	tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
Philipp Reisner's avatar
Philipp Reisner committed
302
}
303

304
/**
305
306
 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain device in the TL
 * @device:	DRBD device.
307
 */
308
void tl_abort_disk_io(struct drbd_device *device)
309
{
310
	struct drbd_connection *connection = first_peer_device(device)->connection;
311
	struct drbd_request *req, *r;
312

313
	spin_lock_irq(&connection->resource->req_lock);
314
	list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) {
315
316
		if (!(req->rq_state & RQ_LOCAL_PENDING))
			continue;
317
		if (req->device != device)
318
319
			continue;
		_req_mod(req, ABORT_DISK_IO);
Philipp Reisner's avatar
Philipp Reisner committed
320
	}
321
	spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner's avatar
Philipp Reisner committed
322
323
324
325
326
}

static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
327
	struct drbd_resource *resource = thi->resource;
Philipp Reisner's avatar
Philipp Reisner committed
328
329
330
	unsigned long flags;
	int retval;

331
	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
332
		 thi->name[0],
333
		 resource->name);
334

Philipp Reisner's avatar
Philipp Reisner committed
335
336
337
338
339
restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

340
	/* if the receiver has been "EXITING", the last thing it did
Philipp Reisner's avatar
Philipp Reisner committed
341
342
343
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
344
	 * drbd_thread_start needs to set "RESTARTING" in that case.
Philipp Reisner's avatar
Philipp Reisner committed
345
	 * t_state check and assignment needs to be within the same spinlock,
346
347
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start see NONE, and can proceed as normal.
Philipp Reisner's avatar
Philipp Reisner committed
348
349
	 */

350
	if (thi->t_state == RESTARTING) {
351
		drbd_info(resource, "Restarting %s thread\n", thi->name);
352
		thi->t_state = RUNNING;
Philipp Reisner's avatar
Philipp Reisner committed
353
354
355
356
357
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
358
	thi->t_state = NONE;
Philipp Reisner's avatar
Philipp Reisner committed
359
	smp_mb();
360
	complete_all(&thi->stop);
Philipp Reisner's avatar
Philipp Reisner committed
361
362
	spin_unlock_irqrestore(&thi->t_lock, flags);

363
	drbd_info(resource, "Terminating %s\n", current->comm);
Philipp Reisner's avatar
Philipp Reisner committed
364
365

	/* Release mod reference taken when thread was started */
366

367
368
369
	if (thi->connection)
		kref_put(&thi->connection->kref, drbd_destroy_connection);
	kref_put(&resource->kref, drbd_destroy_resource);
Philipp Reisner's avatar
Philipp Reisner committed
370
371
372
373
	module_put(THIS_MODULE);
	return retval;
}

374
static void drbd_thread_init(struct drbd_resource *resource, struct drbd_thread *thi,
375
			     int (*func) (struct drbd_thread *), const char *name)
Philipp Reisner's avatar
Philipp Reisner committed
376
377
378
{
	spin_lock_init(&thi->t_lock);
	thi->task    = NULL;
379
	thi->t_state = NONE;
Philipp Reisner's avatar
Philipp Reisner committed
380
	thi->function = func;
381
382
	thi->resource = resource;
	thi->connection = NULL;
383
	thi->name = name;
Philipp Reisner's avatar
Philipp Reisner committed
384
385
386
387
}

int drbd_thread_start(struct drbd_thread *thi)
{
388
	struct drbd_resource *resource = thi->resource;
Philipp Reisner's avatar
Philipp Reisner committed
389
390
391
392
393
394
395
396
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
397
	case NONE:
398
		drbd_info(resource, "Starting %s thread (from %s [%d])\n",
399
			 thi->name, current->comm, current->pid);
Philipp Reisner's avatar
Philipp Reisner committed
400
401
402

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
403
			drbd_err(resource, "Failed to get module reference in drbd_thread_start\n");
Philipp Reisner's avatar
Philipp Reisner committed
404
			spin_unlock_irqrestore(&thi->t_lock, flags);
405
			return false;
Philipp Reisner's avatar
Philipp Reisner committed
406
407
		}

408
409
410
		kref_get(&resource->kref);
		if (thi->connection)
			kref_get(&thi->connection->kref);
411

Philipp Reisner's avatar
Philipp Reisner committed
412
413
		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
414
		thi->t_state = RUNNING;
Philipp Reisner's avatar
Philipp Reisner committed
415
416
417
418
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
419
				    "drbd_%c_%s", thi->name[0], thi->resource->name);
Philipp Reisner's avatar
Philipp Reisner committed
420
421

		if (IS_ERR(nt)) {
422
			drbd_err(resource, "Couldn't start thread\n");
Philipp Reisner's avatar
Philipp Reisner committed
423

424
425
426
			if (thi->connection)
				kref_put(&thi->connection->kref, drbd_destroy_connection);
			kref_put(&resource->kref, drbd_destroy_resource);
Philipp Reisner's avatar
Philipp Reisner committed
427
			module_put(THIS_MODULE);
428
			return false;
Philipp Reisner's avatar
Philipp Reisner committed
429
430
431
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
432
		thi->t_state = RUNNING;
Philipp Reisner's avatar
Philipp Reisner committed
433
434
435
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
436
437
	case EXITING:
		thi->t_state = RESTARTING;
438
		drbd_info(resource, "Restarting %s thread (from %s [%d])\n",
439
				thi->name, current->comm, current->pid);
Philipp Reisner's avatar
Philipp Reisner committed
440
		/* fall through */
441
442
	case RUNNING:
	case RESTARTING:
Philipp Reisner's avatar
Philipp Reisner committed
443
444
445
446
447
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

448
	return true;
Philipp Reisner's avatar
Philipp Reisner committed
449
450
451
452
453
454
455
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

456
	enum drbd_thread_state ns = restart ? RESTARTING : EXITING;
Philipp Reisner's avatar
Philipp Reisner committed
457
458
459
460

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

461
	if (thi->t_state == NONE) {
Philipp Reisner's avatar
Philipp Reisner committed
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
		spin_unlock_irqrestore(&thi->t_lock, flags);
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

487
int conn_lowest_minor(struct drbd_connection *connection)
488
{
489
490
	struct drbd_peer_device *peer_device;
	int vnr = 0, minor = -1;
491

492
	rcu_read_lock();
493
494
495
	peer_device = idr_get_next(&connection->peer_devices, &vnr);
	if (peer_device)
		minor = device_to_minor(peer_device->device);
496
497
	rcu_read_unlock();

498
	return minor;
499
}
500

Philipp Reisner's avatar
Philipp Reisner committed
501
502
503
504
#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 *
505
 * Forces all threads of a resource onto the same CPU. This is beneficial for
Philipp Reisner's avatar
Philipp Reisner committed
506
507
 * DRBD's performance. May be overwritten by user's configuration.
 */
508
static void drbd_calc_cpu_mask(cpumask_var_t *cpu_mask)
Philipp Reisner's avatar
Philipp Reisner committed
509
{
510
	unsigned int *resources_per_cpu, min_index = ~0;
Philipp Reisner's avatar
Philipp Reisner committed
511

512
513
514
515
	resources_per_cpu = kzalloc(nr_cpu_ids * sizeof(*resources_per_cpu), GFP_KERNEL);
	if (resources_per_cpu) {
		struct drbd_resource *resource;
		unsigned int cpu, min = ~0;
Philipp Reisner's avatar
Philipp Reisner committed
516

517
518
519
520
		rcu_read_lock();
		for_each_resource_rcu(resource, &drbd_resources) {
			for_each_cpu(cpu, resource->cpu_mask)
				resources_per_cpu[cpu]++;
Philipp Reisner's avatar
Philipp Reisner committed
521
		}
522
523
524
525
526
527
528
529
530
531
532
533
		rcu_read_unlock();
		for_each_online_cpu(cpu) {
			if (resources_per_cpu[cpu] < min) {
				min = resources_per_cpu[cpu];
				min_index = cpu;
			}
		}
		kfree(resources_per_cpu);
	}
	if (min_index == ~0) {
		cpumask_setall(*cpu_mask);
		return;
Philipp Reisner's avatar
Philipp Reisner committed
534
	}
535
	cpumask_set_cpu(min_index, *cpu_mask);
Philipp Reisner's avatar
Philipp Reisner committed
536
537
538
539
}

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
540
 * @device:	DRBD device.
541
 * @thi:	drbd_thread object
Philipp Reisner's avatar
Philipp Reisner committed
542
543
544
545
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
546
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
Philipp Reisner's avatar
Philipp Reisner committed
547
{
548
	struct drbd_resource *resource = thi->resource;
Philipp Reisner's avatar
Philipp Reisner committed
549
	struct task_struct *p = current;
550

Philipp Reisner's avatar
Philipp Reisner committed
551
552
553
	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
554
	set_cpus_allowed_ptr(p, resource->cpu_mask);
Philipp Reisner's avatar
Philipp Reisner committed
555
}
556
557
#else
#define drbd_calc_cpu_mask(A) ({})
Philipp Reisner's avatar
Philipp Reisner committed
558
559
#endif

560
561
562
563
564
565
566
/**
 * drbd_header_size  -  size of a packet header
 *
 * The header size is a multiple of 8, so any payload following the header is
 * word aligned on 64-bit architectures.  (The bitmap send and receive code
 * relies on this.)
 */
567
unsigned int drbd_header_size(struct drbd_connection *connection)
Philipp Reisner's avatar
Philipp Reisner committed
568
{
569
	if (connection->agreed_pro_version >= 100) {
570
571
572
573
574
575
576
577
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
		return sizeof(struct p_header100);
	} else {
		BUILD_BUG_ON(sizeof(struct p_header80) !=
			     sizeof(struct p_header95));
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
		return sizeof(struct p_header80);
	}
578
}
Philipp Reisner's avatar
Philipp Reisner committed
579

580
static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
Philipp Reisner's avatar
Philipp Reisner committed
581
582
583
584
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(size);
585
	return sizeof(struct p_header80);
Philipp Reisner's avatar
Philipp Reisner committed
586
}
Philipp Reisner's avatar
Philipp Reisner committed
587

588
static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
Philipp Reisner's avatar
Philipp Reisner committed
589
590
{
	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
Philipp Reisner's avatar
Philipp Reisner committed
591
	h->command = cpu_to_be16(cmd);
592
	h->length = cpu_to_be32(size);
593
	return sizeof(struct p_header95);
Philipp Reisner's avatar
Philipp Reisner committed
594
}
Philipp Reisner's avatar
Philipp Reisner committed
595

596
597
598
599
600
601
602
603
604
605
/*
 * Fill in a p_header100: 32-bit magic, 16-bit volume number, 16-bit command,
 * 32-bit length (all big endian) and a zeroed pad field.  Returns the size
 * of the header.
 */
static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
				      int size, int vnr)
{
	h->pad     = 0;
	h->command = cpu_to_be16(cmd);
	h->volume  = cpu_to_be16(vnr);
	h->length  = cpu_to_be32(size);
	h->magic   = cpu_to_be32(DRBD_MAGIC_100);

	return sizeof(*h);
}
Philipp Reisner's avatar
Philipp Reisner committed
606

607
static unsigned int prepare_header(struct drbd_connection *connection, int vnr,
608
				   void *buffer, enum drbd_packet cmd, int size)
609
{
610
	if (connection->agreed_pro_version >= 100)
611
		return prepare_header100(buffer, cmd, size, vnr);
612
	else if (connection->agreed_pro_version >= 95 &&
613
		 size > DRBD_MAX_SIZE_H80_PACKET)
614
		return prepare_header95(buffer, cmd, size);
615
	else
616
		return prepare_header80(buffer, cmd, size);
Philipp Reisner's avatar
Philipp Reisner committed
617
618
}

619
static void *__conn_prepare_command(struct drbd_connection *connection,
620
				    struct drbd_socket *sock)
Philipp Reisner's avatar
Philipp Reisner committed
621
{
622
623
	if (!sock->socket)
		return NULL;
624
	return sock->sbuf + drbd_header_size(connection);
625
}
Philipp Reisner's avatar
Philipp Reisner committed
626

627
void *conn_prepare_command(struct drbd_connection *connection, struct drbd_socket *sock)
628
{
629
	void *p;
Philipp Reisner's avatar
Philipp Reisner committed
630

631
	mutex_lock(&sock->mutex);
632
	p = __conn_prepare_command(connection, sock);
633
	if (!p)
634
		mutex_unlock(&sock->mutex);
Philipp Reisner's avatar
Philipp Reisner committed
635

636
	return p;
Philipp Reisner's avatar
Philipp Reisner committed
637
638
}

639
void *drbd_prepare_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock)
Philipp Reisner's avatar
Philipp Reisner committed
640
{
641
	return conn_prepare_command(peer_device->connection, sock);
642
}
Philipp Reisner's avatar
Philipp Reisner committed
643

644
static int __send_command(struct drbd_connection *connection, int vnr,
645
646
647
648
649
650
			  struct drbd_socket *sock, enum drbd_packet cmd,
			  unsigned int header_size, void *data,
			  unsigned int size)
{
	int msg_flags;
	int err;
Philipp Reisner's avatar
Philipp Reisner committed
651

652
653
654
655
656
657
658
659
660
	/*
	 * Called with @data == NULL and the size of the data blocks in @size
	 * for commands that send data blocks.  For those commands, omit the
	 * MSG_MORE flag: this will increase the likelihood that data blocks
	 * which are page aligned on the sender will end up page aligned on the
	 * receiver.
	 */
	msg_flags = data ? MSG_MORE : 0;

661
	header_size += prepare_header(connection, vnr, sock->sbuf, cmd,
662
				      header_size + size);
663
	err = drbd_send_all(connection, sock->socket, sock->sbuf, header_size,
664
665
			    msg_flags);
	if (data && !err)
666
		err = drbd_send_all(connection, sock->socket, data, size, 0);
667
668
669
670
671
	/* DRBD protocol "pings" are latency critical.
	 * This is supposed to trigger tcp_push_pending_frames() */
	if (!err && (cmd == P_PING || cmd == P_PING_ACK))
		drbd_tcp_nodelay(sock->socket);

672
673
674
	return err;
}

675
static int __conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock,
676
677
678
			       enum drbd_packet cmd, unsigned int header_size,
			       void *data, unsigned int size)
{
679
	return __send_command(connection, 0, sock, cmd, header_size, data, size);
680
681
}

682
int conn_send_command(struct drbd_connection *connection, struct drbd_socket *sock,
683
684
685
686
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;
Philipp Reisner's avatar
Philipp Reisner committed
687

688
	err = __conn_send_command(connection, sock, cmd, header_size, data, size);
689
690
691
692
	mutex_unlock(&sock->mutex);
	return err;
}

693
int drbd_send_command(struct drbd_peer_device *peer_device, struct drbd_socket *sock,
694
695
696
697
698
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

699
700
	err = __send_command(peer_device->connection, peer_device->device->vnr,
			     sock, cmd, header_size, data, size);
701
702
703
	mutex_unlock(&sock->mutex);
	return err;
}
Philipp Reisner's avatar
Philipp Reisner committed
704

705
int drbd_send_ping(struct drbd_connection *connection)
706
{
707
708
	struct drbd_socket *sock;

709
710
	sock = &connection->meta;
	if (!conn_prepare_command(connection, sock))
711
		return -EIO;
712
	return conn_send_command(connection, sock, P_PING, 0, NULL, 0);
713
}
Philipp Reisner's avatar
Philipp Reisner committed
714

715
int drbd_send_ping_ack(struct drbd_connection *connection)
716
{
717
718
	struct drbd_socket *sock;

719
720
	sock = &connection->meta;
	if (!conn_prepare_command(connection, sock))
721
		return -EIO;
722
	return conn_send_command(connection, sock, P_PING_ACK, 0, NULL, 0);
Philipp Reisner's avatar
Philipp Reisner committed
723
724
}

725
int drbd_send_sync_param(struct drbd_peer_device *peer_device)
Philipp Reisner's avatar
Philipp Reisner committed
726
{
727
	struct drbd_socket *sock;
728
	struct p_rs_param_95 *p;
729
	int size;
730
	const int apv = peer_device->connection->agreed_pro_version;
731
	enum drbd_packet cmd;
732
	struct net_conf *nc;
Philipp Reisner's avatar
Philipp Reisner committed
733
	struct disk_conf *dc;
734

735
736
	sock = &peer_device->connection->data;
	p = drbd_prepare_command(peer_device, sock);
737
738
	if (!p)
		return -EIO;
Philipp Reisner's avatar
Philipp Reisner committed
739

740
	rcu_read_lock();
741
	nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner's avatar
Philipp Reisner committed
742
743
744

	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
745
			+ strlen(nc->verify_alg) + 1
746
747
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisner's avatar
Philipp Reisner committed
748

749
	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
Philipp Reisner's avatar
Philipp Reisner committed
750

751
752
	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
Philipp Reisner's avatar
Philipp Reisner committed
753

754
755
	if (get_ldev(peer_device->device)) {
		dc = rcu_dereference(peer_device->device->ldev->disk_conf);
756
		p->resync_rate = cpu_to_be32(dc->resync_rate);
Philipp Reisner's avatar
Philipp Reisner committed
757
758
759
760
		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
761
		put_ldev(peer_device->device);
762
	} else {
763
		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
764
765
766
767
768
		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
	}
Philipp Reisner's avatar
Philipp Reisner committed
769

770
	if (apv >= 88)
771
		strcpy(p->verify_alg, nc->verify_alg);
772
	if (apv >= 89)
773
774
		strcpy(p->csums_alg, nc->csums_alg);
	rcu_read_unlock();
Philipp Reisner's avatar
Philipp Reisner committed
775

776
	return drbd_send_command(peer_device, sock, cmd, size, NULL, 0);
Philipp Reisner's avatar
Philipp Reisner committed
777
778
}

779
int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cmd)
Philipp Reisner's avatar
Philipp Reisner committed
780
{
781
	struct drbd_socket *sock;
Philipp Reisner's avatar
Philipp Reisner committed
782
	struct p_protocol *p;
783
	struct net_conf *nc;
784
	int size, cf;
Philipp Reisner's avatar
Philipp Reisner committed
785

786
787
	sock = &connection->data;
	p = __conn_prepare_command(connection, sock);
788
789
	if (!p)
		return -EIO;
Philipp Reisner's avatar
Philipp Reisner committed
790

791
	rcu_read_lock();
792
	nc = rcu_dereference(connection->net_conf);
Philipp Reisner's avatar
Philipp Reisner committed
793

794
	if (nc->tentative && connection->agreed_pro_version < 92) {
795
796
		rcu_read_unlock();
		mutex_unlock(&sock->mutex);
797
		drbd_err(connection, "--dry-run is not supported by peer");
798
799
		return -EOPNOTSUPP;
	}
Philipp Reisner's avatar
Philipp Reisner committed
800

801
	size = sizeof(*p);
802
	if (connection->agreed_pro_version >= 87)
803
		size += strlen(nc->integrity_alg) + 1;
Philipp Reisner's avatar
Philipp Reisner committed
804

805
806
807
808
809
	p->protocol      = cpu_to_be32(nc->wire_protocol);
	p->after_sb_0p   = cpu_to_be32(nc->after_sb_0p);
	p->after_sb_1p   = cpu_to_be32(nc->after_sb_1p);
	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
	p->two_primaries = cpu_to_be32(nc->two_primaries);
810
	cf = 0;
811
812
	if (nc->discard_my_data)
		cf |= CF_DISCARD_MY_DATA;
813
	if (nc->tentative)
814
		cf |= CF_DRY_RUN;
815
816
	p->conn_flags    = cpu_to_be32(cf);

817
	if (connection->agreed_pro_version >= 87)
818
819
		strcpy(p->integrity_alg, nc->integrity_alg);
	rcu_read_unlock();
Philipp Reisner's avatar
Philipp Reisner committed
820

821
	return __conn_send_command(connection, sock, cmd, size, NULL, 0);
822
823
}

824
int drbd_send_protocol(struct drbd_connection *connection)
825
826
827
{
	int err;

828
829
830
	mutex_lock(&connection->data.mutex);
	err = __drbd_send_protocol(connection, P_PROTOCOL);
	mutex_unlock(&connection->data.mutex);
831
832

	return err;
Philipp Reisner's avatar
Philipp Reisner committed
833
834
}

835
/*
 * Build and send a P_UUIDS packet for @peer_device.
 *
 * @uuid_flags: flag bits supplied by the caller; further bits are OR-ed in
 *              below before the value is stored in the UI_FLAGS slot.
 *
 * Returns 0 without sending anything when get_ldev_if_state(device,
 * D_NEGOTIATING) fails, -EIO when no command buffer could be prepared, and
 * otherwise the result of drbd_send_command().
 */
static int _drbd_send_uuids(struct drbd_peer_device *peer_device, u64 uuid_flags)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_socket *sock;
	struct p_uuids *pkt;
	int idx;

	if (!get_ldev_if_state(device, D_NEGOTIATING))
		return 0;

	sock = &peer_device->connection->data;
	pkt = drbd_prepare_command(peer_device, sock);
	if (!pkt) {
		/* Drop the ldev reference taken above before bailing out. */
		put_ldev(device);
		return -EIO;
	}

	/* Copy the UUID array into the packet under the uuid_lock. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	for (idx = UI_CURRENT; idx < UI_SIZE; idx++)
		pkt->uuid[idx] = cpu_to_be64(device->ldev->md.uuid[idx]);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	/* The slot right after the UUIDs carries the bitmap weight. */
	device->comm_bm_set = drbd_bm_total_weight(device);
	pkt->uuid[UI_SIZE] = cpu_to_be64(device->comm_bm_set);

	/* Bit 1: net_conf has discard_my_data set. */
	rcu_read_lock();
	if (rcu_dereference(peer_device->connection->net_conf)->discard_my_data)
		uuid_flags |= 1;
	rcu_read_unlock();

	/* Bit 2: the CRASHED_PRIMARY device flag is set. */
	if (test_bit(CRASHED_PRIMARY, &device->flags))
		uuid_flags |= 2;

	/* Bit 4: the pending new disk state is D_INCONSISTENT. */
	if (device->new_state_tmp.disk == D_INCONSISTENT)
		uuid_flags |= 4;

	pkt->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(device);
	return drbd_send_command(peer_device, sock, P_UUIDS, sizeof(*pkt), NULL, 0);
}

869
int drbd_send_uuids(struct drbd_peer_device *peer_device)
Philipp Reisner's avatar
Philipp Reisner committed
870
{
871
	return _drbd_send_uuids(peer_device, 0);
Philipp Reisner's avatar
Philipp Reisner committed
872
873
}

874
int drbd_send_uuids_skip_initial_sync(struct drbd_peer_device *peer_device)
Philipp Reisner's avatar
Philipp Reisner committed
875
{
876
	return _drbd_send_uuids(peer_device, 8);
Philipp Reisner's avatar
Philipp Reisner committed
877
878
}

879
void drbd_print_uuids(struct drbd_device *device, const char *text)
880
{
881
882
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		u64 *uuid = device->ldev->md.uuid;
883
		drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX\n",
884
885
886
887
888
		     text,
		     (unsigned long long)uuid[UI_CURRENT],
		     (unsigned long long)uuid[UI_BITMAP],
		     (unsigned long long)uuid[UI_HISTORY_START],
		     (unsigned long long)uuid[UI_HISTORY_END]);
889
		put_ldev(device);
890
	} else {
891
		drbd_info(device, "%s effective data uuid: %016llX\n",
892
				text,
893
				(unsigned long long)device->ed_uuid);
894
895
896
	}
}

897
void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
Philipp Reisner's avatar
Philipp Reisner committed
898
{
899
	struct drbd_device *device = peer_device->device;
900
901
	struct drbd_socket *sock;
	struct p_rs_uuid *p;
902
903
	u64 uuid;

904
	D_ASSERT(device, device->state.disk == D_UP_TO_DATE);
Philipp Reisner's avatar
Philipp Reisner committed
905