Lines Matching +full:trim +full:- +full:data +full:- +full:valid

1 // SPDX-License-Identifier: GPL-2.0-only
7 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
48 void *data;
73 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
74 * "criss-cross" setup, that might cause write-out on some other DRBD,
93 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
105 * We do not use max-buffers as hard limit, because it could lead to
106 * congestion and further to a distributed deadlock during online-verify or
107 * (checksum based) resync, if the max-buffers, socket buffer sizes and
108 * resync-rate settings are mis-configured.
110 * Returns a page chain linked via page->private.
115 struct drbd_device *device = peer_device->device;
121 nc = rcu_dereference(peer_device->connection->net_conf);
122 mxb = nc ? nc->max_buffers : 1000000;
125 if (atomic_read(&device->pp_in_use) >= mxb)
130 atomic_add(number, &device->pp_in_use);
135 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
154 i = atomic_sub_return(i, &device->pp_in_use);
175 * trim: payload_size == 0 */
180 struct drbd_device *device = peer_device->device;
201 peer_req->flags |= EE_RELEASE_TO_MEMPOOL;
205 INIT_LIST_HEAD(&peer_req->w.list);
206 drbd_clear_interval(&peer_req->i);
207 peer_req->i.size = request_size;
208 peer_req->i.sector = sector;
209 peer_req->submit_jif = jiffies;
210 peer_req->peer_device = peer_device;
211 peer_req->pages = page;
216 peer_req->block_id = id;
228 if (peer_req->flags & EE_HAS_DIGEST)
229 kfree(peer_req->digest);
230 drbd_free_pages(device, peer_req->pages);
231 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
232 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
233 if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
234 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
235 drbd_al_complete_io(device, &peer_req->i);
246 spin_lock_irq(&device->resource->req_lock);
248 spin_unlock_irq(&device->resource->req_lock);
266 spin_lock_irq(&device->resource->req_lock);
267 list_splice_init(&device->done_ee, &work_list);
268 spin_unlock_irq(&device->resource->req_lock);
278 err2 = peer_req->w.cb(&peer_req->w, !!err);
283 wake_up(&device->ee_wait);
296 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
297 spin_unlock_irq(&device->resource->req_lock);
299 finish_wait(&device->ee_wait, &wait);
300 spin_lock_irq(&device->resource->req_lock);
307 spin_lock_irq(&device->resource->req_lock);
309 spin_unlock_irq(&device->resource->req_lock);
329 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
332 if (rv == -ECONNRESET)
334 else if (rv != -ERESTARTSYS)
337 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
340 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
343 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
365 err = -EIO;
391 sock->sk->sk_sndbuf = snd;
392 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
395 sock->sk->sk_rcvbuf = rcv;
396 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
412 nc = rcu_dereference(connection->net_conf);
417 sndbuf_size = nc->sndbuf_size;
418 rcvbuf_size = nc->rcvbuf_size;
419 connect_int = nc->connect_int;
422 my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
423 memcpy(&src_in6, &connection->my_addr, my_addr_len);
425 if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
428 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
430 peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
431 memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
434 err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
441 sock->sk->sk_rcvtimeo =
442 sock->sk->sk_sndtimeo = connect_int * HZ;
453 err = sock->ops->bind(sock, (struct sockaddr_unsized *) &src_in6, my_addr_len);
461 err = sock->ops->connect(sock, (struct sockaddr_unsized *) &peer_in6, peer_addr_len, 0);
469 switch (-err) {
498 struct accept_wait_data *ad = sk->sk_user_data;
501 state_change = ad->original_sk_state_change;
502 if (sk->sk_state == TCP_ESTABLISHED)
503 complete(&ad->door_bell);
516 nc = rcu_dereference(connection->net_conf);
519 return -EIO;
521 sndbuf_size = nc->sndbuf_size;
522 rcvbuf_size = nc->rcvbuf_size;
525 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
526 memcpy(&my_addr, &connection->my_addr, my_addr_len);
529 err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
536 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
540 err = s_listen->ops->bind(s_listen, (struct sockaddr_unsized *)&my_addr, my_addr_len);
544 ad->s_listen = s_listen;
545 write_lock_bh(&s_listen->sk->sk_callback_lock);
546 ad->original_sk_state_change = s_listen->sk->sk_state_change;
547 s_listen->sk->sk_state_change = drbd_incoming_connection;
548 s_listen->sk->sk_user_data = ad;
549 write_unlock_bh(&s_listen->sk->sk_callback_lock);
552 err = s_listen->ops->listen(s_listen, 5);
561 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
567 return -EIO;
572 write_lock_bh(&sk->sk_callback_lock);
573 sk->sk_state_change = ad->original_sk_state_change;
574 sk->sk_user_data = NULL;
575 write_unlock_bh(&sk->sk_callback_lock);
585 nc = rcu_dereference(connection->net_conf);
590 connect_int = nc->connect_int;
595 timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
597 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
601 err = kernel_accept(ad->s_listen, &s_estab, 0);
603 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
610 unregister_state_change(s_estab->sk, ad);
621 return -EIO;
633 nc = rcu_dereference(connection->net_conf);
636 return -EIO;
638 sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
641 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
644 err = -EIO;
647 err = decode_header(connection, connection->data.rbuf, &pi);
654 * drbd_socket_okay() - Free the socket if its connection is not okay
667 if (rr > 0 || rr == -EAGAIN) {
688 nc = rcu_dereference(connection->net_conf);
689 timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
703 struct drbd_device *device = peer_device->device;
706 atomic_set(&device->packet_seq, 0);
707 device->peer_seq = 0;
709 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
710 &peer_device->connection->cstate_mutex :
711 &device->own_state_mutex;
720 clear_bit(USE_DEGR_WFC_T, &device->flags);
721 clear_bit(RESIZE_PENDING, &device->flags);
722 atomic_set(&device->ap_in_flight, 0);
723 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
729 * 1 yes, we have a valid connection
731 * -1 peer talks different language,
733 * -2 We do not have a network config...
748 clear_bit(DISCONNECT_SENT, &connection->flags);
750 return -2;
753 sock.sbuf = connection->data.sbuf;
754 sock.rbuf = connection->data.rbuf;
757 msock.sbuf = connection->meta.sbuf;
758 msock.rbuf = connection->meta.rbuf;
762 connection->agreed_pro_version = 80;
776 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
805 set_bit(RESOLVE_CONFLICTS, &connection->flags);
823 if (connection->cstate <= C_DISCONNECTING)
828 if (get_t_state(&connection->receiver) == EXITING)
838 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
839 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
841 sock.socket->sk->sk_allocation = GFP_NOIO;
842 msock.socket->sk->sk_allocation = GFP_NOIO;
844 sock.socket->sk->sk_use_task_frag = false;
845 msock.socket->sk->sk_use_task_frag = false;
847 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
848 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
851 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
852 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
856 nc = rcu_dereference(connection->net_conf);
858 sock.socket->sk->sk_sndtimeo =
859 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
861 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
862 timeout = nc->timeout * HZ / 10;
863 discard_my_data = nc->discard_my_data;
866 msock.socket->sk->sk_sndtimeo = timeout;
870 tcp_sock_set_nodelay(sock.socket->sk);
871 tcp_sock_set_nodelay(msock.socket->sk);
873 connection->data.socket = sock.socket;
874 connection->meta.socket = msock.socket;
875 connection->last_received = jiffies;
881 if (connection->cram_hmac_tfm) {
884 case -1:
886 return -1;
893 connection->data.socket->sk->sk_sndtimeo = timeout;
894 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
896 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
897 return -1;
899 /* Prevent a race between resync-handshake and
906 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
907 mutex_lock(peer_device->device->state_mutex);
910 spin_lock_irq(&connection->resource->req_lock);
911 set_bit(STATE_SENT, &connection->flags);
912 spin_unlock_irq(&connection->resource->req_lock);
914 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
915 mutex_unlock(peer_device->device->state_mutex);
918 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
919 struct drbd_device *device = peer_device->device;
920 kref_get(&device->kref);
924 set_bit(DISCARD_MY_DATA, &device->flags);
926 clear_bit(DISCARD_MY_DATA, &device->flags);
929 kref_put(&device->kref, drbd_destroy_device);
935 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
936 clear_bit(STATE_SENT, &connection->flags);
940 drbd_thread_start(&connection->ack_receiver);
943 connection->ack_sender =
944 alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
945 if (!connection->ack_sender) {
950 mutex_lock(&connection->resource->conf_update);
951 /* The discard_my_data flag is a single-shot modifier to the next
955 connection->net_conf->discard_my_data = 0;
956 mutex_unlock(&connection->resource->conf_update);
967 return -1;
977 if (h->pad != 0) {
979 return -EINVAL;
981 pi->vnr = be16_to_cpu(h->volume);
982 pi->cmd = be16_to_cpu(h->command);
983 pi->size = be32_to_cpu(h->length);
987 pi->cmd = be16_to_cpu(h->command);
988 pi->size = be32_to_cpu(h->length);
989 pi->vnr = 0;
993 pi->cmd = be16_to_cpu(h->command);
994 pi->size = be16_to_cpu(h->length);
995 pi->vnr = 0;
999 connection->agreed_pro_version);
1000 return -EINVAL;
1002 pi->data = header + header_size;
1008 if (current->plug == &connection->receiver_plug) {
1009 blk_finish_plug(&connection->receiver_plug);
1010 blk_start_plug(&connection->receiver_plug);
1016 void *buffer = connection->data.rbuf;
1024 connection->last_received = jiffies;
1031 void *buffer = connection->data.rbuf;
1035 err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
1041 if (err == -EAGAIN) {
1042 tcp_sock_set_quickack(connection->data.socket->sk, 2);
1047 size -= err;
1054 err = decode_header(connection, connection->data.rbuf, pi);
1055 connection->last_received = jiffies;
1075 struct one_flush_context *octx = bio->bi_private;
1076 struct drbd_device *device = octx->device;
1077 struct issue_flush_context *ctx = octx->ctx;
1079 if (bio->bi_status) {
1080 ctx->error = blk_status_to_errno(bio->bi_status);
1081 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
1086 clear_bit(FLUSH_PENDING, &device->flags);
1088 kref_put(&device->kref, drbd_destroy_device);
1090 if (atomic_dec_and_test(&ctx->pending))
1091 complete(&ctx->done);
1096 struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
1107 ctx->error = -ENOMEM;
1109 kref_put(&device->kref, drbd_destroy_device);
1113 octx->device = device;
1114 octx->ctx = ctx;
1115 bio->bi_private = octx;
1116 bio->bi_end_io = one_flush_endio;
1118 device->flush_jif = jiffies;
1119 set_bit(FLUSH_PENDING, &device->flags);
1120 atomic_inc(&ctx->pending);
1126 if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
1136 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1137 struct drbd_device *device = peer_device->device;
1141 kref_get(&device->kref);
1151 * if disk-timeout is set? */
1158 * if (rv == -EOPNOTSUPP) */
1160 drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
1166 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1179 spin_lock(&connection->epoch_lock);
1183 epoch_size = atomic_read(&epoch->epoch_size);
1187 atomic_dec(&epoch->active);
1190 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
1198 atomic_read(&epoch->active) == 0 &&
1199 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
1201 spin_unlock(&connection->epoch_lock);
1202 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1203 spin_lock(&connection->epoch_lock);
1208 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
1209 dec_unacked(epoch->connection);
1212 if (connection->current_epoch != epoch) {
1213 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1214 list_del(&epoch->list);
1216 connection->epochs--;
1222 epoch->flags = 0;
1223 atomic_set(&epoch->epoch_size, 0);
1224 /* atomic_set(&epoch->active, 0); is already zero */
1236 spin_unlock(&connection->epoch_lock);
1246 dc = rcu_dereference(bdev->disk_conf);
1248 if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1250 if (wo == WO_DRAIN_IO && !dc->disk_drain)
1257 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1272 pwo = resource->write_ordering;
1276 idr_for_each_entry(&resource->devices, device, vnr) {
1278 wo = max_allowed_wo(device->ldev, wo);
1279 if (device->ldev == bdev)
1290 resource->write_ordering = wo;
1291 if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
1292 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
1297 * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1301 * And dm-thin does not do this (yet), mostly because in general it has
1303 * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1304 * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1306 * We *may* ignore the discard-zeroes-data setting, if so configured.
1312 * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
1313 * Library version: 1.02.93-RHEL7 (2015-01-28)
1318 * we zero-out the initial (and/or) trailing unaligned partial chunks,
1327 struct block_device *bdev = device->ldev->backing_bdev;
1336 /* Zero-sector (unknown) and one-sector granularities are the same. */
1341 max_discard_sectors -= max_discard_sectors % granularity;
1352 /* start + gran - (start + gran - align) % gran */
1353 tmp = start + granularity - alignment;
1354 tmp = start + granularity - sector_div(tmp, granularity);
1356 nr = tmp - start;
1360 nr_sectors -= nr;
1366 nr_sectors -= max_discard_sectors;
1375 nr -= (unsigned int)nr % granularity;
1378 nr_sectors -= nr;
1395 if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
1399 dc = rcu_dereference(device->ldev->disk_conf);
1400 can_do = dc->discard_zeroes_if_aligned;
1408 * read-back zeroes in discarded ranges, we fall back to
1409 * zero-out. Unless configuration specifically requested
1412 peer_req->flags |= EE_ZEROOUT;
1414 if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1415 peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1416 peer_req->flags |= EE_WAS_ERROR;
1423 return peer_req->flags & EE_APPLICATION ?
1426 return peer_req->flags & EE_APPLICATION ?
1439 * -ENOMEM if we could not allocate enough bios,
1440 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1448 struct drbd_device *device = peer_req->peer_device->device;
1451 struct page *page = peer_req->pages;
1452 sector_t sector = peer_req->i.sector;
1453 unsigned int data_size = peer_req->i.size;
1457 /* TRIM/DISCARD: for now, always use the helper function
1463 if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
1466 conn_wait_active_ee_empty(peer_req->peer_device->connection);
1469 peer_req->submit_jif = jiffies;
1470 peer_req->flags |= EE_SUBMITTED;
1474 if (list_empty(&peer_req->w.list)) {
1475 spin_lock_irq(&device->resource->req_lock);
1476 list_add_tail(&peer_req->w.list, &device->active_ee);
1477 spin_unlock_irq(&device->resource->req_lock);
1500 drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
1501 return -EINVAL;
1504 bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
1505 /* > peer_req->i.sector, unless this is the first bio */
1506 bio->bi_iter.bi_sector = sector;
1507 bio->bi_private = peer_req;
1508 bio->bi_end_io = drbd_peer_request_endio;
1510 bio->bi_next = bios;
1518 data_size -= len;
1520 --nr_pages;
1525 atomic_set(&peer_req->pending_bios, n_bios);
1527 peer_req->submit_jif = jiffies;
1528 peer_req->flags |= EE_SUBMITTED;
1531 bios = bios->bi_next;
1532 bio->bi_next = NULL;
1542 struct drbd_interval *i = &peer_req->i;
1544 drbd_remove_interval(&device->write_requests, i);
1548 if (i->waiting)
1549 wake_up(&device->misc_wait);
1558 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1559 struct drbd_device *device = peer_device->device;
1561 kref_get(&device->kref);
1563 drbd_wait_ee_list_empty(device, &device->active_ee);
1564 kref_put(&device->kref, drbd_destroy_device);
1573 struct p_barrier *p = pi->data;
1579 connection->current_epoch->barrier_nr = p->barrier;
1580 connection->current_epoch->connection = connection;
1581 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
1588 switch (connection->resource->write_ordering) {
1607 if (atomic_read(&connection->current_epoch->epoch_size)) {
1615 drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1616 connection->resource->write_ordering);
1617 return -EIO;
1620 epoch->flags = 0;
1621 atomic_set(&epoch->epoch_size, 0);
1622 atomic_set(&epoch->active, 0);
1624 spin_lock(&connection->epoch_lock);
1625 if (atomic_read(&connection->current_epoch->epoch_size)) {
1626 list_add(&epoch->list, &connection->current_epoch->list);
1627 connection->current_epoch = epoch;
1628 connection->epochs++;
1633 spin_unlock(&connection->epoch_lock);
1643 unsigned int tmp = r->i.size;
1644 r->i.size = payload_size;
1646 r->i.size = tmp;
1651 * data_size: actual payload ("data in")
1655 * both trim and write same have the bi_size ("data len to be affected")
1662 struct drbd_device *device = peer_device->device;
1663 const sector_t capacity = get_capacity(device->vdisk);
1667 unsigned int data_size = pi->size, ds;
1668 void *dig_in = peer_device->connection->int_dig_in;
1669 void *dig_vv = peer_device->connection->int_dig_vv;
1670 unsigned long *data;
1671 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
1672 struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
1675 if (!trim && peer_device->connection->peer_integrity_tfm) {
1676 digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
1681 err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1684 data_size -= digest_size;
1687 /* assume request_size == data_size, but special case trim. */
1689 if (trim) {
1692 ds = be32_to_cpu(trim->size);
1696 ds = be32_to_cpu(zeroes->size);
1701 if (trim || zeroes) {
1717 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1718 * "criss-cross" setup, that might cause write-out on some other DRBD,
1724 peer_req->flags |= EE_WRITE;
1725 if (trim) {
1726 peer_req->flags |= EE_TRIM;
1730 peer_req->flags |= EE_ZEROOUT;
1736 page = peer_req->pages;
1739 data = kmap_local_page(page);
1740 err = drbd_recv_all_warn(peer_device->connection, data, len);
1742 drbd_err(device, "Fault injection: Corrupting data on receive\n");
1743 data[0] = data[0] ^ (unsigned long)-1;
1745 kunmap_local(data);
1750 ds -= len;
1754 drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
1762 device->recv_cnt += data_size >> 9;
1766 /* drbd_drain_block() just takes a data block
1773 void *data;
1780 data = kmap_local_page(page);
1784 err = drbd_recv_all_warn(peer_device->connection, data, len);
1787 data_size -= len;
1789 kunmap_local(data);
1790 drbd_free_pages(peer_device->device, page);
1801 void *dig_in = peer_device->connection->int_dig_in;
1802 void *dig_vv = peer_device->connection->int_dig_vv;
1805 if (peer_device->connection->peer_integrity_tfm) {
1806 digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
1807 err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1810 data_size -= digest_size;
1815 peer_device->device->recv_cnt += data_size>>9;
1817 bio = req->master_bio;
1818 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
1823 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
1827 data_size -= expect;
1831 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
1834 return -EINVAL;
1838 D_ASSERT(peer_device->device, data_size == 0);
1850 struct drbd_peer_device *peer_device = peer_req->peer_device;
1851 struct drbd_device *device = peer_device->device;
1852 sector_t sector = peer_req->i.sector;
1855 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
1857 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1858 drbd_set_in_sync(peer_device, sector, peer_req->i.size);
1862 drbd_rs_failed_io(peer_device, sector, peer_req->i.size);
1874 struct drbd_device *device = peer_device->device;
1887 peer_req->w.cb = e_end_resync_block;
1888 peer_req->opf = REQ_OP_WRITE;
1889 peer_req->submit_jif = jiffies;
1891 spin_lock_irq(&device->resource->req_lock);
1892 list_add_tail(&peer_req->w.list, &device->sync_ee);
1893 spin_unlock_irq(&device->resource->req_lock);
1895 atomic_add(pi->size >> 9, &device->rs_sect_ev);
1900 drbd_err(device, "submit failed, triggering re-connect\n");
1901 spin_lock_irq(&device->resource->req_lock);
1902 list_del(&peer_req->w.list);
1903 spin_unlock_irq(&device->resource->req_lock);
1908 return -EIO;
1919 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
1935 struct p_data *p = pi->data;
1937 peer_device = conn_peer_device(connection, pi->vnr);
1939 return -EIO;
1940 device = peer_device->device;
1942 sector = be64_to_cpu(p->sector);
1944 spin_lock_irq(&device->resource->req_lock);
1945 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
1946 spin_unlock_irq(&device->resource->req_lock);
1948 return -EIO;
1950 err = recv_dless_read(peer_device, req, sector, pi->size);
1955 * in case we are "on-disconnect: freeze" */
1966 struct p_data *p = pi->data;
1968 peer_device = conn_peer_device(connection, pi->vnr);
1970 return -EIO;
1971 device = peer_device->device;
1973 sector = be64_to_cpu(p->sector);
1974 D_ASSERT(device, p->block_id == ID_SYNCER);
1977 /* data is submitted to disk within recv_resync_read.
1983 drbd_err(device, "Can not write resync data to local disk.\n");
1985 err = drbd_drain_block(peer_device, pi->size);
1987 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
1990 atomic_add(pi->size >> 9, &device->rs_sect_in);
2001 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2002 if (!i->local)
2005 if (req->rq_state & RQ_LOCAL_PENDING ||
2006 !(req->rq_state & RQ_POSTPONED))
2021 struct drbd_peer_device *peer_device = peer_req->peer_device;
2022 struct drbd_device *device = peer_device->device;
2023 sector_t sector = peer_req->i.sector;
2026 if (peer_req->flags & EE_SEND_WRITE_ACK) {
2027 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
2028 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
2029 device->state.conn <= C_PAUSED_SYNC_T &&
2030 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
2034 drbd_set_in_sync(peer_device, sector, peer_req->i.size);
2045 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
2046 spin_lock_irq(&device->resource->req_lock);
2047 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
2049 if (peer_req->flags & EE_RESTART_REQUESTS)
2050 restart_conflicting_writes(device, sector, peer_req->i.size);
2051 spin_unlock_irq(&device->resource->req_lock);
2053 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2055 drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
2064 struct drbd_peer_device *peer_device = peer_req->peer_device;
2068 dec_unacked(peer_device->device);
2082 struct drbd_connection *connection = peer_req->peer_device->connection;
2084 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2091 * We assume 32-bit wrap-around here.
2092 * For 24-bit wrap-around, we would have to shift:
2095 return (s32)a - (s32)b > 0;
2105 struct drbd_device *device = peer_device->device;
2108 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2109 spin_lock(&device->peer_seq_lock);
2110 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2111 device->peer_seq = newest_peer_seq;
2112 spin_unlock(&device->peer_seq_lock);
2113 /* wake up only if we actually changed device->peer_seq */
2115 wake_up(&device->seq_wait);
2130 spin_lock_irq(&device->resource->req_lock);
2131 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2132 if (overlaps(peer_req->i.sector, peer_req->i.size,
2133 rs_req->i.sector, rs_req->i.size)) {
2138 spin_unlock_irq(&device->resource->req_lock);
2152 * In case packet_seq is larger than device->peer_seq number, there are
2154 * In case we are the logically next packet, we update device->peer_seq
2163 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
2166 struct drbd_device *device = peer_device->device;
2171 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
2174 spin_lock(&device->peer_seq_lock);
2176 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2177 device->peer_seq = seq_max(device->peer_seq, peer_seq);
2182 ret = -ERESTARTSYS;
2187 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
2194 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2195 spin_unlock(&device->peer_seq_lock);
2197 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
2200 spin_lock(&device->peer_seq_lock);
2202 ret = -ETIMEDOUT;
2207 spin_unlock(&device->peer_seq_lock);
2208 finish_wait(&device->seq_wait, &wait);
2238 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2242 if (!i->local)
2245 if (!(req->rq_state & RQ_POSTPONED))
2247 req->rq_state &= ~RQ_POSTPONED;
2249 spin_unlock_irq(&device->resource->req_lock);
2252 spin_lock_irq(&device->resource->req_lock);
2260 struct drbd_connection *connection = peer_req->peer_device->connection;
2261 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
2262 sector_t sector = peer_req->i.sector;
2263 const unsigned int size = peer_req->i.size;
2272 drbd_insert_interval(&device->write_requests, &peer_req->i);
2275 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2276 if (i == &peer_req->i)
2278 if (i->completed)
2281 if (!i->local) {
2284 * should not happen in a two-node setup. Wait for the
2293 equal = i->sector == sector && i->size == size;
2301 bool superseded = i->sector <= sector && i->sector +
2302 (i->size >> 9) >= sector + (size >> 9);
2308 (unsigned long long)i->sector, i->size,
2312 peer_req->w.cb = superseded ? e_send_superseded :
2314 list_add_tail(&peer_req->w.list, &device->done_ee);
2316 kref_get(&device->kref);
2317 if (!queue_work(connection->ack_sender,
2318 &peer_req->peer_device->send_acks_work))
2319 kref_put(&device->kref, drbd_destroy_device);
2321 err = -ENOENT;
2330 (unsigned long long)i->sector, i->size,
2333 if (req->rq_state & RQ_LOCAL_PENDING ||
2334 !(req->rq_state & RQ_POSTPONED)) {
2346 err = drbd_wait_misc(device, &req->i);
2358 peer_req->flags |= EE_RESTART_REQUESTS;
2377 struct p_data *p = pi->data;
2378 u32 peer_seq = be32_to_cpu(p->seq_num);
2382 peer_device = conn_peer_device(connection, pi->vnr);
2384 return -EIO;
2385 device = peer_device->device;
2391 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2392 atomic_inc(&connection->current_epoch->epoch_size);
2393 err2 = drbd_drain_block(peer_device, pi->size);
2401 * drbd_peer_request_endio, if we successfully submit the data at the
2405 sector = be64_to_cpu(p->sector);
2406 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
2409 return -EIO;
2412 peer_req->w.cb = e_end_block;
2413 peer_req->submit_jif = jiffies;
2414 peer_req->flags |= EE_APPLICATION;
2416 dp_flags = be32_to_cpu(p->dp_flags);
2417 peer_req->opf = wire_flags_to_bio(connection, dp_flags);
2418 if (pi->cmd == P_TRIM) {
2419 D_ASSERT(peer_device, peer_req->i.size > 0);
2421 D_ASSERT(peer_device, peer_req->pages == NULL);
2423 * may mean zero-out while sending P_TRIM. */
2424 if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
2425 peer_req->flags |= EE_ZEROOUT;
2426 } else if (pi->cmd == P_ZEROES) {
2427 D_ASSERT(peer_device, peer_req->i.size > 0);
2429 D_ASSERT(peer_device, peer_req->pages == NULL);
2432 peer_req->flags |= EE_TRIM;
2433 } else if (peer_req->pages == NULL) {
2434 D_ASSERT(device, peer_req->i.size == 0);
2439 peer_req->flags |= EE_MAY_SET_IN_SYNC;
2441 spin_lock(&connection->epoch_lock);
2442 peer_req->epoch = connection->current_epoch;
2443 atomic_inc(&peer_req->epoch->epoch_size);
2444 atomic_inc(&peer_req->epoch->active);
2445 spin_unlock(&connection->epoch_lock);
2448 nc = rcu_dereference(peer_device->connection->net_conf);
2449 tp = nc->two_primaries;
2450 if (peer_device->connection->agreed_pro_version < 100) {
2451 switch (nc->wire_protocol) {
2463 peer_req->flags |= EE_SEND_WRITE_ACK;
2478 peer_req->flags |= EE_IN_INTERVAL_TREE;
2482 spin_lock_irq(&device->resource->req_lock);
2485 spin_unlock_irq(&device->resource->req_lock);
2486 if (err == -ENOENT) {
2494 spin_lock_irq(&device->resource->req_lock);
2496 /* TRIM and is processed synchronously,
2500 if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
2501 list_add_tail(&peer_req->w.list, &device->active_ee);
2502 spin_unlock_irq(&device->resource->req_lock);
2504 if (device->state.conn == C_SYNC_TARGET)
2505 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
2507 if (device->state.pdsk < D_INCONSISTENT) {
2509 drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
2510 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2511 drbd_al_begin_io(device, &peer_req->i);
2512 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2520 drbd_err(device, "submit failed, triggering re-connect\n");
2521 spin_lock_irq(&device->resource->req_lock);
2522 list_del(&peer_req->w.list);
2524 spin_unlock_irq(&device->resource->req_lock);
2525 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2526 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
2527 drbd_al_complete_io(device, &peer_req->i);
2531 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
2551 struct drbd_device *device = peer_device->device;
2558 spin_lock_irq(&device->al_lock);
2559 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2562 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2567 spin_unlock_irq(&device->al_lock);
2574 struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
2580 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2587 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2588 atomic_read(&device->rs_sect_ev);
2590 if (atomic_read(&device->ap_actlog_cnt)
2591 || curr_events - device->rs_last_events > 64) {
2595 device->rs_last_events = curr_events;
2599 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2601 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2602 rs_left = device->ov_left;
2604 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
2606 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
2609 db = device->rs_mark_left[i] - rs_left;
2627 struct p_block_req *p = pi->data;
2629 peer_device = conn_peer_device(connection, pi->vnr);
2631 return -EIO;
2632 device = peer_device->device;
2633 capacity = get_capacity(device->vdisk);
2635 sector = be64_to_cpu(p->sector);
2636 size = be32_to_cpu(p->blksize);
2641 return -EINVAL;
2646 return -EINVAL;
2651 switch (pi->cmd) {
2671 "no local data.\n");
2674 return drbd_drain_block(peer_device, pi->size);
2677 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2678 * "criss-cross" setup, that might cause write-out on some other DRBD,
2680 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2684 return -ENOMEM;
2686 peer_req->opf = REQ_OP_READ;
2688 switch (pi->cmd) {
2690 peer_req->w.cb = w_e_end_data_req;
2692 peer_req->flags |= EE_APPLICATION;
2697 find out if this data block is completely deallocated,
2700 peer_req->flags |= EE_RS_THIN_REQ;
2703 peer_req->w.cb = w_e_end_rsdata_req;
2705 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2710 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2714 di->digest_size = pi->size;
2715 di->digest = (((char *)di)+sizeof(struct digest_info));
2717 peer_req->digest = di;
2718 peer_req->flags |= EE_HAS_DIGEST;
2720 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2723 if (pi->cmd == P_CSUM_RS_REQUEST) {
2724 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
2725 peer_req->w.cb = w_e_end_csum_rs_req;
2727 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2729 device->use_csums = true;
2730 } else if (pi->cmd == P_OV_REPLY) {
2732 atomic_add(size >> 9, &device->rs_sect_in);
2733 peer_req->w.cb = w_e_end_ov_reply;
2742 if (device->ov_start_sector == ~(sector_t)0 &&
2743 peer_device->connection->agreed_pro_version >= 90) {
2746 device->ov_start_sector = sector;
2747 device->ov_position = sector;
2748 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2749 device->rs_total = device->ov_left;
2751 device->rs_mark_left[i] = device->ov_left;
2752 device->rs_mark_time[i] = now;
2757 peer_req->w.cb = w_e_end_ov_req;
2791 spin_lock_irq(&device->resource->req_lock);
2792 list_add_tail(&peer_req->w.list, &device->read_ee);
2793 spin_unlock_irq(&device->resource->req_lock);
2796 if (device->state.peer != R_PRIMARY
2804 atomic_add(size >> 9, &device->rs_sect_ev);
2813 drbd_err(device, "submit failed, triggering re-connect\n");
2816 spin_lock_irq(&device->resource->req_lock);
2817 list_del(&peer_req->w.list);
2818 spin_unlock_irq(&device->resource->req_lock);
2823 return -EIO;
2827 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2831 struct drbd_device *device = peer_device->device;
2832 int self, peer, rv = -100;
2836 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2837 peer = device->p_uuid[UI_BITMAP] & 1;
2839 ch_peer = device->p_uuid[UI_SIZE];
2840 ch_self = device->comm_bm_set;
2843 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
2856 rv = -1;
2870 rv = -1;
2875 "Using discard-least-changes instead\n");
2879 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2880 ? -1 : 1;
2884 if (ch_self == 0) { rv = -1; break; }
2891 rv = -1;
2896 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2897 ? -1 : 1;
2900 rv = -1;
2910 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2914 struct drbd_device *device = peer_device->device;
2915 int hg, rv = -100;
2919 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
2934 if (hg == -1 && device->state.role == R_SECONDARY)
2936 if (hg == 1 && device->state.role == R_PRIMARY)
2943 return device->state.role == R_PRIMARY ? 1 : -1;
2946 if (hg == -1 && device->state.role == R_PRIMARY) {
2954 drbd_khelper(device, "pri-lost-after-sb");
2967 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2971 struct drbd_device *device = peer_device->device;
2972 int hg, rv = -100;
2976 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
2996 if (hg == -1) {
3004 drbd_khelper(device, "pri-lost-after-sb");
3038 -1 C_SYNC_TARGET use BitMap
3039 -2 C_SYNC_TARGET set BitMap
3040 -100 after split brain, disconnect
3041 -1000 unrelated data
3042 -1091 requires proto 91
3043 -1096 requires proto 96
3049 struct drbd_connection *const connection = peer_device->connection;
3050 struct drbd_device *device = peer_device->device;
3054 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3055 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3064 return -2;
3074 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3076 if (connection->agreed_pro_version < 91)
3077 return -1091;
3079 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3080 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3083 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3084 device->ldev->md.uuid[UI_BITMAP] = 0;
3086 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3087 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3097 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3099 if (connection->agreed_pro_version < 91)
3100 return -1091;
3102 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3103 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3106 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3107 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3108 device->p_uuid[UI_BITMAP] = 0UL;
3110 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3117 return -1;
3121 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3122 (device->p_uuid[UI_FLAGS] & 2);
3134 * frozen, so no UUID-bump happened.
3136 * for "new-enough" peer DRBD version. */
3137 if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3139 if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3141 return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3143 if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3148 return -100;
3150 if (device->state.role == R_PRIMARY)
3152 return -1;
3161 case 2: /* !self_pri && peer_pri */ return -1;
3163 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3164 return dc ? -1 : 1;
3169 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3171 return -1;
3174 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3176 if (connection->agreed_pro_version < 96 ?
3177 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3178 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3179 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3183 if (connection->agreed_pro_version < 91)
3184 return -1091;
3186 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3187 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
3190 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3192 return -1;
3197 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3199 peer = device->p_uuid[i] & ~((u64)1);
3201 return -2;
3205 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3206 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3211 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3213 if (connection->agreed_pro_version < 96 ?
3214 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3215 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3216 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3220 if (connection->agreed_pro_version < 91)
3221 return -1091;
3223 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3224 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3227 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3228 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3236 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3238 self = device->ldev->md.uuid[i] & ~((u64)1);
3244 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3245 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3251 self = device->ldev->md.uuid[i] & ~((u64)1);
3253 peer = device->p_uuid[j] & ~((u64)1);
3255 return -100;
3259 return -1000;
3263 CONN_MASK (-1) on failure.
3269 struct drbd_device *device = peer_device->device;
3275 mydisk = device->state.disk;
3277 mydisk = device->new_state_tmp.disk;
3281 spin_lock_irq(&device->ldev->md.uuid_lock);
3282 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3283 drbd_uuid_dump(device, "peer", device->p_uuid,
3284 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3287 spin_unlock_irq(&device->ldev->md.uuid_lock);
3291 if (hg == -1000) {
3292 drbd_alert(device, "Unrelated data, aborting!\n");
3295 if (hg < -0x10000) {
3297 hg = -hg;
3304 if (hg < -1000) {
3305 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3311 int f = (hg == -100) || abs(hg) == 2;
3312 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3320 drbd_khelper(device, "initial-split-brain");
3323 nc = rcu_dereference(peer_device->connection->net_conf);
3324 always_asbp = nc->always_asbp;
3325 rr_conflict = nc->rr_conflict;
3326 tentative = nc->tentative;
3329 if (hg == 100 || (hg == -100 && always_asbp)) {
3330 int pcount = (device->state.role == R_PRIMARY)
3332 int forced = (hg == -100);
3346 drbd_warn(device, "Split-Brain detected, %d primaries, "
3357 if (hg == -100) {
3358 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3359 hg = -1;
3360 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3364 drbd_warn(device, "Split-Brain detected, manually solved. "
3369 if (hg == -100) {
3372 * We just refuse to attach -- well, we drop the "connection"
3374 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3375 drbd_khelper(device, "split-brain");
3385 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
3388 drbd_khelper(device, "pri-lost");
3394 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3399 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3401 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3403 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3405 abs(hg) >= 2 ? "full" : "bit-map based");
3433 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3441 /* everything else is valid if they are equal on both sides. */
3447 struct p_protocol *p = pi->data;
3455 p_proto = be32_to_cpu(p->protocol);
3456 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3457 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3458 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
3459 p_two_primaries = be32_to_cpu(p->two_primaries);
3460 cf = be32_to_cpu(p->conn_flags);
3463 if (connection->agreed_pro_version >= 87) {
3466 if (pi->size > sizeof(integrity_alg))
3467 return -EIO;
3468 err = drbd_recv_all(connection, integrity_alg, pi->size);
3471 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3474 if (pi->cmd != P_PROTOCOL_UPDATE) {
3475 clear_bit(CONN_DRY_RUN, &connection->flags);
3478 set_bit(CONN_DRY_RUN, &connection->flags);
3481 nc = rcu_dereference(connection->net_conf);
3483 if (p_proto != nc->wire_protocol) {
3488 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
3489 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
3493 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
3494 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
3498 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
3499 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
3503 if (p_discard_my_data && nc->discard_my_data) {
3504 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
3508 if (p_two_primaries != nc->two_primaries) {
3509 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
3513 if (strcmp(integrity_alg, nc->integrity_alg)) {
3514 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
3525 * We can only change the peer data integrity algorithm
3526 * here. Changing our own data integrity algorithm
3536 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
3545 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
3554 mutex_lock(&connection->data.mutex);
3555 mutex_lock(&connection->resource->conf_update);
3556 old_net_conf = connection->net_conf;
3559 new_net_conf->wire_protocol = p_proto;
3560 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3561 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3562 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3563 new_net_conf->two_primaries = p_two_primaries;
3565 rcu_assign_pointer(connection->net_conf, new_net_conf);
3566 mutex_unlock(&connection->resource->conf_update);
3567 mutex_unlock(&connection->data.mutex);
3569 crypto_free_shash(connection->peer_integrity_tfm);
3570 kfree(connection->int_dig_in);
3571 kfree(connection->int_dig_vv);
3572 connection->peer_integrity_tfm = peer_integrity_tfm;
3573 connection->int_dig_in = int_dig_in;
3574 connection->int_dig_vv = int_dig_vv;
3576 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3577 drbd_info(connection, "peer data-integrity-alg: %s\n",
3590 return -EIO;
3618 void *buffer = connection->data.rbuf;
3619 int size = pi->size;
3629 size -= s;
3632 return -EIO;
3637 * config_unknown_volume - device configuration command for unknown volume
3650 cmdname(pi->cmd), pi->vnr);
3664 const int apv = connection->agreed_pro_version;
3669 peer_device = conn_peer_device(connection, pi->vnr);
3672 device = peer_device->device;
3680 if (pi->size > exp_max_sz) {
3682 pi->size, exp_max_sz);
3683 return -EIO;
3688 data_size = pi->size - header_size;
3691 data_size = pi->size - header_size;
3695 data_size = pi->size - header_size;
3700 p = pi->data;
3701 BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
3702 memset(&p->algs, 0, sizeof(p->algs));
3704 err = drbd_recv_all(peer_device->connection, p, header_size);
3708 mutex_lock(&connection->resource->conf_update);
3709 old_net_conf = peer_device->connection->net_conf;
3714 mutex_unlock(&connection->resource->conf_update);
3716 return -ENOMEM;
3719 old_disk_conf = device->ldev->disk_conf;
3722 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3728 drbd_err(device, "verify-alg of wrong size, "
3734 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3739 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3740 p->verify_alg[data_size-1] = 0;
3745 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3746 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3747 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3748 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3751 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3752 if (device->state.conn == C_WF_REPORT_PARAMS) {
3753 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
3754 old_net_conf->verify_alg, p->verify_alg);
3758 p->verify_alg, "verify-alg");
3765 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3766 if (device->state.conn == C_WF_REPORT_PARAMS) {
3767 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
3768 old_net_conf->csums_alg, p->csums_alg);
3772 p->csums_alg, "csums-alg");
3780 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3781 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3782 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3783 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3785 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3786 if (fifo_size != device->rs_plan_s->size) {
3804 strcpy(new_net_conf->verify_alg, p->verify_alg);
3805 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3806 crypto_free_shash(peer_device->connection->verify_tfm);
3807 peer_device->connection->verify_tfm = verify_tfm;
3808 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3811 strcpy(new_net_conf->csums_alg, p->csums_alg);
3812 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3813 crypto_free_shash(peer_device->connection->csums_tfm);
3814 peer_device->connection->csums_tfm = csums_tfm;
3815 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
3817 rcu_assign_pointer(connection->net_conf, new_net_conf);
3822 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3827 old_plan = device->rs_plan_s;
3828 rcu_assign_pointer(device->rs_plan_s, new_plan);
3831 mutex_unlock(&connection->resource->conf_update);
3845 mutex_unlock(&connection->resource->conf_update);
3846 return -EIO;
3854 mutex_unlock(&connection->resource->conf_update);
3860 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
3861 return -EIO;
3871 d = (a > b) ? (a - b) : (b - a);
3881 struct p_sizes *p = pi->data;
3882 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
3889 peer_device = conn_peer_device(connection, pi->vnr);
3892 device = peer_device->device;
3893 cur_size = get_capacity(device->vdisk);
3895 p_size = be64_to_cpu(p->d_size);
3896 p_usize = be64_to_cpu(p->u_size);
3897 p_csize = be64_to_cpu(p->c_size);
3901 device->p_size = p_size;
3905 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
3909 p_size, drbd_get_max_capacity(device->ldev));
3915 if (device->state.conn == C_WF_REPORT_PARAMS)
3918 /* Never shrink a device with usable data during connect,
3921 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
3923 device->state.disk >= D_OUTDATED &&
3924 (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
3927 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
3929 return -EIO;
3938 return -ENOMEM;
3941 mutex_lock(&connection->resource->conf_update);
3942 old_disk_conf = device->ldev->disk_conf;
3944 new_disk_conf->disk_size = p_usize;
3946 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3947 mutex_unlock(&connection->resource->conf_update);
3957 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3963 ddsf = be16_to_cpu(p->dds_flags);
3965 drbd_reconsider_queue_parameters(device, device->ldev, o);
3969 return -EIO;
3983 * take his (user-capped or) backing disk size anyways.
3997 } else if (new_size < cur_size && device->state.role == R_PRIMARY) {
4000 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4001 return -EIO;
4004 * - I don't have a current size myself
4005 * - we agree on the size anyways
4006 * - I do have a current size, am Secondary,
4008 * - I do have a current size, am Primary,
4017 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
4018 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
4025 if (device->state.conn > C_WF_REPORT_PARAMS) {
4026 if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
4032 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
4033 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
4034 if (device->state.pdsk >= D_INCONSISTENT &&
4035 device->state.disk >= D_INCONSISTENT) {
4037 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
4041 set_bit(RESYNC_AFTER_NEG, &device->flags);
4052 struct p_uuids *p = pi->data;
4056 peer_device = conn_peer_device(connection, pi->vnr);
4059 device = peer_device->device;
4066 p_uuid[i] = be64_to_cpu(p->uuid[i]);
4068 kfree(device->p_uuid);
4069 device->p_uuid = p_uuid;
4071 if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4072 device->state.disk < D_INCONSISTENT &&
4073 device->state.role == R_PRIMARY &&
4074 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4075 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4076 (unsigned long long)device->ed_uuid);
4077 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4078 return -EIO;
4083 device->state.conn == C_CONNECTED &&
4084 peer_device->connection->agreed_pro_version >= 90 &&
4085 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4100 } else if (device->state.disk < D_INCONSISTENT &&
4101 device->state.role == R_PRIMARY) {
4111 mutex_lock(device->state_mutex);
4112 mutex_unlock(device->state_mutex);
4113 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4123 * convert_state() - Converts the peer's view of the cluster state to our point of view
4157 struct p_req_state *p = pi->data;
4161 peer_device = conn_peer_device(connection, pi->vnr);
4163 return -EIO;
4164 device = peer_device->device;
4166 mask.i = be32_to_cpu(p->mask);
4167 val.i = be32_to_cpu(p->val);
4169 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4170 mutex_is_locked(device->state_mutex)) {
4188 struct p_req_state *p = pi->data;
4192 mask.i = be32_to_cpu(p->mask);
4193 val.i = be32_to_cpu(p->val);
4195 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4196 mutex_is_locked(&connection->cstate_mutex)) {
4214 struct p_state *p = pi->data;
4220 peer_device = conn_peer_device(connection, pi->vnr);
4223 device = peer_device->device;
4225 peer_state.i = be32_to_cpu(p->state);
4229 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
4233 spin_lock_irq(&device->resource->req_lock);
4236 spin_unlock_irq(&device->resource->req_lock);
4240 * we must not "re-establish" it here. */
4242 return -ECONNRESET;
4246 * set) resync started in PausedSyncT, or if the timing of pause-/
4247 * unpause-sync events has been "just right", the peer disk may
4254 * preparation, ignore its uptodate-ness to avoid flapping, it
4257 * It may have changed syncer-paused flags, however, so we
4268 if (drbd_bm_total_weight(device) <= device->rs_failed)
4301 * If this node does not have good data, was already connected, but
4316 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4328 * forced to be UpToDate with --force */
4329 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
4331 * start a sync by "invalidate" or "invalidate-remote" */
4342 if (device->state.disk == D_NEGOTIATING) {
4349 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
4350 return -EIO;
4352 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4353 return -EIO;
4358 spin_lock_irq(&device->resource->req_lock);
4361 clear_bit(CONSIDER_RESYNC, &device->flags);
4366 ns.disk = device->new_state_tmp.disk;
4369 test_bit(NEW_CUR_UUID, &device->flags)) {
4372 spin_unlock_irq(&device->resource->req_lock);
4374 tl_clear(peer_device->connection);
4376 clear_bit(NEW_CUR_UUID, &device->flags);
4377 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
4378 return -EIO;
4382 spin_unlock_irq(&device->resource->req_lock);
4385 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4386 return -EIO;
4400 clear_bit(DISCARD_MY_DATA, &device->flags);
4411 struct p_rs_uuid *p = pi->data;
4413 peer_device = conn_peer_device(connection, pi->vnr);
4415 return -EIO;
4416 device = peer_device->device;
4418 wait_event(device->misc_wait,
4419 device->state.conn == C_WF_SYNC_UUID ||
4420 device->state.conn == C_BEHIND ||
4421 device->state.conn < C_CONNECTED ||
4422 device->state.disk < D_NEGOTIATING);
4424 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
4429 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4452 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
4453 drbd_header_size(peer_device->connection);
4455 c->bm_words - c->word_offset);
4461 return -EIO;
4465 err = drbd_recv_all(peer_device->connection, p, want);
4469 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4471 c->word_offset += num_words;
4472 c->bit_offset = c->word_offset * BITS_PER_LONG;
4473 if (c->bit_offset > c->bm_bits)
4474 c->bit_offset = c->bm_bits;
4481 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4486 return (p->encoding & 0x80) != 0;
4491 return (p->encoding >> 4) & 0x7;
4510 unsigned long s = c->bit_offset;
4516 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
4520 return -EIO;
4525 return -EIO;
4528 e = s + rl -1;
4529 if (e >= c->bm_bits) {
4531 return -EIO;
4533 _drbd_bm_set_bits(peer_device->device, s, e);
4539 (unsigned int)(bs.cur.b - p->code),
4541 return -EIO;
4548 have -= bits;
4550 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4552 return -EIO;
4557 c->bit_offset = s;
4560 return (s != c->bm_bits);
4576 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4582 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4583 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
4584 return -EIO;
4591 unsigned int header_size = drbd_header_size(peer_device->connection);
4592 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4594 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4595 c->bm_words * sizeof(unsigned long);
4596 unsigned int total = c->bytes[0] + c->bytes[1];
4614 r = 1000 - r;
4618 c->bytes[1], c->packets[1],
4619 c->bytes[0], c->packets[0],
4638 peer_device = conn_peer_device(connection, pi->vnr);
4640 return -EIO;
4641 device = peer_device->device;
4644 /* you are supposed to send additional out-of-sync information
4653 if (pi->cmd == P_BITMAP)
4654 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4655 else if (pi->cmd == P_COMPRESSED_BITMAP) {
4658 struct p_compressed_bm *p = pi->data;
4660 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4662 err = -EIO;
4665 if (pi->size <= sizeof(*p)) {
4666 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
4667 err = -EIO;
4670 err = drbd_recv_all(peer_device->connection, p, pi->size);
4673 err = decode_bitmap_c(peer_device, p, &c, pi->size);
4675 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
4676 err = -EIO;
4680 c.packets[pi->cmd == P_BITMAP]++;
4681 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4688 err = drbd_recv_header(peer_device->connection, pi);
4695 if (device->state.conn == C_WF_BITMAP_T) {
4704 } else if (device->state.conn != C_WF_BITMAP_S) {
4708 drbd_conn_str(device->state.conn));
4714 if (!err && device->state.conn == C_WF_BITMAP_S)
4722 pi->cmd, pi->size);
4729 /* Make sure we've acked all the TCP data associated
4730 * with the data requests being unplugged */
4731 tcp_sock_set_quickack(connection->data.socket->sk, 2);
4739 struct p_block_desc *p = pi->data;
4741 peer_device = conn_peer_device(connection, pi->vnr);
4743 return -EIO;
4744 device = peer_device->device;
4746 switch (device->state.conn) {
4753 drbd_conn_str(device->state.conn));
4756 drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4764 struct p_block_desc *p = pi->data;
4769 peer_device = conn_peer_device(connection, pi->vnr);
4771 return -EIO;
4772 device = peer_device->device;
4774 sector = be64_to_cpu(p->sector);
4775 size = be32_to_cpu(p->blksize);
4786 return -ENOMEM;
4789 peer_req->w.cb = e_end_resync_block;
4790 peer_req->opf = REQ_OP_DISCARD;
4791 peer_req->submit_jif = jiffies;
4792 peer_req->flags |= EE_TRIM;
4794 spin_lock_irq(&device->resource->req_lock);
4795 list_add_tail(&peer_req->w.list, &device->sync_ee);
4796 spin_unlock_irq(&device->resource->req_lock);
4798 atomic_add(pi->size >> 9, &device->rs_sect_ev);
4802 spin_lock_irq(&device->resource->req_lock);
4803 list_del(&peer_req->w.list);
4804 spin_unlock_irq(&device->resource->req_lock);
4822 atomic_add(size >> 9, &device->rs_sect_in);
4870 while (get_t_state(&connection->receiver) == RUNNING) {
4873 drbd_thread_current_set_cpu(&connection->receiver);
4879 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
4880 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
4885 shs = cmd->pkt_size;
4886 if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
4888 if (pi.size > shs && !cmd->expect_payload) {
4901 err = drbd_recv_all_warn(connection, pi.data, shs);
4904 pi.size -= shs;
4907 update_receiver_timing_details(connection, cmd->fn);
4908 err = cmd->fn(connection, &pi);
4927 if (connection->cstate == C_STANDALONE)
4938 drbd_thread_stop(&connection->ack_receiver);
4939 if (connection->ack_sender) {
4940 destroy_workqueue(connection->ack_sender);
4941 connection->ack_sender = NULL;
4946 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4947 struct drbd_device *device = peer_device->device;
4948 kref_get(&device->kref);
4951 kref_put(&device->kref, drbd_destroy_device);
4956 if (!list_empty(&connection->current_epoch->list))
4957 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
4959 atomic_set(&connection->current_epoch->epoch_size, 0);
4960 connection->send.seen_any_write_yet = false;
4967 spin_lock_irq(&connection->resource->req_lock);
4968 oc = connection->cstate;
4972 spin_unlock_irq(&connection->resource->req_lock);
4980 struct drbd_device *device = peer_device->device;
4984 spin_lock_irq(&device->resource->req_lock);
4985 _drbd_wait_ee_list_empty(device, &device->active_ee);
4986 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4987 _drbd_wait_ee_list_empty(device, &device->read_ee);
4988 spin_unlock_irq(&device->resource->req_lock);
4990 /* We do not have data structures that would allow us to
4992 * * On C_SYNC_TARGET we do not have any data structures describing
4994 * * On C_SYNC_SOURCE there is no data structure that tracks
4998 * the disk-IO, while the rs_pending_cnt only tracks the blocks
5001 device->rs_total = 0;
5002 device->rs_failed = 0;
5003 atomic_set(&device->rs_pending_cnt, 0);
5004 wake_up(&device->misc_wait);
5006 timer_delete_sync(&device->resync_timer);
5007 resync_timer_fn(&device->resync_timer);
5012 drbd_flush_workqueue(&peer_device->connection->sender_work);
5019 drbd_flush_workqueue(&peer_device->connection->sender_work);
5025 kfree(device->p_uuid);
5026 device->p_uuid = NULL;
5029 tl_clear(peer_device->connection);
5039 i = atomic_read(&device->pp_in_use_by_net);
5042 i = atomic_read(&device->pp_in_use);
5046 D_ASSERT(device, list_empty(&device->read_ee));
5047 D_ASSERT(device, list_empty(&device->active_ee));
5048 D_ASSERT(device, list_empty(&device->sync_ee));
5049 D_ASSERT(device, list_empty(&device->done_ee));
5068 sock = &connection->data;
5071 return -EIO;
5073 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5074 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
5075 p->feature_flags = cpu_to_be32(PRO_FEATURES);
5081 * 1 yes, we have a valid connection
5083 * -1 peer talks different language,
5088 /* ASSERT current == connection->receiver ... */
5105 return -1;
5111 return -1;
5114 p = pi.data;
5119 p->protocol_min = be32_to_cpu(p->protocol_min);
5120 p->protocol_max = be32_to_cpu(p->protocol_max);
5121 if (p->protocol_max == 0)
5122 p->protocol_max = p->protocol_min;
5124 if (PRO_VERSION_MAX < p->protocol_min ||
5125 PRO_VERSION_MIN > p->protocol_max)
5128 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
5129 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
5132 "Agreed network protocol version %d\n", connection->agreed_pro_version);
5135 connection->agreed_features,
5136 connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
5137 connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
5138 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
5139 connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
5140 connection->agreed_features ? "" : " none");
5146 "I support %d-%d, peer supports %d-%d\n",
5148 p->protocol_min, p->protocol_max);
5149 return -1;
5156 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
5157 return -1;
5163 1 - auth succeeded,
5164 0 - failed, try again (network error),
5165 -1 - auth failed, don't try again.
5186 nc = rcu_dereference(connection->net_conf);
5187 key_len = strlen(nc->shared_secret);
5188 memcpy(secret, nc->shared_secret, key_len);
5192 crypto_shash_descsize(connection->cram_hmac_tfm),
5195 rv = -1;
5198 desc->tfm = connection->cram_hmac_tfm;
5200 rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
5203 rv = -1;
5209 sock = &connection->data;
5228 rv = -1;
5234 rv = -1;
5240 rv = -1;
5246 rv = -1;
5258 rv = -1;
5262 resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
5265 rv = -1;
5272 rv = -1;
5312 rv = -1;
5320 rv = -1;
5330 rv = -1;
5347 struct drbd_connection *connection = thi->connection;
5358 if (h == -1) {
5365 blk_start_plug(&connection->receiver_plug);
5367 blk_finish_plug(&connection->receiver_plug);
5380 struct p_req_state_reply *p = pi->data;
5381 int retcode = be32_to_cpu(p->retcode);
5384 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5386 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
5390 wake_up(&connection->ping_wait);
5399 struct p_req_state_reply *p = pi->data;
5400 int retcode = be32_to_cpu(p->retcode);
5402 peer_device = conn_peer_device(connection, pi->vnr);
5404 return -EIO;
5405 device = peer_device->device;
5407 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
5408 D_ASSERT(device, connection->agreed_pro_version < 100);
5413 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5415 set_bit(CL_ST_CHG_FAIL, &device->flags);
5419 wake_up(&device->state_wait);
5433 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5434 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5435 wake_up(&connection->ping_wait);
5444 struct p_block_ack *p = pi->data;
5445 sector_t sector = be64_to_cpu(p->sector);
5446 int blksize = be32_to_cpu(p->blksize);
5448 peer_device = conn_peer_device(connection, pi->vnr);
5450 return -EIO;
5451 device = peer_device->device;
5453 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
5455 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5461 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5465 atomic_add(blksize >> 9, &device->rs_sect_in);
5475 struct drbd_device *device = peer_device->device;
5479 spin_lock_irq(&device->resource->req_lock);
5482 spin_unlock_irq(&device->resource->req_lock);
5483 return -EIO;
5486 spin_unlock_irq(&device->resource->req_lock);
5497 struct p_block_ack *p = pi->data;
5498 sector_t sector = be64_to_cpu(p->sector);
5499 int blksize = be32_to_cpu(p->blksize);
5502 peer_device = conn_peer_device(connection, pi->vnr);
5504 return -EIO;
5505 device = peer_device->device;
5507 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5509 if (p->block_id == ID_SYNCER) {
5514 switch (pi->cmd) {
5534 return validate_req_change_req_state(peer_device, p->block_id, sector,
5535 &device->write_requests, __func__,
5543 struct p_block_ack *p = pi->data;
5544 sector_t sector = be64_to_cpu(p->sector);
5545 int size = be32_to_cpu(p->blksize);
5548 peer_device = conn_peer_device(connection, pi->vnr);
5550 return -EIO;
5551 device = peer_device->device;
5553 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5555 if (p->block_id == ID_SYNCER) {
5561 err = validate_req_change_req_state(peer_device, p->block_id, sector,
5562 &device->write_requests, __func__,
5579 struct p_block_ack *p = pi->data;
5580 sector_t sector = be64_to_cpu(p->sector);
5582 peer_device = conn_peer_device(connection, pi->vnr);
5584 return -EIO;
5585 device = peer_device->device;
5587 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5590 (unsigned long long)sector, be32_to_cpu(p->blksize));
5592 return validate_req_change_req_state(peer_device, p->block_id, sector,
5593 &device->read_requests, __func__,
5603 struct p_block_ack *p = pi->data;
5605 peer_device = conn_peer_device(connection, pi->vnr);
5607 return -EIO;
5608 device = peer_device->device;
5610 sector = be64_to_cpu(p->sector);
5611 size = be32_to_cpu(p->blksize);
5613 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5619 switch (pi->cmd) {
5636 struct p_barrier_ack *p = pi->data;
5640 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
5643 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5644 struct drbd_device *device = peer_device->device;
5646 if (device->state.conn == C_AHEAD &&
5647 atomic_read(&device->ap_in_flight) == 0 &&
5648 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5649 device->start_resync_timer.expires = jiffies + HZ;
5650 add_timer(&device->start_resync_timer);
5662 struct p_block_ack *p = pi->data;
5667 peer_device = conn_peer_device(connection, pi->vnr);
5669 return -EIO;
5670 device = peer_device->device;
5672 sector = be64_to_cpu(p->sector);
5673 size = be32_to_cpu(p->blksize);
5675 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5677 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
5688 --device->ov_left;
5691 if ((device->ov_left & 0x200) == 0x200)
5692 drbd_advance_rs_marks(peer_device, device->ov_left);
5694 if (device->ov_left == 0) {
5697 dw->w.cb = w_ov_finished;
5698 dw->device = device;
5699 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
5726 nc = rcu_dereference(connection->net_conf);
5727 t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5734 connection->meta.socket->sk->sk_rcvtimeo = t;
5769 struct drbd_connection *connection = thi->connection;
5774 void *buf = connection->meta.rbuf;
5785 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5795 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5798 * -EINTR (on meta) we got a signal
5799 * -EAGAIN (on meta) rcvtimeo expired
5800 * -ECONNRESET other side closed the connection
5801 * -ERESTARTSYS (on data) we got a signal
5811 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
5814 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
5817 t = wait_event_timeout(connection->ping_wait,
5818 connection->cstate < C_WF_REPORT_PARAMS,
5825 } else if (rv == -EAGAIN) {
5826 /* If the data socket received something meanwhile,
5828 if (time_after(connection->last_received, pre_recv_jif))
5834 set_bit(SEND_PING, &connection->flags);
5836 } else if (rv == -EINTR) {
5848 if (decode_header(connection, connection->meta.rbuf, &pi))
5851 if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
5856 expect = header_size + cmd->pkt_size;
5857 if (pi.size != expect - header_size) {
5866 err = cmd->fn(connection, &pi);
5868 drbd_err(connection, "%ps failed\n", cmd->fn);
5872 connection->last_received = jiffies;
5879 buf = connection->meta.rbuf;
5905 struct drbd_connection *connection = peer_device->connection;
5906 struct drbd_device *device = peer_device->device;
5911 nc = rcu_dereference(connection->net_conf);
5912 tcp_cork = nc->tcp_cork;
5916 tcp_sock_set_cork(connection->meta.socket->sk, true);
5919 kref_put(&device->kref, drbd_destroy_device);
5929 tcp_sock_set_cork(connection->meta.socket->sk, false);