From b411b3637fa71fce9cf2acf0639009500f5892fe Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Sep 2009 16:07:19 -0700 Subject: The DRBD driver Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2258 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2258 insertions(+) create mode 100644 drivers/block/drbd/drbd_int.h (limited to 'drivers/block/drbd/drbd_int.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h new file mode 100644 index 000000000000..8da602e010bb --- /dev/null +++ b/drivers/block/drbd/drbd_int.h @@ -0,0 +1,2258 @@ +/* + drbd_int.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#ifndef _DRBD_INT_H +#define _DRBD_INT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __CHECKER__ +# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) +# define __protected_read_by(x) __attribute__((require_context(x,1,999,"read"))) +# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write"))) +# define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call"))) +#else +# define __protected_by(x) +# define __protected_read_by(x) +# define __protected_write_by(x) +# define __must_hold(x) +#endif + +#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0) + +/* module parameter, defined in drbd_main.c */ +extern unsigned int minor_count; +extern int disable_sendpage; +extern int allow_oos; +extern unsigned int cn_idx; + +#ifdef CONFIG_DRBD_FAULT_INJECTION +extern int enable_faults; +extern int fault_rate; +extern int fault_devs; +#endif + +extern char usermode_helper[]; + + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* I don't remember why XCPU ... + * This is used to wake the asender, + * and to interrupt sending the sending task + * on disconnect. + */ +#define DRBD_SIG SIGXCPU + +/* This is used to stop/restart our threads. + * Cannot use SIGTERM nor SIGKILL, since these + * are sent out by init on runlevel changes + * I choose SIGHUP for now. + */ +#define DRBD_SIGKILL SIGHUP + +/* All EEs on the free list should have ID_VACANT (== 0) + * freshly allocated EEs get !ID_VACANT (== 1) + * so if it says "cannot dereference null pointer at adress 0x00000001", + * it is most likely one of these :( */ + +#define ID_IN_SYNC (4711ULL) +#define ID_OUT_OF_SYNC (4712ULL) + +#define ID_SYNCER (-1ULL) +#define ID_VACANT 0 +#define is_syncer_block_id(id) ((id) == ID_SYNCER) + +struct drbd_conf; + + +/* to shorten dev_warn(DEV, "msg"); and relatives statements */ +#define DEV (disk_to_dev(mdev->vdisk)) + +#define D_ASSERT(exp) if (!(exp)) \ + dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) + +#define ERR_IF(exp) if (({ \ + int _b = (exp) != 0; \ + if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ + __func__, #exp, __FILE__, __LINE__); \ + _b; \ + })) + +/* Defines to control fault insertion */ +enum { + DRBD_FAULT_MD_WR = 0, /* meta data write */ + DRBD_FAULT_MD_RD = 1, /* read */ + DRBD_FAULT_RS_WR = 2, /* resync */ + DRBD_FAULT_RS_RD = 3, + DRBD_FAULT_DT_WR = 4, /* data */ + DRBD_FAULT_DT_RD = 5, + DRBD_FAULT_DT_RA = 6, /* data read ahead */ + DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ + DRBD_FAULT_AL_EE = 8, /* alloc ee */ + + DRBD_FAULT_MAX, +}; + +extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); + +#ifdef CONFIG_DRBD_FAULT_INJECTION +extern unsigned int +_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); +static inline int +drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { + return fault_rate && + (enable_faults & (1< P_MAY_IGNORE) ... */ + P_MAX_OPT_CMD = 0x101, + + /* special command ids for handshake */ + + P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ + P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ + + P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ +}; + +static inline const char *cmdname(enum drbd_packets cmd) +{ + /* THINK may need to become several global tables + * when we want to support more than + * one PRO_VERSION */ + static const char *cmdnames[] = { + [P_DATA] = "Data", + [P_DATA_REPLY] = "DataReply", + [P_RS_DATA_REPLY] = "RSDataReply", + [P_BARRIER] = "Barrier", + [P_BITMAP] = "ReportBitMap", + [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", + [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", + [P_UNPLUG_REMOTE] = "UnplugRemote", + [P_DATA_REQUEST] = "DataRequest", + [P_RS_DATA_REQUEST] = "RSDataRequest", + [P_SYNC_PARAM] = "SyncParam", + [P_SYNC_PARAM89] = "SyncParam89", + [P_PROTOCOL] = "ReportProtocol", + [P_UUIDS] = "ReportUUIDs", + [P_SIZES] = "ReportSizes", + [P_STATE] = "ReportState", + [P_SYNC_UUID] = "ReportSyncUUID", + [P_AUTH_CHALLENGE] = "AuthChallenge", + [P_AUTH_RESPONSE] = "AuthResponse", + [P_PING] = "Ping", + [P_PING_ACK] = "PingAck", + [P_RECV_ACK] = "RecvAck", + [P_WRITE_ACK] = "WriteAck", + [P_RS_WRITE_ACK] = "RSWriteAck", + [P_DISCARD_ACK] = "DiscardAck", + [P_NEG_ACK] = "NegAck", + [P_NEG_DREPLY] = "NegDReply", + [P_NEG_RS_DREPLY] = "NegRSDReply", + [P_BARRIER_ACK] = "BarrierAck", + [P_STATE_CHG_REQ] = "StateChgRequest", + [P_STATE_CHG_REPLY] = "StateChgReply", + [P_OV_REQUEST] = "OVRequest", + [P_OV_REPLY] = "OVReply", + [P_OV_RESULT] = "OVResult", + [P_MAX_CMD] = NULL, + }; + + if (cmd == P_HAND_SHAKE_M) + return "HandShakeM"; + if (cmd == P_HAND_SHAKE_S) + return "HandShakeS"; + if (cmd == P_HAND_SHAKE) + return "HandShake"; + if (cmd >= P_MAX_CMD) + return "Unknown"; + return cmdnames[cmd]; +} + +/* for sending/receiving the bitmap, + * possibly in some encoding scheme */ +struct bm_xfer_ctx { + /* "const" + * stores total bits and long words + * of the bitmap, so we don't need to + * call the accessor functions over and again. */ + unsigned long bm_bits; + unsigned long bm_words; + /* during xfer, current position within the bitmap */ + unsigned long bit_offset; + unsigned long word_offset; + + /* statistics; index: (h->command == P_BITMAP) */ + unsigned packets[2]; + unsigned bytes[2]; +}; + +extern void INFO_bm_xfer_stats(struct drbd_conf *mdev, + const char *direction, struct bm_xfer_ctx *c); + +static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) +{ + /* word_offset counts "native long words" (32 or 64 bit), + * aligned at 64 bit. + * Encoded packet may end at an unaligned bit offset. + * In case a fallback clear text packet is transmitted in + * between, we adjust this offset back to the last 64bit + * aligned "native long word", which makes coding and decoding + * the plain text bitmap much more convenient. */ +#if BITS_PER_LONG == 64 + c->word_offset = c->bit_offset >> 6; +#elif BITS_PER_LONG == 32 + c->word_offset = c->bit_offset >> 5; + c->word_offset &= ~(1UL); +#else +# error "unsupported BITS_PER_LONG" +#endif +} + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +/* This is the layout for a packet on the wire. + * The byteorder is the network byte order. + * (except block_id and barrier fields. + * these are pointers to local structs + * and have no relevance for the partner, + * which just echoes them as received.) + * + * NOTE that the payload starts at a long aligned offset, + * regardless of 32 or 64 bit arch! + */ +struct p_header { + u32 magic; + u16 command; + u16 length; /* bytes of data after this header */ + u8 payload[0]; +} __packed; +/* 8 bytes. packet FIXED for the next century! */ + +/* + * short commands, packets without payload, plain p_header: + * P_PING + * P_PING_ACK + * P_BECOME_SYNC_TARGET + * P_BECOME_SYNC_SOURCE + * P_UNPLUG_REMOTE + */ + +/* + * commands with out-of-struct payload: + * P_BITMAP (no additional fields) + * P_DATA, P_DATA_REPLY (see p_data) + * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) + */ + +/* these defines must not be changed without changing the protocol version */ +#define DP_HARDBARRIER 1 +#define DP_RW_SYNC 2 +#define DP_MAY_SET_IN_SYNC 4 + +struct p_data { + struct p_header head; + u64 sector; /* 64 bits sector number */ + u64 block_id; /* to identify the request in protocol B&C */ + u32 seq_num; + u32 dp_flags; +} __packed; + +/* + * commands which share a struct: + * p_block_ack: + * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), + * P_DISCARD_ACK (proto C, two-primaries conflict detection) + * p_block_req: + * P_DATA_REQUEST, P_RS_DATA_REQUEST + */ +struct p_block_ack { + struct p_header head; + u64 sector; + u64 block_id; + u32 blksize; + u32 seq_num; +} __packed; + + +struct p_block_req { + struct p_header head; + u64 sector; + u64 block_id; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +/* + * commands with their own struct for additional fields: + * P_HAND_SHAKE + * P_BARRIER + * P_BARRIER_ACK + * P_SYNC_PARAM + * ReportParams + */ + +struct p_handshake { + struct p_header head; /* 8 bytes */ + u32 protocol_min; + u32 feature_flags; + u32 protocol_max; + + /* should be more than enough for future enhancements + * for now, feature_flags and the reserverd array shall be zero. + */ + + u32 _pad; + u64 reserverd[7]; +} __packed; +/* 80 bytes, FIXED for the next century */ + +struct p_barrier { + struct p_header head; + u32 barrier; /* barrier number _handle_ only */ + u32 pad; /* to multiple of 8 Byte */ +} __packed; + +struct p_barrier_ack { + struct p_header head; + u32 barrier; + u32 set_size; +} __packed; + +struct p_rs_param { + struct p_header head; + u32 rate; + + /* Since protocol version 88 and higher. */ + char verify_alg[0]; +} __packed; + +struct p_rs_param_89 { + struct p_header head; + u32 rate; + /* protocol version 89: */ + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; +} __packed; + +struct p_protocol { + struct p_header head; + u32 protocol; + u32 after_sb_0p; + u32 after_sb_1p; + u32 after_sb_2p; + u32 want_lose; + u32 two_primaries; + + /* Since protocol version 87 and higher. */ + char integrity_alg[0]; + +} __packed; + +struct p_uuids { + struct p_header head; + u64 uuid[UI_EXTENDED_SIZE]; +} __packed; + +struct p_rs_uuid { + struct p_header head; + u64 uuid; +} __packed; + +struct p_sizes { + struct p_header head; + u64 d_size; /* size of disk */ + u64 u_size; /* user requested size */ + u64 c_size; /* current exported size */ + u32 max_segment_size; /* Maximal size of a BIO */ + u32 queue_order_type; +} __packed; + +struct p_state { + struct p_header head; + u32 state; +} __packed; + +struct p_req_state { + struct p_header head; + u32 mask; + u32 val; +} __packed; + +struct p_req_state_reply { + struct p_header head; + u32 retcode; +} __packed; + +struct p_drbd06_param { + u64 size; + u32 state; + u32 blksize; + u32 protocol; + u32 version; + u32 gen_cnt[5]; + u32 bit_map_gen[5]; +} __packed; + +struct p_discard { + struct p_header head; + u64 block_id; + u32 seq_num; + u32 pad; +} __packed; + +/* Valid values for the encoding field. + * Bump proto version when changing this. */ +enum drbd_bitmap_code { + /* RLE_VLI_Bytes = 0, + * and other bit variants had been defined during + * algorithm evaluation. */ + RLE_VLI_Bits = 2, +}; + +struct p_compressed_bm { + struct p_header head; + /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code + * (encoding & 0x80): polarity (set/unset) of first runlength + * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits + * used to pad up to head.length bytes + */ + u8 encoding; + + u8 code[0]; +} __packed; + +/* DCBP: Drbd Compressed Bitmap Packet ... */ +static inline enum drbd_bitmap_code +DCBP_get_code(struct p_compressed_bm *p) +{ + return (enum drbd_bitmap_code)(p->encoding & 0x0f); +} + +static inline void +DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) +{ + BUG_ON(code & ~0xf); + p->encoding = (p->encoding & ~0xf) | code; +} + +static inline int +DCBP_get_start(struct p_compressed_bm *p) +{ + return (p->encoding & 0x80) != 0; +} + +static inline void +DCBP_set_start(struct p_compressed_bm *p, int set) +{ + p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); +} + +static inline int +DCBP_get_pad_bits(struct p_compressed_bm *p) +{ + return (p->encoding >> 4) & 0x7; +} + +static inline void +DCBP_set_pad_bits(struct p_compressed_bm *p, int n) +{ + BUG_ON(n & ~0x7); + p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); +} + +/* one bitmap packet, including the p_header, + * should fit within one _architecture independend_ page. + * so we need to use the fixed size 4KiB page size + * most architechtures have used for a long time. + */ +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) +#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) +#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) +#if (PAGE_SIZE < 4096) +/* drbd_send_bitmap / receive_bitmap would break horribly */ +#error "PAGE_SIZE too small" +#endif + +union p_polymorph { + struct p_header header; + struct p_handshake handshake; + struct p_data data; + struct p_block_ack block_ack; + struct p_barrier barrier; + struct p_barrier_ack barrier_ack; + struct p_rs_param_89 rs_param_89; + struct p_protocol protocol; + struct p_sizes sizes; + struct p_uuids uuids; + struct p_state state; + struct p_req_state req_state; + struct p_req_state_reply req_state_reply; + struct p_block_req block_req; +} __packed; + +/**********************************************************************/ +enum drbd_thread_state { + None, + Running, + Exiting, + Restarting +}; + +struct drbd_thread { + spinlock_t t_lock; + struct task_struct *task; + struct completion stop; + enum drbd_thread_state t_state; + int (*function) (struct drbd_thread *); + struct drbd_conf *mdev; + int reset_cpu_mask; +}; + +static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) +{ + /* THINK testing the t_state seems to be uncritical in all cases + * (but thread_{start,stop}), so we can read it *without* the lock. + * --lge */ + + smp_rmb(); + return thi->t_state; +} + + +/* + * Having this as the first member of a struct provides sort of "inheritance". + * "derived" structs can be "drbd_queue_work()"ed. + * The callback should know and cast back to the descendant struct. + * drbd_request and drbd_epoch_entry are descendants of drbd_work. + */ +struct drbd_work; +typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); +struct drbd_work { + struct list_head list; + drbd_work_cb cb; +}; + +struct drbd_tl_epoch; +struct drbd_request { + struct drbd_work w; + struct drbd_conf *mdev; + + /* if local IO is not allowed, will be NULL. + * if local IO _is_ allowed, holds the locally submitted bio clone, + * or, after local IO completion, the ERR_PTR(error). + * see drbd_endio_pri(). */ + struct bio *private_bio; + + struct hlist_node colision; + sector_t sector; + unsigned int size; + unsigned int epoch; /* barrier_nr */ + + /* barrier_nr: used to check on "completion" whether this req was in + * the current epoch, and we therefore have to close it, + * starting a new epoch... + */ + + /* up to here, the struct layout is identical to drbd_epoch_entry; + * we might be able to use that to our advantage... */ + + struct list_head tl_requests; /* ring list in the transfer log */ + struct bio *master_bio; /* master bio pointer */ + unsigned long rq_state; /* see comments above _req_mod() */ + int seq_num; + unsigned long start_time; +}; + +struct drbd_tl_epoch { + struct drbd_work w; + struct list_head requests; /* requests before */ + struct drbd_tl_epoch *next; /* pointer to the next barrier */ + unsigned int br_number; /* the barriers identifier. */ + int n_req; /* number of requests attached before this barrier */ +}; + +struct drbd_request; + +/* These Tl_epoch_entries may be in one of 6 lists: + active_ee .. data packet being written + sync_ee .. syncer block being written + done_ee .. block written, need to send P_WRITE_ACK + read_ee .. [RS]P_DATA_REQUEST being read +*/ + +struct drbd_epoch { + struct list_head list; + unsigned int barrier_nr; + atomic_t epoch_size; /* increased on every request added. */ + atomic_t active; /* increased on every req. added, and dec on every finished. */ + unsigned long flags; +}; + +/* drbd_epoch flag bits */ +enum { + DE_BARRIER_IN_NEXT_EPOCH_ISSUED, + DE_BARRIER_IN_NEXT_EPOCH_DONE, + DE_CONTAINS_A_BARRIER, + DE_HAVE_BARRIER_NUMBER, + DE_IS_FINISHING, +}; + +enum epoch_event { + EV_PUT, + EV_GOT_BARRIER_NR, + EV_BARRIER_DONE, + EV_BECAME_LAST, + EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ + EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ + EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ + EV_TRACE_ALLOC, + EV_TRACE_FREE, + EV_CLEANUP = 32, /* used as flag */ +}; + +struct drbd_epoch_entry { + struct drbd_work w; + struct drbd_conf *mdev; + struct bio *private_bio; + struct hlist_node colision; + sector_t sector; + unsigned int size; + struct drbd_epoch *epoch; + + /* up to here, the struct layout is identical to drbd_request; + * we might be able to use that to our advantage... */ + + unsigned int flags; + u64 block_id; +}; + +struct drbd_wq_barrier { + struct drbd_work w; + struct completion done; +}; + +struct digest_info { + int digest_size; + void *digest; +}; + +/* ee flag bits */ +enum { + __EE_CALL_AL_COMPLETE_IO, + __EE_CONFLICT_PENDING, + __EE_MAY_SET_IN_SYNC, + __EE_IS_BARRIER, +}; +#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) +#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) +#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) +#define EE_IS_BARRIER (1<<__EE_IS_BARRIER) + +/* global flag bits */ +enum { + CREATE_BARRIER, /* next P_DATA is preceeded by a P_BARRIER */ + SIGNAL_ASENDER, /* whether asender wants to be interrupted */ + SEND_PING, /* whether asender should send a ping asap */ + + STOP_SYNC_TIMER, /* tell timer to cancel itself */ + UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ + UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ + MD_DIRTY, /* current uuids and flags not yet on disk */ + DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ + USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ + CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ + CL_ST_CHG_SUCCESS, + CL_ST_CHG_FAIL, + CRASHED_PRIMARY, /* This node was a crashed primary. + * Gets cleared when the state.conn + * goes into C_CONNECTED state. */ + WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ + NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ + CONSIDER_RESYNC, + + MD_NO_BARRIER, /* meta data device does not support barriers, + so don't even try */ + SUSPEND_IO, /* suspend application io */ + BITMAP_IO, /* suspend application io; + once no more io in flight, start bitmap io */ + BITMAP_IO_QUEUED, /* Started bitmap IO */ + RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ + NET_CONGESTED, /* The data socket is congested */ + + CONFIG_PENDING, /* serialization of (re)configuration requests. + * if set, also prevents the device from dying */ + DEVICE_DYING, /* device became unconfigured, + * but worker thread is still handling the cleanup. + * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, + * while this is set. */ + RESIZE_PENDING, /* Size change detected locally, waiting for the response from + * the peer, if it changed there as well. */ +}; + +struct drbd_bitmap; /* opaque for drbd_conf */ + +/* TODO sort members for performance + * MAYBE group them further */ + +/* THINK maybe we actually want to use the default "event/%s" worker threads + * or similar in linux 2.6, which uses per cpu data and threads. + * + * To be general, this might need a spin_lock member. + * For now, please use the mdev->req_lock to protect list_head, + * see drbd_queue_work below. + */ +struct drbd_work_queue { + struct list_head q; + struct semaphore s; /* producers up it, worker down()s it */ + spinlock_t q_lock; /* to protect the list. */ +}; + +struct drbd_socket { + struct drbd_work_queue work; + struct mutex mutex; + struct socket *socket; + /* this way we get our + * send/receive buffers off the stack */ + union p_polymorph sbuf; + union p_polymorph rbuf; +}; + +struct drbd_md { + u64 md_offset; /* sector offset to 'super' block */ + + u64 la_size_sect; /* last agreed size, unit sectors */ + u64 uuid[UI_SIZE]; + u64 device_uuid; + u32 flags; + u32 md_size_sect; + + s32 al_offset; /* signed relative sector offset to al area */ + s32 bm_offset; /* signed relative sector offset to bitmap */ + + /* u32 al_nr_extents; important for restoring the AL + * is stored into sync_conf.al_extents, which in turn + * gets applied to act_log->nr_elements + */ +}; + +/* for sync_conf and other types... */ +#define NL_PACKET(name, number, fields) struct name { fields }; +#define NL_INTEGER(pn,pr,member) int member; +#define NL_INT64(pn,pr,member) __u64 member; +#define NL_BIT(pn,pr,member) unsigned member:1; +#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; +#include "linux/drbd_nl.h" + +struct drbd_backing_dev { + struct block_device *backing_bdev; + struct block_device *md_bdev; + struct file *lo_file; + struct file *md_file; + struct drbd_md md; + struct disk_conf dc; /* The user provided config... */ + sector_t known_size; /* last known size of that backing device */ +}; + +struct drbd_md_io { + struct drbd_conf *mdev; + struct completion event; + int error; +}; + +struct bm_io_work { + struct drbd_work w; + char *why; + int (*io_fn)(struct drbd_conf *mdev); + void (*done)(struct drbd_conf *mdev, int rv); +}; + +enum write_ordering_e { + WO_none, + WO_drain_io, + WO_bdev_flush, + WO_bio_barrier +}; + +struct drbd_conf { + /* things that are stored as / read from meta data on disk */ + unsigned long flags; + + /* configured by drbdsetup */ + struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ + struct syncer_conf sync_conf; + struct drbd_backing_dev *ldev __protected_by(local); + + sector_t p_size; /* partner's disk size */ + struct request_queue *rq_queue; + struct block_device *this_bdev; + struct gendisk *vdisk; + + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ + struct drbd_socket meta; /* ping/ack (metadata) packets */ + int agreed_pro_version; /* actually used protocol version */ + unsigned long last_received; /* in jiffies, either socket */ + unsigned int ko_count; + struct drbd_work resync_work, + unplug_work, + md_sync_work; + struct timer_list resync_timer; + struct timer_list md_sync_timer; + + /* Used after attach while negotiating new disk state. */ + union drbd_state new_state_tmp; + + union drbd_state state; + wait_queue_head_t misc_wait; + wait_queue_head_t state_wait; /* upon each state change. */ + unsigned int send_cnt; + unsigned int recv_cnt; + unsigned int read_cnt; + unsigned int writ_cnt; + unsigned int al_writ_cnt; + unsigned int bm_writ_cnt; + atomic_t ap_bio_cnt; /* Requests we need to complete */ + atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ + atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ + atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t local_cnt; /* Waiting for local completion */ + atomic_t net_cnt; /* Users of net_conf */ + spinlock_t req_lock; + struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ + struct drbd_tl_epoch *newest_tle; + struct drbd_tl_epoch *oldest_tle; + struct list_head out_of_sequence_requests; + struct hlist_head *tl_hash; + unsigned int tl_hash_s; + + /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ + unsigned long rs_total; + /* number of sync IOs that failed in this run */ + unsigned long rs_failed; + /* Syncer's start time [unit jiffies] */ + unsigned long rs_start; + /* cumulated time in PausedSyncX state [unit jiffies] */ + unsigned long rs_paused; + /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ + unsigned long rs_mark_left; + /* marks's time [unit jiffies] */ + unsigned long rs_mark_time; + /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ + unsigned long rs_same_csum; + + /* where does the admin want us to start? (sector) */ + sector_t ov_start_sector; + /* where are we now? (sector) */ + sector_t ov_position; + /* Start sector of out of sync range (to merge printk reporting). */ + sector_t ov_last_oos_start; + /* size of out-of-sync range in sectors. */ + sector_t ov_last_oos_size; + unsigned long ov_left; /* in bits */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; + + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; + struct drbd_bitmap *bitmap; + unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ + + /* Used to track operations of resync... */ + struct lru_cache *resync; + /* Number of locked elements in resync LRU */ + unsigned int resync_locked; + /* resync extent number waiting for application requests */ + unsigned int resync_wenr; + + int open_cnt; + u64 *p_uuid; + struct drbd_epoch *current_epoch; + spinlock_t epoch_lock; + unsigned int epochs; + enum write_ordering_e write_ordering; + struct list_head active_ee; /* IO in progress */ + struct list_head sync_ee; /* IO in progress */ + struct list_head done_ee; /* send ack */ + struct list_head read_ee; /* IO in progress */ + struct list_head net_ee; /* zero-copy network send in progress */ + struct hlist_head *ee_hash; /* is proteced by req_lock! */ + unsigned int ee_hash_s; + + /* this one is protected by ee_lock, single thread */ + struct drbd_epoch_entry *last_write_w_barrier; + + int next_barrier_nr; + struct hlist_head *app_reads_hash; /* is proteced by req_lock */ + struct list_head resync_reads; + atomic_t pp_in_use; + wait_queue_head_t ee_wait; + struct page *md_io_page; /* one page buffer for md_io */ + struct page *md_io_tmpp; /* for logical_block_size != 512 */ + struct mutex md_io_mutex; /* protects the md_io_buffer */ + spinlock_t al_lock; + wait_queue_head_t al_wait; + struct lru_cache *act_log; /* activity log */ + unsigned int al_tr_number; + int al_tr_cycle; + int al_tr_pos; /* position of the next transaction in the journal */ + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ + struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + void *int_dig_out; + void *int_dig_in; + void *int_dig_vv; + wait_queue_head_t seq_wait; + atomic_t packet_seq; + unsigned int peer_seq; + spinlock_t peer_seq_lock; + unsigned int minor; + unsigned long comm_bm_set; /* communicated number of set bits. */ + cpumask_var_t cpu_mask; + struct bm_io_work bm_io_work; + u64 ed_uuid; /* UUID of the exposed data */ + struct mutex state_mutex; + char congestion_reason; /* Why we where congested... */ +}; + +static inline struct drbd_conf *minor_to_mdev(unsigned int minor) +{ + struct drbd_conf *mdev; + + mdev = minor < minor_count ? minor_table[minor] : NULL; + + return mdev; +} + +static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) +{ + return mdev->minor; +} + +/* returns 1 if it was successfull, + * returns 0 if there was no data socket. + * so wherever you are going to use the data.socket, e.g. do + * if (!drbd_get_data_sock(mdev)) + * return 0; + * CODE(); + * drbd_put_data_sock(mdev); + */ +static inline int drbd_get_data_sock(struct drbd_conf *mdev) +{ + mutex_lock(&mdev->data.mutex); + /* drbd_disconnect() could have called drbd_free_sock() + * while we were waiting in down()... */ + if (unlikely(mdev->data.socket == NULL)) { + mutex_unlock(&mdev->data.mutex); + return 0; + } + return 1; +} + +static inline void drbd_put_data_sock(struct drbd_conf *mdev) +{ + mutex_unlock(&mdev->data.mutex); +} + +/* + * function declarations + *************************/ + +/* drbd_main.c */ + +enum chg_state_flags { + CS_HARD = 1, + CS_VERBOSE = 2, + CS_WAIT_COMPLETE = 4, + CS_SERIALIZE = 8, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, +}; + +extern void drbd_init_set_defaults(struct drbd_conf *mdev); +extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state, + union drbd_state); +extern int _drbd_request_state(struct drbd_conf *, union drbd_state, + union drbd_state, enum chg_state_flags); +extern int __drbd_set_state(struct drbd_conf *, union drbd_state, + enum chg_state_flags, struct completion *done); +extern void print_st_err(struct drbd_conf *, union drbd_state, + union drbd_state, int); +extern int drbd_thread_start(struct drbd_thread *thi); +extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); +#ifdef CONFIG_SMP +extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); +extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); +#else +#define drbd_thread_current_set_cpu(A) ({}) +#define drbd_calc_cpu_mask(A) ({}) +#endif +extern void drbd_free_resources(struct drbd_conf *mdev); +extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, + unsigned int set_size); +extern void tl_clear(struct drbd_conf *mdev); +extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); +extern void drbd_free_sock(struct drbd_conf *mdev); +extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, unsigned msg_flags); +extern int drbd_send_protocol(struct drbd_conf *mdev); +extern int drbd_send_uuids(struct drbd_conf *mdev); +extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); +extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); +extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); +extern int _drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev); +extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packets cmd, struct p_header *h, + size_t size, unsigned msg_flags); +#define USE_DATA_SOCKET 1 +#define USE_META_SOCKET 0 +extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum drbd_packets cmd, struct p_header *h, + size_t size); +extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, + char *data, size_t size); +extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, + u32 set_size); +extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_block_req *rp); +extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_data *dp); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, + sector_t sector, int blksize, u64 block_id); +extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); +extern int _drbd_send_barrier(struct drbd_conf *mdev, + struct drbd_tl_epoch *barrier); +extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, + sector_t sector, int size, u64 block_id); +extern int drbd_send_drequest_csum(struct drbd_conf *mdev, + sector_t sector,int size, + void *digest, int digest_size, + enum drbd_packets cmd); +extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); + +extern int drbd_send_bitmap(struct drbd_conf *mdev); +extern int _drbd_send_bitmap(struct drbd_conf *mdev); +extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); +extern void drbd_free_bc(struct drbd_backing_dev *ldev); +extern void drbd_mdev_cleanup(struct drbd_conf *mdev); + +/* drbd_meta-data.c (still in drbd_main.c) */ +extern void drbd_md_sync(struct drbd_conf *mdev); +extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); +/* maybe define them below as inline? */ +extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); +extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); +extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); +extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); +extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); +extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); +extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); +extern int drbd_md_test_flag(struct drbd_backing_dev *, int); +extern void drbd_md_mark_dirty(struct drbd_conf *mdev); +extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + void (*done)(struct drbd_conf *, int), + char *why); +extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); +extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); +extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); + + +/* Meta data layout + We reserve a 128MB Block (4k aligned) + * either at the end of the backing device + * or on a seperate meta data device. */ + +#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ +/* The following numbers are sectors */ +#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ +#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ +/* Allows up to about 3.8TB */ +#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) + +/* Since the smalles IO unit is usually 512 byte */ +#define MD_SECTOR_SHIFT 9 +#define MD_SECTOR_SIZE (1< we need 32 KB bitmap. + * Bit 0 ==> local node thinks this block is binary identical on both nodes + * Bit 1 ==> local node thinks this block needs to be synced. + */ + +#define BM_BLOCK_SHIFT 12 /* 4k per bit */ +#define BM_BLOCK_SIZE (1<>(BM_BLOCK_SHIFT-9)) +#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9)) +#define BM_SECT_PER_BIT BM_BIT_TO_SECT(1) + +/* bit to represented kilo byte conversion */ +#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10)) + +/* in which _bitmap_ extent (resp. sector) the bit for a certain + * _storage_ sector is located in */ +#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) + +/* how much _storage_ sectors we have per bitmap sector */ +#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) +#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) + +/* in one sector of the bitmap, we have this many activity_log extents. */ +#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) +#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) + +#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) +#define BM_BLOCKS_PER_BM_EXT_MASK ((1<ov_last_oos_size) { + dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", + (unsigned long long)mdev->ov_last_oos_start, + (unsigned long)mdev->ov_last_oos_size); + } + mdev->ov_last_oos_size=0; +} + + +extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +/* worker callbacks */ +extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); +extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); +extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); +extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); +extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); + +extern void resync_timer_fn(unsigned long data); + +/* drbd_receiver.c */ +extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); +extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, + u64 id, + sector_t sector, + unsigned int data_size, + gfp_t gfp_mask) __must_hold(local); +extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); +extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head); +extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head); +extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); +extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); +extern void drbd_flush_workqueue(struct drbd_conf *mdev); + +/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to + * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ +static inline int drbd_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + int err; + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + return err; +} + +static inline void drbd_tcp_cork(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_uncork(struct socket *sock) +{ + int __user val = 0; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_nodelay(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_quickack(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, + (char __user *)&val, sizeof(val)); +} + +void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); + +/* drbd_proc.c */ +extern struct proc_dir_entry *drbd_proc; +extern struct file_operations drbd_proc_fops; +extern const char *drbd_conn_str(enum drbd_conns s); +extern const char *drbd_role_str(enum drbd_role s); + +/* drbd_actlog.c */ +extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); +extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_rs_cancel_all(struct drbd_conf *mdev); +extern int drbd_rs_del_all(struct drbd_conf *mdev); +extern void drbd_rs_failed_io(struct drbd_conf *mdev, + sector_t sector, int size); +extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); +extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, + int size, const char *file, const unsigned int line); +#define drbd_set_in_sync(mdev, sector, size) \ + __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) +extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, + int size, const char *file, const unsigned int line); +#define drbd_set_out_of_sync(mdev, sector, size) \ + __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) +extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); +extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); +extern void drbd_al_shrink(struct drbd_conf *mdev); + + +/* drbd_nl.c */ + +void drbd_nl_cleanup(void); +int __init drbd_nl_init(void); +void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); +void drbd_bcast_sync_progress(struct drbd_conf *mdev); +void drbd_bcast_ee(struct drbd_conf *mdev, + const char *reason, const int dgs, + const char* seen_hash, const char* calc_hash, + const struct drbd_epoch_entry* e); + + +/** + * DOC: DRBD State macros + * + * These macros are used to express state changes in easily readable form. + * + * The NS macros expand to a mask and a value, that can be bit ored onto the + * current state as soon as the spinlock (req_lock) was taken. + * + * The _NS macros are used for state functions that get called with the + * spinlock. These macros expand directly to the new state value. + * + * Besides the basic forms NS() and _NS() additional _?NS[23] are defined + * to express state changes that affect more than one aspect of the state. + * + * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) + * Means that the network connection was established and that the peer + * is in secondary role. + */ +#define role_MASK R_MASK +#define peer_MASK R_MASK +#define disk_MASK D_MASK +#define pdsk_MASK D_MASK +#define conn_MASK C_MASK +#define susp_MASK 1 +#define user_isp_MASK 1 +#define aftr_isp_MASK 1 + +#define NS(T, S) \ + ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T = (S); val; }) +#define NS2(T1, S1, T2, S2) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val; }) +#define NS3(T1, S1, T2, S2, T3, S3) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val.T3 = (S3); val; }) + +#define _NS(D, T, S) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) +#define _NS2(D, T1, S1, T2, S2) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns; }) +#define _NS3(D, T1, S1, T2, S2, T3, S3) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) + +/* + * inline helper functions + *************************/ + +static inline void drbd_state_lock(struct drbd_conf *mdev) +{ + wait_event(mdev->misc_wait, + !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); +} + +static inline void drbd_state_unlock(struct drbd_conf *mdev) +{ + clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); + wake_up(&mdev->misc_wait); +} + +static inline int _drbd_set_state(struct drbd_conf *mdev, + union drbd_state ns, enum chg_state_flags flags, + struct completion *done) +{ + int rv; + + read_lock(&global_state_lock); + rv = __drbd_set_state(mdev, ns, flags, done); + read_unlock(&global_state_lock); + + return rv; +} + +/** + * drbd_request_state() - Reqest a state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * + * This is the most graceful way of requesting a state change. It is verbose + * quite verbose in case the state change is not possible, and all those + * state changes are globally serialized. + */ +static inline int drbd_request_state(struct drbd_conf *mdev, + union drbd_state mask, + union drbd_state val) +{ + return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); +} + +#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) +static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) +{ + switch (mdev->ldev->dc.on_io_error) { + case EP_PASS_ON: + if (!forcedetach) { + if (printk_ratelimit()) + dev_err(DEV, "Local IO failed in %s." + "Passing error on...\n", where); + break; + } + /* NOTE fall through to detach case if forcedetach set */ + case EP_DETACH: + case EP_CALL_HELPER: + if (mdev->state.disk > D_FAILED) { + _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); + dev_err(DEV, "Local IO failed in %s." + "Detaching...\n", where); + } + break; + } +} + +/** + * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers + * @mdev: DRBD device. + * @error: Error code passed to the IO completion callback + * @forcedetach: Force detach. I.e. the error happened while accessing the meta data + * + * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED) + */ +#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) +static inline void drbd_chk_io_error_(struct drbd_conf *mdev, + int error, int forcedetach, const char *where) +{ + if (error) { + unsigned long flags; + spin_lock_irqsave(&mdev->req_lock, flags); + __drbd_chk_io_error_(mdev, forcedetach, where); + spin_unlock_irqrestore(&mdev->req_lock, flags); + } +} + + +/** + * drbd_md_first_sector() - Returns the first sector number of the meta data area + * @bdev: Meta data block device. + * + * BTW, for internal meta data, this happens to be the maximum capacity + * we could agree upon with our peer node. + */ +static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) +{ + switch (bdev->dc.meta_dev_idx) { + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + return bdev->md.md_offset + bdev->md.bm_offset; + case DRBD_MD_INDEX_FLEX_EXT: + default: + return bdev->md.md_offset; + } +} + +/** + * drbd_md_last_sector() - Return the last sector number of the meta data area + * @bdev: Meta data block device. + */ +static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) +{ + switch (bdev->dc.meta_dev_idx) { + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + return bdev->md.md_offset + MD_AL_OFFSET - 1; + case DRBD_MD_INDEX_FLEX_EXT: + default: + return bdev->md.md_offset + bdev->md.md_size_sect; + } +} + +/* Returns the number of 512 byte sectors of the device */ +static inline sector_t drbd_get_capacity(struct block_device *bdev) +{ + /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ + return bdev ? bdev->bd_inode->i_size >> 9 : 0; +} + +/** + * drbd_get_max_capacity() - Returns the capacity we announce to out peer + * @bdev: Meta data block device. + * + * returns the capacity we announce to out peer. we clip ourselves at the + * various MAX_SECTORS, because if we don't, current implementation will + * oops sooner or later + */ +static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) +{ + sector_t s; + switch (bdev->dc.meta_dev_idx) { + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + s = drbd_get_capacity(bdev->backing_bdev) + ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, + drbd_md_first_sector(bdev)) + : 0; + break; + case DRBD_MD_INDEX_FLEX_EXT: + s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX, + drbd_get_capacity(bdev->backing_bdev)); + /* clip at maximum size the meta device can support */ + s = min_t(sector_t, s, + BM_EXT_TO_SECT(bdev->md.md_size_sect + - bdev->md.bm_offset)); + break; + default: + s = min_t(sector_t, DRBD_MAX_SECTORS, + drbd_get_capacity(bdev->backing_bdev)); + } + return s; +} + +/** + * drbd_md_ss__() - Return the sector number of our meta data super block + * @mdev: DRBD device. + * @bdev: Meta data block device. + */ +static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev) +{ + switch (bdev->dc.meta_dev_idx) { + default: /* external, some index */ + return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; + case DRBD_MD_INDEX_INTERNAL: + /* with drbd08, internal meta data is always "flexible" */ + case DRBD_MD_INDEX_FLEX_INT: + /* sizeof(struct md_on_disk_07) == 4k + * position: last 4k aligned block of 4k size */ + if (!bdev->backing_bdev) { + if (__ratelimit(&drbd_ratelimit_state)) { + dev_err(DEV, "bdev->backing_bdev==NULL\n"); + dump_stack(); + } + return 0; + } + return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) + - MD_AL_OFFSET; + case DRBD_MD_INDEX_FLEX_EXT: + return 0; + } +} + +static inline void +_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) +{ + list_add_tail(&w->list, &q->q); + up(&q->s); +} + +static inline void +drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) +{ + unsigned long flags; + spin_lock_irqsave(&q->q_lock, flags); + list_add(&w->list, &q->q); + up(&q->s); /* within the spinlock, + see comment near end of drbd_worker() */ + spin_unlock_irqrestore(&q->q_lock, flags); +} + +static inline void +drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) +{ + unsigned long flags; + spin_lock_irqsave(&q->q_lock, flags); + list_add_tail(&w->list, &q->q); + up(&q->s); /* within the spinlock, + see comment near end of drbd_worker() */ + spin_unlock_irqrestore(&q->q_lock, flags); +} + +static inline void wake_asender(struct drbd_conf *mdev) +{ + if (test_bit(SIGNAL_ASENDER, &mdev->flags)) + force_sig(DRBD_SIG, mdev->asender.task); +} + +static inline void request_ping(struct drbd_conf *mdev) +{ + set_bit(SEND_PING, &mdev->flags); + wake_asender(mdev); +} + +static inline int drbd_send_short_cmd(struct drbd_conf *mdev, + enum drbd_packets cmd) +{ + struct p_header h; + return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); +} + +static inline int drbd_send_ping(struct drbd_conf *mdev) +{ + struct p_header h; + return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); +} + +static inline int drbd_send_ping_ack(struct drbd_conf *mdev) +{ + struct p_header h; + return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); +} + +static inline void drbd_thread_stop(struct drbd_thread *thi) +{ + _drbd_thread_stop(thi, FALSE, TRUE); +} + +static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) +{ + _drbd_thread_stop(thi, FALSE, FALSE); +} + +static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) +{ + _drbd_thread_stop(thi, TRUE, FALSE); +} + +/* counts how many answer packets packets we expect from our peer, + * for either explicit application requests, + * or implicit barrier packets as necessary. + * increased: + * w_send_barrier + * _req_mod(req, queue_for_net_write or queue_for_net_read); + * it is much easier and equally valid to count what we queue for the + * worker, even before it actually was queued or send. + * (drbd_make_request_common; recovery path on read io-error) + * decreased: + * got_BarrierAck (respective tl_clear, tl_clear_barrier) + * _req_mod(req, data_received) + * [from receive_DataReply] + * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) + * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] + * for some reason it is NOT decreased in got_NegAck, + * but in the resulting cleanup code from report_params. + * we should try to remember the reason for that... + * _req_mod(req, send_failed or send_canceled) + * _req_mod(req, connection_lost_while_pending) + * [from tl_clear_barrier] + */ +static inline void inc_ap_pending(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->ap_pending_cnt); +} + +#define ERR_IF_CNT_IS_NEGATIVE(which) \ + if (atomic_read(&mdev->which) < 0) \ + dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ + __func__ , __LINE__ , \ + atomic_read(&mdev->which)) + +#define dec_ap_pending(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ + wake_up(&mdev->misc_wait); \ + ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) + +/* counts how many resync-related answers we still expect from the peer + * increase decrease + * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY) + * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK whith ID_SYNCER) + * (or P_NEG_ACK with ID_SYNCER) + */ +static inline void inc_rs_pending(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->rs_pending_cnt); +} + +#define dec_rs_pending(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_dec(&mdev->rs_pending_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) + +/* counts how many answers we still need to send to the peer. + * increased on + * receive_Data unless protocol A; + * we need to send a P_RECV_ACK (proto B) + * or P_WRITE_ACK (proto C) + * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK + * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA + * receive_Barrier_* we need to send a P_BARRIER_ACK + */ +static inline void inc_unacked(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->unacked_cnt); +} + +#define dec_unacked(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_dec(&mdev->unacked_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) + +#define sub_unacked(mdev, n) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_sub(n, &mdev->unacked_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) + + +static inline void put_net_conf(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->net_cnt)) + wake_up(&mdev->misc_wait); +} + +/** + * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there + * @mdev: DRBD device. + * + * You have to call put_net_conf() when finished working with mdev->net_conf. + */ +static inline int get_net_conf(struct drbd_conf *mdev) +{ + int have_net_conf; + + atomic_inc(&mdev->net_cnt); + have_net_conf = mdev->state.conn >= C_UNCONNECTED; + if (!have_net_conf) + put_net_conf(mdev); + return have_net_conf; +} + +/** + * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev + * @M: DRBD device. + * + * You have to call put_ldev() when finished working with mdev->ldev. + */ +#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) +#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) + +static inline void put_ldev(struct drbd_conf *mdev) +{ + __release(local); + if (atomic_dec_and_test(&mdev->local_cnt)) + wake_up(&mdev->misc_wait); + D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); +} + +#ifndef __CHECKER__ +static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +{ + int io_allowed; + + atomic_inc(&mdev->local_cnt); + io_allowed = (mdev->state.disk >= mins); + if (!io_allowed) + put_ldev(mdev); + return io_allowed; +} +#else +extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); +#endif + +/* you must have an "get_ldev" reference */ +static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, + unsigned long *bits_left, unsigned int *per_mil_done) +{ + /* + * this is to break it at compile time when we change that + * (we may feel 4TB maximum storage per drbd is not enough) + */ + typecheck(unsigned long, mdev->rs_total); + + /* note: both rs_total and rs_left are in bits, i.e. in + * units of BM_BLOCK_SIZE. + * for the percentage, we don't care. */ + + *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + /* >> 10 to prevent overflow, + * +1 to prevent division by zero */ + if (*bits_left > mdev->rs_total) { + /* doh. maybe a logic bug somewhere. + * may also be just a race condition + * between this and a disconnect during sync. + * for now, just prevent in-kernel buffer overflow. + */ + smp_rmb(); + dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", + drbd_conn_str(mdev->state.conn), + *bits_left, mdev->rs_total, mdev->rs_failed); + *per_mil_done = 0; + } else { + /* make sure the calculation happens in long context */ + unsigned long tmp = 1000UL - + (*bits_left >> 10)*1000UL + / ((mdev->rs_total >> 10) + 1UL); + *per_mil_done = tmp; + } +} + + +/* this throttles on-the-fly application requests + * according to max_buffers settings; + * maybe re-implement using semaphores? */ +static inline int drbd_get_max_buffers(struct drbd_conf *mdev) +{ + int mxb = 1000000; /* arbitrary limit on open requests */ + if (get_net_conf(mdev)) { + mxb = mdev->net_conf->max_buffers; + put_net_conf(mdev); + } + return mxb; +} + +static inline int drbd_state_is_stable(union drbd_state s) +{ + + /* DO NOT add a default clause, we want the compiler to warn us + * for any newly introduced state we may have forgotten to add here */ + + switch ((enum drbd_conns)s.conn) { + /* new io only accepted when there is no connection, ... */ + case C_STANDALONE: + case C_WF_CONNECTION: + /* ... or there is a well established connection. */ + case C_CONNECTED: + case C_SYNC_SOURCE: + case C_SYNC_TARGET: + case C_VERIFY_S: + case C_VERIFY_T: + case C_PAUSED_SYNC_S: + case C_PAUSED_SYNC_T: + /* maybe stable, look at the disk state */ + break; + + /* no new io accepted during tansitional states + * like handshake or teardown */ + case C_DISCONNECTING: + case C_UNCONNECTED: + case C_TIMEOUT: + case C_BROKEN_PIPE: + case C_NETWORK_FAILURE: + case C_PROTOCOL_ERROR: + case C_TEAR_DOWN: + case C_WF_REPORT_PARAMS: + case C_STARTING_SYNC_S: + case C_STARTING_SYNC_T: + case C_WF_BITMAP_S: + case C_WF_BITMAP_T: + case C_WF_SYNC_UUID: + case C_MASK: + /* not "stable" */ + return 0; + } + + switch ((enum drbd_disk_state)s.disk) { + case D_DISKLESS: + case D_INCONSISTENT: + case D_OUTDATED: + case D_CONSISTENT: + case D_UP_TO_DATE: + /* disk state is stable as well. */ + break; + + /* no new io accepted during tansitional states */ + case D_ATTACHING: + case D_FAILED: + case D_NEGOTIATING: + case D_UNKNOWN: + case D_MASK: + /* not "stable" */ + return 0; + } + + return 1; +} + +static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) +{ + int mxb = drbd_get_max_buffers(mdev); + + if (mdev->state.susp) + return 0; + if (test_bit(SUSPEND_IO, &mdev->flags)) + return 0; + + /* to avoid potential deadlock or bitmap corruption, + * in various places, we only allow new application io + * to start during "stable" states. */ + + /* no new io accepted when attaching or detaching the disk */ + if (!drbd_state_is_stable(mdev->state)) + return 0; + + /* since some older kernels don't have atomic_add_unless, + * and we are within the spinlock anyways, we have this workaround. */ + if (atomic_read(&mdev->ap_bio_cnt) > mxb) + return 0; + if (test_bit(BITMAP_IO, &mdev->flags)) + return 0; + return 1; +} + +/* I'd like to use wait_event_lock_irq, + * but I'm not sure when it got introduced, + * and not sure when it has 3 or 4 arguments */ +static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) +{ + /* compare with after_state_ch, + * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ + DEFINE_WAIT(wait); + + /* we wait here + * as long as the device is suspended + * until the bitmap is no longer on the fly during connection + * handshake as long as we would exeed the max_buffer limit. + * + * to avoid races with the reconnect code, + * we need to atomic_inc within the spinlock. */ + + spin_lock_irq(&mdev->req_lock); + while (!__inc_ap_bio_cond(mdev)) { + prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&mdev->req_lock); + schedule(); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->req_lock); + } + atomic_add(one_or_two, &mdev->ap_bio_cnt); + spin_unlock_irq(&mdev->req_lock); +} + +static inline void dec_ap_bio(struct drbd_conf *mdev) +{ + int mxb = drbd_get_max_buffers(mdev); + int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); + + D_ASSERT(ap_bio >= 0); + /* this currently does wake_up for every dec_ap_bio! + * maybe rather introduce some type of hysteresis? + * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ + if (ap_bio < mxb) + wake_up(&mdev->misc_wait); + if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + } +} + +static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) +{ + mdev->ed_uuid = val; +} + +static inline int seq_cmp(u32 a, u32 b) +{ + /* we assume wrap around at 32bit. + * for wrap around at 24bit (old atomic_t), + * we'd have to + * a <<= 8; b <<= 8; + */ + return (s32)(a) - (s32)(b); +} +#define seq_lt(a, b) (seq_cmp((a), (b)) < 0) +#define seq_gt(a, b) (seq_cmp((a), (b)) > 0) +#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) +#define seq_le(a, b) (seq_cmp((a), (b)) <= 0) +/* CAUTION: please no side effects in arguments! */ +#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b))) + +static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +{ + unsigned int m; + spin_lock(&mdev->peer_seq_lock); + m = seq_max(mdev->peer_seq, new_seq); + mdev->peer_seq = m; + spin_unlock(&mdev->peer_seq_lock); + if (m == new_seq) + wake_up(&mdev->seq_wait); +} + +static inline void drbd_update_congested(struct drbd_conf *mdev) +{ + struct sock *sk = mdev->data.socket->sk; + if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) + set_bit(NET_CONGESTED, &mdev->flags); +} + +static inline int drbd_queue_order_type(struct drbd_conf *mdev) +{ + /* sorry, we currently have no working implementation + * of distributed TCQ stuff */ +#ifndef QUEUE_ORDERED_NONE +#define QUEUE_ORDERED_NONE 0 +#endif + return QUEUE_ORDERED_NONE; +} + +static inline void drbd_blk_run_queue(struct request_queue *q) +{ + if (q && q->unplug_fn) + q->unplug_fn(q); +} + +static inline void drbd_kick_lo(struct drbd_conf *mdev) +{ + if (get_ldev(mdev)) { + drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev)); + put_ldev(mdev); + } +} + +static inline void drbd_md_flush(struct drbd_conf *mdev) +{ + int r; + + if (test_bit(MD_NO_BARRIER, &mdev->flags)) + return; + + r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); + if (r) { + set_bit(MD_NO_BARRIER, &mdev->flags); + dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); + } +} + +#endif -- cgit v1.2.3 From 6a0afdf58d40200abd0c717261d1bc4c49195c2f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Oct 2009 09:04:14 +0200 Subject: drbd: remove tracing bits They should be reimplemented in the current scheme. Signed-off-by: Jens Axboe --- drivers/block/drbd/Kconfig | 11 - drivers/block/drbd/Makefile | 3 - drivers/block/drbd/drbd_actlog.c | 62 +-- drivers/block/drbd/drbd_int.h | 7 - drivers/block/drbd/drbd_main.c | 36 +- drivers/block/drbd/drbd_nl.c | 9 - drivers/block/drbd/drbd_receiver.c | 30 +- drivers/block/drbd/drbd_req.c | 11 - drivers/block/drbd/drbd_tracing.c | 752 ------------------------------------- drivers/block/drbd/drbd_tracing.h | 87 ----- drivers/block/drbd/drbd_worker.c | 16 - 11 files changed, 3 insertions(+), 1021 deletions(-) delete mode 100644 drivers/block/drbd/drbd_tracing.c delete mode 100644 drivers/block/drbd/drbd_tracing.h (limited to 'drivers/block/drbd/drbd_int.h') diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 4e6f90f487c2..f4acd04ebeef 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -38,17 +38,6 @@ config BLK_DEV_DRBD If unsure, say N. -config DRBD_TRACE - tristate "DRBD tracing" - depends on BLK_DEV_DRBD - select TRACEPOINTS - default n - help - - Say Y here if you want to be able to trace various events in DRBD. - - If unsure, say N. - config DRBD_FAULT_INJECTION bool "DRBD fault injection" depends on BLK_DEV_DRBD diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 7d86ef8a8b40..0d3f337ff5ff 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -2,7 +2,4 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o -drbd_trace-y := drbd_tracing.o - obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o -obj-$(CONFIG_DRBD_TRACE) += drbd_trace.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 74b4835d3107..17956ff6a08d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -26,7 +26,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_wrappers.h" /* We maintain a trivial check sum in our on disk activity log. @@ -66,17 +65,6 @@ struct drbd_atodb_wait { int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); -/* The actual tracepoint needs to have constant number of known arguments... - */ -void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - trace__drbd_resync(mdev, level, fmt, ap); - va_end(ap); -} - static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, @@ -105,8 +93,6 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; - trace_drbd_bio(mdev, "Md", bio, 0, NULL); - if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else @@ -236,8 +222,6 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - trace_drbd_actlog(mdev, sector, "al_begin_io"); - wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); if (al_ext->lc_number != enr) { @@ -270,8 +254,6 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) struct lc_element *extent; unsigned long flags; - trace_drbd_actlog(mdev, sector, "al_complete_io"); - spin_lock_irqsave(&mdev->al_lock, flags); extent = lc_find(mdev->act_log, enr); @@ -967,10 +949,6 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - trace_drbd_resync(mdev, TRACE_LVL_METRICS, - "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - if (sbnr > ebnr) return; @@ -1045,10 +1023,6 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - trace_drbd_resync(mdev, TRACE_LVL_METRICS, - "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - /* ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. */ spin_lock_irqsave(&mdev->al_lock, flags); @@ -1143,10 +1117,6 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i, sig; - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", - (unsigned long long)sector, enr); - sig = wait_event_interruptible(mdev->al_wait, (bm_ext = _bme_get(mdev, enr))); if (sig) @@ -1192,9 +1162,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i; - trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n", - (unsigned long long)sector); - spin_lock_irq(&mdev->al_lock); if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { /* in case you have very heavy scattered io, it may @@ -1210,11 +1177,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * the lc_put here... * we also have to wake_up */ - - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "dropping %u, apparently got 'synced' by application io\n", - mdev->resync_wenr); - e = lc_find(mdev->resync, mdev->resync_wenr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; if (bm_ext) { @@ -1242,21 +1204,14 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * but then could not set BME_LOCKED, * so we tried again. * drop the extra reference. */ - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "dropping extra reference on %u\n", enr); - bm_ext->lce.refcnt--; D_ASSERT(bm_ext->lce.refcnt > 0); } goto check_al; } else { /* do we rather want to try later? */ - if (mdev->resync_locked > mdev->resync->nr_elements-3) { - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "resync_locked = %u!\n", mdev->resync_locked); - + if (mdev->resync_locked > mdev->resync->nr_elements-3) goto try_again; - } /* Do or do not. There is no try. -- Yoda */ e = lc_get(mdev->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; @@ -1281,8 +1236,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) goto check_al; } check_al: - trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr); - for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { if (unlikely(al_enr+i == mdev->act_log->new_number)) goto try_again; @@ -1296,7 +1249,6 @@ proceed: return 0; try_again: - trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr); if (bm_ext) mdev->resync_wenr = enr; spin_unlock_irq(&mdev->al_lock); @@ -1310,10 +1262,6 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; unsigned long flags; - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", - (long long)sector, enr); - spin_lock_irqsave(&mdev->al_lock, flags); e = lc_find(mdev->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; @@ -1348,8 +1296,6 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) */ void drbd_rs_cancel_all(struct drbd_conf *mdev) { - trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n"); - spin_lock_irq(&mdev->al_lock); if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ @@ -1375,8 +1321,6 @@ int drbd_rs_del_all(struct drbd_conf *mdev) struct bm_extent *bm_ext; int i; - trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n"); - spin_lock_irq(&mdev->al_lock); if (get_ldev_if_state(mdev, D_FAILED)) { @@ -1429,10 +1373,6 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) sector_t esector, nr_sectors; int wake_up = 0; - trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, - "drbd_rs_failed_io: sector=%llus, size=%u\n", - (unsigned long long)sector, size); - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8da602e010bb..4e6255991e5b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -135,8 +135,6 @@ enum { DRBD_FAULT_MAX, }; -extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); - #ifdef CONFIG_DRBD_FAULT_INJECTION extern unsigned int _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); @@ -712,11 +710,6 @@ enum epoch_event { EV_GOT_BARRIER_NR, EV_BARRIER_DONE, EV_BECAME_LAST, - EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ - EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ - EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ - EV_TRACE_ALLOC, - EV_TRACE_FREE, EV_CLEANUP = 32, /* used as flag */ }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 80273f21a4aa..11d8ff6016ac 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -53,7 +53,6 @@ #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ #include "drbd_vli.h" @@ -80,18 +79,6 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); -DEFINE_TRACE(drbd_unplug); -DEFINE_TRACE(drbd_uuid); -DEFINE_TRACE(drbd_ee); -DEFINE_TRACE(drbd_packet); -DEFINE_TRACE(drbd_md_io); -DEFINE_TRACE(drbd_epoch); -DEFINE_TRACE(drbd_netlink); -DEFINE_TRACE(drbd_actlog); -DEFINE_TRACE(drbd_bio); -DEFINE_TRACE(_drbd_resync); -DEFINE_TRACE(drbd_req); - MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); @@ -1576,7 +1563,6 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size-sizeof(struct p_header)); - trace_drbd_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); sent = drbd_send(mdev, sock, h, size, msg_flags); ok = (sent == size); @@ -1628,8 +1614,6 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, if (!drbd_get_data_sock(mdev)) return 0; - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); - ok = (sizeof(h) == drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); ok = ok && (size == @@ -2359,7 +2343,6 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dp_flags |= DP_MAY_SET_IN_SYNC; p.dp_flags = cpu_to_be32(dp_flags); - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); @@ -2410,7 +2393,6 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE); if (ok && dgs) { @@ -2546,8 +2528,6 @@ static void drbd_unplug_fn(struct request_queue *q) { struct drbd_conf *mdev = q->queuedata; - trace_drbd_unplug(mdev, "got unplugged"); - /* unplug FIRST */ spin_lock_irq(q->queue_lock); blk_remove_plug(q); @@ -3252,8 +3232,6 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - trace_drbd_md_io(mdev, WRITE, mdev->ldev); - mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); memset(buffer, 0, 512); @@ -3308,8 +3286,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - trace_drbd_md_io(mdev, READ, bdev); - mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); @@ -3388,11 +3364,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; - for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { + for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; - - trace_drbd_uuid(mdev, i+1); - } } void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) @@ -3407,7 +3380,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) } mdev->ldev->md.uuid[idx] = val; - trace_drbd_uuid(mdev, idx); drbd_md_mark_dirty(mdev); } @@ -3417,7 +3389,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; - trace_drbd_uuid(mdev, UI_HISTORY_START); } _drbd_uuid_set(mdev, idx, val); } @@ -3436,7 +3407,6 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) dev_info(DEV, "Creating new current UUID\n"); D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; - trace_drbd_uuid(mdev, UI_BITMAP); get_random_bytes(&val, sizeof(u64)); _drbd_uuid_set(mdev, UI_CURRENT, val); @@ -3451,8 +3421,6 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; mdev->ldev->md.uuid[UI_BITMAP] = 0; - trace_drbd_uuid(mdev, UI_HISTORY_START); - trace_drbd_uuid(mdev, UI_BITMAP); } else { if (mdev->ldev->md.uuid[UI_BITMAP]) dev_warn(DEV, "bm UUID already set"); @@ -3460,7 +3428,6 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) mdev->ldev->md.uuid[UI_BITMAP] = val; mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); - trace_drbd_uuid(mdev, UI_BITMAP); } drbd_md_mark_dirty(mdev); } @@ -3727,7 +3694,6 @@ const char *drbd_buildtag(void) module_init(drbd_init) module_exit(drbd_cleanup) -/* For drbd_tracing: */ EXPORT_SYMBOL(drbd_conn_str); EXPORT_SYMBOL(drbd_role_str); EXPORT_SYMBOL(drbd_disk_str); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cfde31002dff..73c55ccb629a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -33,7 +33,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_wrappers.h" #include #include @@ -2024,8 +2023,6 @@ static void drbd_connector_callback(struct cn_msg *req) goto fail; } - trace_drbd_netlink(req, 1); - if (nlp->packet_type >= P_nl_after_last_packet) { retcode = ERR_PACKET_NR; goto fail; @@ -2063,7 +2060,6 @@ static void drbd_connector_callback(struct cn_msg *req) cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; cn_reply->flags = 0; - trace_drbd_netlink(cn_reply, 0); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); if (rr && rr != -ESRCH) printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); @@ -2157,7 +2153,6 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } @@ -2190,7 +2185,6 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } @@ -2262,7 +2256,6 @@ void drbd_bcast_ee(struct drbd_conf *mdev, reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); kfree(cn_reply); } @@ -2302,7 +2295,6 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } @@ -2356,7 +2348,6 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; reply->ret_code = ret_code; - trace_drbd_netlink(cn_reply, 0); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); if (rr && rr != -ESRCH) printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2f81821c2e06..360baf60f574 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -47,7 +47,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" #include "drbd_vli.h" @@ -350,8 +349,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->epoch = NULL; e->flags = 0; - trace_drbd_ee(mdev, e, "allocated"); - return e; fail2: @@ -366,7 +363,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct bio *bio = e->private_bio; - trace_drbd_ee(mdev, e, "freed"); drbd_pp_free_bio_pages(mdev, bio); bio_put(bio); D_ASSERT(hlist_unhashed(&e->colision)); @@ -420,7 +416,6 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) * all ignore the last argument. */ list_for_each_entry_safe(e, t, &work_list, w.list) { - trace_drbd_ee(mdev, e, "process_done_ee"); /* list_del not necessary, next/prev members not touched */ ok = e->w.cb(mdev, &e->w, !ok) && ok; drbd_free_ee(mdev, e); @@ -1021,8 +1016,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, break; } - trace_drbd_epoch(mdev, epoch, ev); - if (epoch_size != 0 && atomic_read(&epoch->active) == 0 && test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && @@ -1054,7 +1047,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, list_del(&epoch->list); ev = EV_BECAME_LAST | (ev & EV_CLEANUP); mdev->epochs--; - trace_drbd_epoch(mdev, epoch, EV_TRACE_FREE); kfree(epoch); if (rv == FE_STILL_LIVE) @@ -1080,7 +1072,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, struct flush_work *fw; fw = kmalloc(sizeof(*fw), GFP_ATOMIC); if (fw) { - trace_drbd_epoch(mdev, epoch, EV_TRACE_FLUSH); fw->w.cb = w_flush; fw->epoch = epoch; drbd_queue_work(&mdev->data.work, &fw->w); @@ -1251,7 +1242,6 @@ static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) list_add(&epoch->list, &mdev->current_epoch->list); mdev->current_epoch = epoch; mdev->epochs++; - trace_drbd_epoch(mdev, epoch, EV_TRACE_ALLOC); } else { /* The current_epoch got recycled while we allocated this one... */ kfree(epoch); @@ -1458,8 +1448,6 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_add(&e->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); - trace_drbd_ee(mdev, e, "submitting for (rs)write"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); /* accounting done in endio */ @@ -1721,16 +1709,13 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); if (epoch == e->epoch) { set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); rw |= (1<flags |= EE_IS_BARRIER; } else { if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); - trace_drbd_epoch(mdev, epoch, EV_TRACE_SETTING_BI); set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); rw |= (1<flags |= EE_IS_BARRIER; } @@ -1905,8 +1890,6 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) } e->private_bio->bi_rw = rw; - trace_drbd_ee(mdev, e, "submitting for (data)write"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); /* accounting done in endio */ @@ -2065,8 +2048,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) inc_unacked(mdev); - trace_drbd_ee(mdev, e, "submitting for read"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, fault_type, e->private_bio); maybe_kick_lo(mdev); @@ -3543,9 +3524,6 @@ static void drbdd(struct drbd_conf *mdev) drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); break; } - - trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, - __FILE__, __LINE__); } } @@ -3825,9 +3803,6 @@ static int drbd_do_handshake(struct drbd_conf *mdev) return 0; } - trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, - __FILE__, __LINE__); - p->protocol_min = be32_to_cpu(p->protocol_min); p->protocol_max = be32_to_cpu(p->protocol_max); if (p->protocol_max == 0) @@ -4420,14 +4395,11 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header)) { - trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + ERR_IF(len != expect-sizeof(struct p_header)) goto reconnect; - } } if (received == expect) { D_ASSERT(cmd != NULL); - trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); if (!cmd->process(mdev, h)) goto reconnect; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1aaa397669a8..3678d3d66c6c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -28,7 +28,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" @@ -218,7 +217,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m) { - trace_drbd_bio(mdev, "Rq", m->bio, 1, NULL); bio_endio(m->bio, m->error); dec_ap_bio(mdev); } @@ -236,8 +234,6 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* only WRITES may end up here without a master bio (on barrier ack) */ int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; - trace_drbd_req(req, nothing, "_req_may_be_done"); - /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) * not yet acknowledged by the peer @@ -415,8 +411,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, struct drbd_conf *mdev = req->mdev; m->bio = NULL; - trace_drbd_req(req, what, NULL); - switch (what) { default: dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); @@ -666,7 +660,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ dev_err(DEV, "FIXME (barrier_acked but pending)\n"); - trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)"); list_move(&req->tl_requests, &mdev->out_of_sequence_requests); } D_ASSERT(req->rq_state & RQ_NET_SENT); @@ -736,8 +729,6 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) return 0; } - trace_drbd_bio(mdev, "Rq", bio, 0, req); - local = get_ldev(mdev); if (!local) { bio_put(req->private_bio); /* or we get a bio leak */ @@ -928,8 +919,6 @@ allocate_barrier: if (local) { req->private_bio->bi_bdev = mdev->ldev->backing_bdev; - trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); - if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR : rw == READ ? DRBD_FAULT_DT_RD : DRBD_FAULT_DT_RA)) diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c deleted file mode 100644 index d18d4f7b4bef..000000000000 --- a/drivers/block/drbd/drbd_tracing.c +++ /dev/null @@ -1,752 +0,0 @@ -/* - drbd_tracing.c - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - */ - -#include -#include -#include -#include "drbd_int.h" -#include "drbd_tracing.h" -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Philipp Reisner, Lars Ellenberg"); -MODULE_DESCRIPTION("DRBD tracepoint probes"); -MODULE_PARM_DESC(trace_mask, "Bitmap of events to trace see drbd_tracing.c"); -MODULE_PARM_DESC(trace_level, "Current tracing level (changeable in /sys)"); -MODULE_PARM_DESC(trace_devs, "Bitmap of devices to trace (changeable in /sys)"); - -unsigned int trace_mask = 0; /* Bitmap of events to trace */ -int trace_level; /* Current trace level */ -int trace_devs; /* Bitmap of devices to trace */ - -module_param(trace_mask, uint, 0444); -module_param(trace_level, int, 0644); -module_param(trace_devs, int, 0644); - -enum { - TRACE_PACKET = 0x0001, - TRACE_RQ = 0x0002, - TRACE_UUID = 0x0004, - TRACE_RESYNC = 0x0008, - TRACE_EE = 0x0010, - TRACE_UNPLUG = 0x0020, - TRACE_NL = 0x0040, - TRACE_AL_EXT = 0x0080, - TRACE_INT_RQ = 0x0100, - TRACE_MD_IO = 0x0200, - TRACE_EPOCH = 0x0400, -}; - -/* Buffer printing support - * dbg_print_flags: used for Flags arg to drbd_print_buffer - * - DBGPRINT_BUFFADDR; if set, each line starts with the - * virtual address of the line being output. If clear, - * each line starts with the offset from the beginning - * of the buffer. */ -enum dbg_print_flags { - DBGPRINT_BUFFADDR = 0x0001, -}; - -/* Macro stuff */ -static char *nl_packet_name(int packet_type) -{ -/* Generate packet type strings */ -#define NL_PACKET(name, number, fields) \ - [P_ ## name] = # name, -#define NL_INTEGER Argh! -#define NL_BIT Argh! -#define NL_INT64 Argh! -#define NL_STRING Argh! - - static char *nl_tag_name[P_nl_after_last_packet] = { -#include "linux/drbd_nl.h" - }; - - return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ? - nl_tag_name[packet_type] : "*Unknown*"; -} -/* /Macro stuff */ - -static inline int is_mdev_trace(struct drbd_conf *mdev, unsigned int level) -{ - return trace_level >= level && ((1 << mdev_to_minor(mdev)) & trace_devs); -} - -static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "%s, ap_bio_count=%d\n", msg, atomic_read(&mdev->ap_bio_cnt)); -} - -static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) -{ - static char *uuid_str[UI_EXTENDED_SIZE] = { - [UI_CURRENT] = "CURRENT", - [UI_BITMAP] = "BITMAP", - [UI_HISTORY_START] = "HISTORY_START", - [UI_HISTORY_END] = "HISTORY_END", - [UI_SIZE] = "SIZE", - [UI_FLAGS] = "FLAGS", - }; - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - if (index >= UI_EXTENDED_SIZE) { - dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); - return; - } - - dev_info(DEV, " uuid[%s] now %016llX\n", - uuid_str[index], - (unsigned long long)mdev->ldev->md.uuid[index]); -} - -static void probe_drbd_md_io(struct drbd_conf *mdev, int rw, - struct drbd_backing_dev *bdev) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, " %s metadata superblock now\n", - rw == READ ? "Reading" : "Writing"); -} - -static void probe_drbd_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "EE %s sec=%llus size=%u e=%p\n", - msg, (unsigned long long)e->sector, e->size, e); -} - -static void probe_drbd_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, - enum epoch_event ev) -{ - static char *epoch_event_str[] = { - [EV_PUT] = "put", - [EV_GOT_BARRIER_NR] = "got_barrier_nr", - [EV_BARRIER_DONE] = "barrier_done", - [EV_BECAME_LAST] = "became_last", - [EV_TRACE_FLUSH] = "issuing_flush", - [EV_TRACE_ADD_BARRIER] = "added_barrier", - [EV_TRACE_SETTING_BI] = "just set barrier_in_next_epoch", - }; - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - ev &= ~EV_CLEANUP; - - switch (ev) { - case EV_TRACE_ALLOC: - dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); - break; - case EV_TRACE_FREE: - dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", - epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), - mdev->epochs); - break; - default: - dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", - epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), - atomic_read(&epoch->active), - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', - test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-', - epoch_event_str[ev]); - } -} - -static void probe_drbd_netlink(void *data, int is_req) -{ - struct cn_msg *msg = data; - - if (is_req) { - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)msg->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->drbd_minor, - nl_packet_name(nlp->packet_type), - nlp->packet_type, - msg->seq, msg->ack, msg->len); - } else { - struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)msg->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->minor, - nlp->packet_type == P_nl_after_last_packet ? - "Empty-Reply" : nl_packet_name(nlp->packet_type), - nlp->packet_type, - msg->seq, msg->ack, msg->len); - } -} - -static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg) -{ - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "%s (sec=%llus, al_enr=%u, rs_enr=%d)\n", - msg, (unsigned long long) sector, enr, - (int)BM_SECT_TO_EXT(sector)); -} - -/** - * drbd_print_buffer() - Hexdump arbitrary binary data into a buffer - * @prefix: String is output at the beginning of each line output. - * @flags: Currently only defined flag: DBGPRINT_BUFFADDR; if set, each - * line starts with the virtual address of the line being - * output. If clear, each line starts with the offset from the - * beginning of the buffer. - * @size: Indicates the size of each entry in the buffer. Supported - * values are sizeof(char), sizeof(short) and sizeof(int) - * @buffer: Start address of buffer - * @buffer_va: Virtual address of start of buffer (normally the same - * as Buffer, but having it separate allows it to hold - * file address for example) - * @length: length of buffer - */ -static void drbd_print_buffer(const char *prefix, unsigned int flags, int size, - const void *buffer, const void *buffer_va, - unsigned int length) - -#define LINE_SIZE 16 -#define LINE_ENTRIES (int)(LINE_SIZE/size) -{ - const unsigned char *pstart; - const unsigned char *pstart_va; - const unsigned char *pend; - char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; - char *pbytes = bytes_str, *pascii = ascii_str; - int offset = 0; - long sizemask; - int field_width; - int index; - const unsigned char *pend_str; - const unsigned char *p; - int count; - - /* verify size parameter */ - if (size != sizeof(char) && - size != sizeof(short) && - size != sizeof(int)) { - printk(KERN_DEBUG "drbd_print_buffer: " - "ERROR invalid size %d\n", size); - return; - } - - sizemask = size-1; - field_width = size*2; - - /* Adjust start/end to be on appropriate boundary for size */ - buffer = (const char *)((long)buffer & ~sizemask); - pend = (const unsigned char *) - (((long)buffer + length + sizemask) & ~sizemask); - - if (flags & DBGPRINT_BUFFADDR) { - /* Move start back to nearest multiple of line size, - * if printing address. This results in nicely formatted output - * with addresses being on line size (16) byte boundaries */ - pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); - } else { - pstart = (const unsigned char *)buffer; - } - - /* Set value of start VA to print if addresses asked for */ - pstart_va = (const unsigned char *)buffer_va - - ((const unsigned char *)buffer-pstart); - - /* Calculate end position to nicely align right hand side */ - pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); - - /* Init strings */ - *pbytes = *pascii = '\0'; - - /* Start at beginning of first line */ - p = pstart; - count = 0; - - while (p < pend_str) { - if (p < (const unsigned char *)buffer || p >= pend) { - /* Before start of buffer or after end- print spaces */ - pbytes += sprintf(pbytes, "%*c ", field_width, ' '); - pascii += sprintf(pascii, "%*c", size, ' '); - p += size; - } else { - /* Add hex and ascii to strings */ - int val; - switch (size) { - default: - case 1: - val = *(unsigned char *)p; - break; - case 2: - val = *(unsigned short *)p; - break; - case 4: - val = *(unsigned int *)p; - break; - } - - pbytes += sprintf(pbytes, "%0*x ", field_width, val); - - for (index = size; index; index--) { - *pascii++ = isprint(*p) ? *p : '.'; - p++; - } - } - - count++; - - if (count == LINE_ENTRIES || p >= pend_str) { - /* Null terminate and print record */ - *pascii = '\0'; - printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", - prefix, - (flags & DBGPRINT_BUFFADDR) - ? (long)pstart_va:(long)offset, - LINE_ENTRIES*(field_width+1), bytes_str, - LINE_SIZE, ascii_str); - - /* Move onto next line */ - pstart_va += (p-pstart); - pstart = p; - count = 0; - offset += LINE_SIZE; - - /* Re-init strings */ - pbytes = bytes_str; - pascii = ascii_str; - *pbytes = *pascii = '\0'; - } - } -} - -static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, va_list args) -{ - char str[256]; - - if (!is_mdev_trace(mdev, level)) - return; - - if (vsnprintf(str, 256, fmt, args) >= 256) - str[255] = 0; - - printk(KERN_INFO "%s %s: %s", dev_driver_string(disk_to_dev(mdev->vdisk)), - dev_name(disk_to_dev(mdev->vdisk)), str); -} - -static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, - struct drbd_request *r) -{ -#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD) -#define SECTOR_FORMAT "%Lx" -#else -#define SECTOR_FORMAT "%lx" -#endif -#define SECTOR_SHIFT 9 - - unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT); - char *faddr = (char *)(lowaddr); - char rb[sizeof(void *)*2+6] = { 0, }; - struct bio_vec *bvec; - int segno; - - const int rw = bio->bi_rw; - const int biorw = (rw & (RW_MASK|RWA_MASK)); - const int biobarrier = (rw & (1<>>", - pfx, - biorw == WRITE ? "Write" : "Read", - biobarrier ? " : B" : "", - biosync ? " : S" : "", - bio, - rb, - complete ? (bio_flagged(bio, BIO_UPTODATE) ? "Success, " : "Failed, ") : "", - bio->bi_sector << SECTOR_SHIFT, - bio->bi_size); - - if (trace_level >= TRACE_LVL_METRICS && - ((biorw == WRITE) ^ complete)) { - printk(KERN_DEBUG " ind page offset length\n"); - __bio_for_each_segment(bvec, bio, segno, 0) { - printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno, - bvec->bv_page, bvec->bv_offset, bvec->bv_len); - - if (trace_level >= TRACE_LVL_ALL) { - char *bvec_buf; - unsigned long flags; - - bvec_buf = bvec_kmap_irq(bvec, &flags); - - drbd_print_buffer(" ", DBGPRINT_BUFFADDR, 1, - bvec_buf, - faddr, - (bvec->bv_len <= 0x80) - ? bvec->bv_len : 0x80); - - bvec_kunmap_irq(bvec_buf, &flags); - - if (bvec->bv_len > 0x40) - printk(KERN_DEBUG " ....\n"); - - faddr += bvec->bv_len; - } - } - } -} - -static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, char *msg) -{ - static const char *rq_event_names[] = { - [created] = "created", - [to_be_send] = "to_be_send", - [to_be_submitted] = "to_be_submitted", - [queue_for_net_write] = "queue_for_net_write", - [queue_for_net_read] = "queue_for_net_read", - [send_canceled] = "send_canceled", - [send_failed] = "send_failed", - [handed_over_to_network] = "handed_over_to_network", - [connection_lost_while_pending] = - "connection_lost_while_pending", - [recv_acked_by_peer] = "recv_acked_by_peer", - [write_acked_by_peer] = "write_acked_by_peer", - [neg_acked] = "neg_acked", - [conflict_discarded_by_peer] = "conflict_discarded_by_peer", - [barrier_acked] = "barrier_acked", - [data_received] = "data_received", - [read_completed_with_error] = "read_completed_with_error", - [read_ahead_completed_with_error] = "reada_completed_with_error", - [write_completed_with_error] = "write_completed_with_error", - [completed_ok] = "completed_ok", - }; - - struct drbd_conf *mdev = req->mdev; - - const int rw = (req->master_bio == NULL || - bio_data_dir(req->master_bio) == WRITE) ? - 'W' : 'R'; - const unsigned long s = req->rq_state; - - if (what != nothing) { - dev_info(DEV, "__req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); - } else { - dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", - msg, req, rw, - s & RQ_LOCAL_PENDING ? 'p' : '-', - s & RQ_LOCAL_COMPLETED ? 'c' : '-', - s & RQ_LOCAL_OK ? 'o' : '-', - s & RQ_NET_PENDING ? 'p' : '-', - s & RQ_NET_QUEUED ? 'q' : '-', - s & RQ_NET_SENT ? 's' : '-', - s & RQ_NET_DONE ? 'd' : '-', - s & RQ_NET_OK ? 'o' : '-', - req->epoch, - (unsigned long long)req->sector, - req->size, - drbd_conn_str(mdev->state.conn)); - } -} - - -#define drbd_peer_str drbd_role_str -#define drbd_pdsk_str drbd_disk_str - -#define PSM(A) \ -do { \ - if (mask.A) { \ - int i = snprintf(p, len, " " #A "( %s )", \ - drbd_##A##_str(val.A)); \ - if (i >= len) \ - return op; \ - p += i; \ - len -= i; \ - } \ -} while (0) - -static char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) -{ - char *op = p; - *p = '\0'; - PSM(role); - PSM(peer); - PSM(conn); - PSM(disk); - PSM(pdsk); - - return op; -} - -#define INFOP(fmt, args...) \ -do { \ - if (trace_level >= TRACE_LVL_ALL) { \ - dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ - file, line, current->comm, current->pid, \ - sockname, recv ? "<<<" : ">>>" , \ - ## args); \ - } else { \ - dev_info(DEV, "%s %s " fmt, sockname, \ - recv ? "<<<" : ">>>" , \ - ## args); \ - } \ -} while (0) - -static char *_dump_block_id(u64 block_id, char *buff) -{ - if (is_syncer_block_id(block_id)) - strcpy(buff, "SyncerId"); - else - sprintf(buff, "%llx", (unsigned long long)block_id); - - return buff; -} - -static void probe_drbd_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line) -{ - char *sockname = sock == mdev->meta.socket ? "meta" : "data"; - int cmd = (recv == 2) ? p->header.command : be16_to_cpu(p->header.command); - char tmp[300]; - union drbd_state m, v; - - switch (cmd) { - case P_HAND_SHAKE: - INFOP("%s (protocol %u-%u)\n", cmdname(cmd), - be32_to_cpu(p->handshake.protocol_min), - be32_to_cpu(p->handshake.protocol_max)); - break; - - case P_BITMAP: /* don't report this */ - case P_COMPRESSED_BITMAP: /* don't report this */ - break; - - case P_DATA: - INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp), - be32_to_cpu(p->data.seq_num), - be32_to_cpu(p->data.dp_flags) - ); - break; - - case P_DATA_REPLY: - case P_RS_DATA_REPLY: - INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp) - ); - break; - - case P_RECV_ACK: - case P_WRITE_ACK: - case P_RS_WRITE_ACK: - case P_DISCARD_ACK: - case P_NEG_ACK: - case P_NEG_RS_DREPLY: - INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", - cmdname(cmd), - (long long)be64_to_cpu(p->block_ack.sector), - be32_to_cpu(p->block_ack.blksize), - _dump_block_id(p->block_ack.block_id, tmp), - be32_to_cpu(p->block_ack.seq_num) - ); - break; - - case P_DATA_REQUEST: - case P_RS_DATA_REQUEST: - INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), - (long long)be64_to_cpu(p->block_req.sector), - be32_to_cpu(p->block_req.blksize), - _dump_block_id(p->block_req.block_id, tmp) - ); - break; - - case P_BARRIER: - case P_BARRIER_ACK: - INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); - break; - - case P_SYNC_PARAM: - case P_SYNC_PARAM89: - INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", - cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), - p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); - break; - - case P_UUIDS: - INFOP("%s Curr:%016llX, Bitmap:%016llX, " - "HisSt:%016llX, HisEnd:%016llX\n", - cmdname(cmd), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); - break; - - case P_SIZES: - INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " - "max bio %x, q order %x)\n", - cmdname(cmd), - (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), - be32_to_cpu(p->sizes.max_segment_size), - be32_to_cpu(p->sizes.queue_order_type)); - break; - - case P_STATE: - v.i = be32_to_cpu(p->state.state); - m.i = 0xffffffff; - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); - break; - - case P_STATE_CHG_REQ: - m.i = be32_to_cpu(p->req_state.mask); - v.i = be32_to_cpu(p->req_state.val); - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); - break; - - case P_STATE_CHG_REPLY: - INFOP("%s (ret %x)\n", cmdname(cmd), - be32_to_cpu(p->req_state_reply.retcode)); - break; - - case P_PING: - case P_PING_ACK: - /* - * Dont trace pings at summary level - */ - if (trace_level < TRACE_LVL_ALL) - break; - /* fall through... */ - default: - INFOP("%s (%u)\n", cmdname(cmd), cmd); - break; - } -} - - -static int __init drbd_trace_init(void) -{ - int ret; - - if (trace_mask & TRACE_UNPLUG) { - ret = register_trace_drbd_unplug(probe_drbd_unplug); - WARN_ON(ret); - } - if (trace_mask & TRACE_UUID) { - ret = register_trace_drbd_uuid(probe_drbd_uuid); - WARN_ON(ret); - } - if (trace_mask & TRACE_EE) { - ret = register_trace_drbd_ee(probe_drbd_ee); - WARN_ON(ret); - } - if (trace_mask & TRACE_PACKET) { - ret = register_trace_drbd_packet(probe_drbd_packet); - WARN_ON(ret); - } - if (trace_mask & TRACE_MD_IO) { - ret = register_trace_drbd_md_io(probe_drbd_md_io); - WARN_ON(ret); - } - if (trace_mask & TRACE_EPOCH) { - ret = register_trace_drbd_epoch(probe_drbd_epoch); - WARN_ON(ret); - } - if (trace_mask & TRACE_NL) { - ret = register_trace_drbd_netlink(probe_drbd_netlink); - WARN_ON(ret); - } - if (trace_mask & TRACE_AL_EXT) { - ret = register_trace_drbd_actlog(probe_drbd_actlog); - WARN_ON(ret); - } - if (trace_mask & TRACE_RQ) { - ret = register_trace_drbd_bio(probe_drbd_bio); - WARN_ON(ret); - } - if (trace_mask & TRACE_INT_RQ) { - ret = register_trace_drbd_req(probe_drbd_req); - WARN_ON(ret); - } - if (trace_mask & TRACE_RESYNC) { - ret = register_trace__drbd_resync(probe_drbd_resync); - WARN_ON(ret); - } - return 0; -} - -module_init(drbd_trace_init); - -static void __exit drbd_trace_exit(void) -{ - if (trace_mask & TRACE_UNPLUG) - unregister_trace_drbd_unplug(probe_drbd_unplug); - if (trace_mask & TRACE_UUID) - unregister_trace_drbd_uuid(probe_drbd_uuid); - if (trace_mask & TRACE_EE) - unregister_trace_drbd_ee(probe_drbd_ee); - if (trace_mask & TRACE_PACKET) - unregister_trace_drbd_packet(probe_drbd_packet); - if (trace_mask & TRACE_MD_IO) - unregister_trace_drbd_md_io(probe_drbd_md_io); - if (trace_mask & TRACE_EPOCH) - unregister_trace_drbd_epoch(probe_drbd_epoch); - if (trace_mask & TRACE_NL) - unregister_trace_drbd_netlink(probe_drbd_netlink); - if (trace_mask & TRACE_AL_EXT) - unregister_trace_drbd_actlog(probe_drbd_actlog); - if (trace_mask & TRACE_RQ) - unregister_trace_drbd_bio(probe_drbd_bio); - if (trace_mask & TRACE_INT_RQ) - unregister_trace_drbd_req(probe_drbd_req); - if (trace_mask & TRACE_RESYNC) - unregister_trace__drbd_resync(probe_drbd_resync); - - tracepoint_synchronize_unregister(); -} - -module_exit(drbd_trace_exit); diff --git a/drivers/block/drbd/drbd_tracing.h b/drivers/block/drbd/drbd_tracing.h deleted file mode 100644 index c4531a137f65..000000000000 --- a/drivers/block/drbd/drbd_tracing.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - drbd_tracing.h - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - */ - -#ifndef DRBD_TRACING_H -#define DRBD_TRACING_H - -#include -#include "drbd_int.h" -#include "drbd_req.h" - -enum { - TRACE_LVL_ALWAYS = 0, - TRACE_LVL_SUMMARY, - TRACE_LVL_METRICS, - TRACE_LVL_ALL, - TRACE_LVL_MAX -}; - -DECLARE_TRACE(drbd_unplug, - TP_PROTO(struct drbd_conf *mdev, char* msg), - TP_ARGS(mdev, msg)); - -DECLARE_TRACE(drbd_uuid, - TP_PROTO(struct drbd_conf *mdev, enum drbd_uuid_index index), - TP_ARGS(mdev, index)); - -DECLARE_TRACE(drbd_ee, - TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg), - TP_ARGS(mdev, e, msg)); - -DECLARE_TRACE(drbd_md_io, - TP_PROTO(struct drbd_conf *mdev, int rw, struct drbd_backing_dev *bdev), - TP_ARGS(mdev, rw, bdev)); - -DECLARE_TRACE(drbd_epoch, - TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev), - TP_ARGS(mdev, epoch, ev)); - -DECLARE_TRACE(drbd_netlink, - TP_PROTO(void *data, int is_req), - TP_ARGS(data, is_req)); - -DECLARE_TRACE(drbd_actlog, - TP_PROTO(struct drbd_conf *mdev, sector_t sector, char* msg), - TP_ARGS(mdev, sector, msg)); - -DECLARE_TRACE(drbd_bio, - TP_PROTO(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, - struct drbd_request *r), - TP_ARGS(mdev, pfx, bio, complete, r)); - -DECLARE_TRACE(drbd_req, - TP_PROTO(struct drbd_request *req, enum drbd_req_event what, char *msg), - TP_ARGS(req, what, msg)); - -DECLARE_TRACE(drbd_packet, - TP_PROTO(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line), - TP_ARGS(mdev, sock, recv, p, file, line)); - -DECLARE_TRACE(_drbd_resync, - TP_PROTO(struct drbd_conf *mdev, int level, const char *fmt, va_list args), - TP_ARGS(mdev, level, fmt, args)); - -#endif diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 34a4b3ef6c0e..ed8796f1112d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -40,7 +40,6 @@ #include "drbd_int.h" #include "drbd_req.h" -#include "drbd_tracing.h" #define SLEEP_TIME (HZ/10) @@ -82,8 +81,6 @@ void drbd_md_io_complete(struct bio *bio, int error) md_io = (struct drbd_md_io *)bio->bi_private; md_io->error = error; - trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL); - complete(&md_io->event); } @@ -114,8 +111,6 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) D_ASSERT(e->block_id != ID_VACANT); - trace_drbd_bio(mdev, "Sec", bio, 1, NULL); - spin_lock_irqsave(&mdev->req_lock, flags); mdev->read_cnt += e->size >> 9; list_del(&e->w.list); @@ -126,8 +121,6 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) drbd_chk_io_error(mdev, error, FALSE); drbd_queue_work(&mdev->data.work, &e->w); put_ldev(mdev); - - trace_drbd_ee(mdev, e, "read completed"); } /* writes on behalf of the partner, or resync writes, @@ -176,8 +169,6 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) D_ASSERT(e->block_id != ID_VACANT); - trace_drbd_bio(mdev, "Sec", bio, 1, NULL); - spin_lock_irqsave(&mdev->req_lock, flags); mdev->writ_cnt += e->size >> 9; is_syncer_req = is_syncer_block_id(e->block_id); @@ -192,8 +183,6 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - trace_drbd_ee(mdev, e, "write completed"); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. * done from "drbd_process_done_ee" within the appropriate w.cb @@ -244,8 +233,6 @@ void drbd_endio_pri(struct bio *bio, int error) error = -EIO; } - trace_drbd_bio(mdev, "Pri", bio, 1, NULL); - /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) @@ -1321,9 +1308,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) return; } - trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", - side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); - /* In case a previous resync run was aborted by an IO error/detach on the peer. */ drbd_rs_cancel_all(mdev); -- cgit v1.2.3 From 132cc538cd90f60a0b5df6a512dfd4bc5fe2039a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 7 Oct 2009 19:26:00 +0200 Subject: drbd: needs __ratelimit() drbd_int.h uses __ratelimit(), so it needs to #include ratelimit.h: drivers/block/drbd/drbd_int.h:1765: error: implicit declaration of function '__ratelimit' Signed-off-by: Randy Dunlap Cc: drbd-dev@lists.linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block/drbd/drbd_int.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4e6255991e5b..2312d782fe99 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3