diff options
Diffstat (limited to 'fs')
128 files changed, 4631 insertions, 6014 deletions
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cef584451113..378acdafa356 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1751,7 +1751,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, io_parms.pid = pid; io_parms.tcon = pTcon; io_parms.offset = *poffset; - io_parms.length = len; + io_parms.length = cur_len; rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &read_data, &buf_type); pSMBr = (struct smb_com_read_rsp *)read_data; diff --git a/fs/dcache.c b/fs/dcache.c index 3c34ac0e9a1b..be18598c7fd7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1735,8 +1735,6 @@ seqretry: tname = dentry->d_name.name; i = dentry->d_inode; prefetch(tname); - if (i) - prefetch(i); /* * This seqcount check is required to ensure name and * len are loaded atomically, so as not to walk off the diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index abc49f292454..90e5997262ea 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -14,17 +14,9 @@ #include "dlm_internal.h" #include "lock.h" #include "user.h" -#include "ast.h" - -#define WAKE_ASTS 0 - -static uint64_t ast_seq_count; -static struct list_head ast_queue; -static spinlock_t ast_queue_lock; -static struct task_struct * astd_task; -static unsigned long astd_wakeflags; -static struct mutex astd_running; +static uint64_t dlm_cb_seq; +static spinlock_t dlm_cb_seq_spin; static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) { @@ -57,21 +49,13 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) } } -void dlm_del_ast(struct dlm_lkb *lkb) -{ - spin_lock(&ast_queue_lock); - if (!list_empty(&lkb->lkb_astqueue)) - list_del_init(&lkb->lkb_astqueue); - spin_unlock(&ast_queue_lock); -} - int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; uint64_t prev_seq; int prev_mode; - int i; + int i, rv; for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { if (lkb->lkb_callbacks[i].seq) @@ -100,7 +84,8 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, mode, (unsigned long long)prev_seq, prev_mode); - return 0; + rv = 0; + goto out; } } @@ -109,6 +94,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, lkb->lkb_callbacks[i].mode = mode; lkb->lkb_callbacks[i].sb_status = status; lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF); + rv = 0; break; } @@ -117,21 +103,24 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, lkb->lkb_id, (unsigned long long)seq, flags, mode, status, sbflags); dlm_dump_lkb_callbacks(lkb); - return -1; + rv = -1; + goto out; } - - return 0; + out: + return rv; } int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_callback *cb, int *resid) { - int i; + int i, rv; *resid = 0; - if (!lkb->lkb_callbacks[0].seq) - return -ENOENT; + if (!lkb->lkb_callbacks[0].seq) { + rv = -ENOENT; + goto out; + } /* oldest undelivered cb is callbacks[0] */ @@ -163,7 +152,8 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, cb->mode, (unsigned long long)lkb->lkb_last_cast.seq, lkb->lkb_last_cast.mode); - return 0; + rv = 0; + goto out; } } @@ -176,171 +166,150 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback)); lkb->lkb_last_bast_time = ktime_get(); } - - return 0; + rv = 0; + out: + return rv; } -void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, - uint32_t sbflags) +void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, + uint32_t sbflags) { - uint64_t seq; + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + uint64_t new_seq, prev_seq; int rv; - spin_lock(&ast_queue_lock); - - seq = ++ast_seq_count; + spin_lock(&dlm_cb_seq_spin); + new_seq = ++dlm_cb_seq; + spin_unlock(&dlm_cb_seq_spin); if (lkb->lkb_flags & DLM_IFL_USER) { - spin_unlock(&ast_queue_lock); - dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq); + dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq); return; } - rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq); - if (rv < 0) { - spin_unlock(&ast_queue_lock); - return; - } + mutex_lock(&lkb->lkb_cb_mutex); + prev_seq = lkb->lkb_callbacks[0].seq; - if (list_empty(&lkb->lkb_astqueue)) { + rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq); + if (rv < 0) + goto out; + + if (!prev_seq) { kref_get(&lkb->lkb_ref); - list_add_tail(&lkb->lkb_astqueue, &ast_queue); - } - spin_unlock(&ast_queue_lock); - set_bit(WAKE_ASTS, &astd_wakeflags); - wake_up_process(astd_task); + if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) { + mutex_lock(&ls->ls_cb_mutex); + list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay); + mutex_unlock(&ls->ls_cb_mutex); + } else { + queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); + } + } + out: + mutex_unlock(&lkb->lkb_cb_mutex); } -static void process_asts(void) +void dlm_callback_work(struct work_struct *work) { - struct dlm_ls *ls = NULL; - struct dlm_rsb *r = NULL; - struct dlm_lkb *lkb; + struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work); + struct dlm_ls *ls = lkb->lkb_resource->res_ls; void (*castfn) (void *astparam); void (*bastfn) (void *astparam, int mode); struct dlm_callback callbacks[DLM_CALLBACKS_SIZE]; int i, rv, resid; -repeat: - spin_lock(&ast_queue_lock); - list_for_each_entry(lkb, &ast_queue, lkb_astqueue) { - r = lkb->lkb_resource; - ls = r->res_ls; + memset(&callbacks, 0, sizeof(callbacks)); - if (dlm_locking_stopped(ls)) - continue; - - /* we remove from astqueue list and remove everything in - lkb_callbacks before releasing the spinlock so empty - lkb_astqueue is always consistent with empty lkb_callbacks */ - - list_del_init(&lkb->lkb_astqueue); - - castfn = lkb->lkb_astfn; - bastfn = lkb->lkb_bastfn; + mutex_lock(&lkb->lkb_cb_mutex); + if (!lkb->lkb_callbacks[0].seq) { + /* no callback work exists, shouldn't happen */ + log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id); + dlm_print_lkb(lkb); + dlm_dump_lkb_callbacks(lkb); + } - memset(&callbacks, 0, sizeof(callbacks)); + for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { + rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); + if (rv < 0) + break; + } - for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { - rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); - if (rv < 0) - break; - } - spin_unlock(&ast_queue_lock); + if (resid) { + /* cbs remain, loop should have removed all, shouldn't happen */ + log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id, + resid); + dlm_print_lkb(lkb); + dlm_dump_lkb_callbacks(lkb); + } + mutex_unlock(&lkb->lkb_cb_mutex); - if (resid) { - /* shouldn't happen, for loop should have removed all */ - log_error(ls, "callback resid %d lkb %x", - resid, lkb->lkb_id); - } + castfn = lkb->lkb_astfn; + bastfn = lkb->lkb_bastfn; - for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { - if (!callbacks[i].seq) - break; - if (callbacks[i].flags & DLM_CB_SKIP) { - continue; - } else if (callbacks[i].flags & DLM_CB_BAST) { - bastfn(lkb->lkb_astparam, callbacks[i].mode); - } else if (callbacks[i].flags & DLM_CB_CAST) { - lkb->lkb_lksb->sb_status = callbacks[i].sb_status; - lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; - castfn(lkb->lkb_astparam); - } + for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { + if (!callbacks[i].seq) + break; + if (callbacks[i].flags & DLM_CB_SKIP) { + continue; + } else if (callbacks[i].flags & DLM_CB_BAST) { + bastfn(lkb->lkb_astparam, callbacks[i].mode); + } else if (callbacks[i].flags & DLM_CB_CAST) { + lkb->lkb_lksb->sb_status = callbacks[i].sb_status; + lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; + castfn(lkb->lkb_astparam); } - - /* removes ref for ast_queue, may cause lkb to be freed */ - dlm_put_lkb(lkb); - - cond_resched(); - goto repeat; } - spin_unlock(&ast_queue_lock); -} - -static inline int no_asts(void) -{ - int ret; - spin_lock(&ast_queue_lock); - ret = list_empty(&ast_queue); - spin_unlock(&ast_queue_lock); - return ret; + /* undo kref_get from dlm_add_callback, may cause lkb to be freed */ + dlm_put_lkb(lkb); } -static int dlm_astd(void *data) +int dlm_callback_start(struct dlm_ls *ls) { - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(WAKE_ASTS, &astd_wakeflags)) - schedule(); - set_current_state(TASK_RUNNING); - - mutex_lock(&astd_running); - if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) - process_asts(); - mutex_unlock(&astd_running); + ls->ls_callback_wq = alloc_workqueue("dlm_callback", + WQ_UNBOUND | + WQ_MEM_RECLAIM | + WQ_NON_REENTRANT, + 0); + if (!ls->ls_callback_wq) { + log_print("can't start dlm_callback workqueue"); + return -ENOMEM; } return 0; } -void dlm_astd_wake(void) +void dlm_callback_stop(struct dlm_ls *ls) { - if (!no_asts()) { - set_bit(WAKE_ASTS, &astd_wakeflags); - wake_up_process(astd_task); - } + if (ls->ls_callback_wq) + destroy_workqueue(ls->ls_callback_wq); } -int dlm_astd_start(void) +void dlm_callback_suspend(struct dlm_ls *ls) { - struct task_struct *p; - int error = 0; - - INIT_LIST_HEAD(&ast_queue); - spin_lock_init(&ast_queue_lock); - mutex_init(&astd_running); - - p = kthread_run(dlm_astd, NULL, "dlm_astd"); - if (IS_ERR(p)) - error = PTR_ERR(p); - else - astd_task = p; - return error; -} + set_bit(LSFL_CB_DELAY, &ls->ls_flags); -void dlm_astd_stop(void) -{ - kthread_stop(astd_task); + if (ls->ls_callback_wq) + flush_workqueue(ls->ls_callback_wq); } -void dlm_astd_suspend(void) +void dlm_callback_resume(struct dlm_ls *ls) { - mutex_lock(&astd_running); -} + struct dlm_lkb *lkb, *safe; + int count = 0; -void dlm_astd_resume(void) -{ - mutex_unlock(&astd_running); + clear_bit(LSFL_CB_DELAY, &ls->ls_flags); + + if (!ls->ls_callback_wq) + return; + + mutex_lock(&ls->ls_cb_mutex); + list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { + list_del_init(&lkb->lkb_cb_list); + queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); + count++; + } + mutex_unlock(&ls->ls_cb_mutex); + + log_debug(ls, "dlm_callback_resume %d", count); } diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h index 8aa89c9b5611..757b551c6820 100644 --- a/fs/dlm/ast.h +++ b/fs/dlm/ast.h @@ -18,14 +18,15 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq); int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_callback *cb, int *resid); -void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, - uint32_t sbflags); +void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, + uint32_t sbflags); -void dlm_astd_wake(void); -int dlm_astd_start(void); -void dlm_astd_stop(void); -void dlm_astd_suspend(void); -void dlm_astd_resume(void); +void dlm_callback_work(struct work_struct *work); +int dlm_callback_start(struct dlm_ls *ls); +void dlm_callback_stop(struct dlm_ls *ls); +void dlm_callback_suspend(struct dlm_ls *ls); +void dlm_callback_resume(struct dlm_ls *ls); #endif + diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 9b026ea8baa9..6cf72fcc0d0c 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -28,7 +28,8 @@ * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight * /config/dlm/<cluster>/comms/<comm>/nodeid * /config/dlm/<cluster>/comms/<comm>/local - * /config/dlm/<cluster>/comms/<comm>/addr + * /config/dlm/<cluster>/comms/<comm>/addr (write only) + * /config/dlm/<cluster>/comms/<comm>/addr_list (read only) * The <cluster> level is useless, but I haven't figured out how to avoid it. */ @@ -80,6 +81,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, size_t len); static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len); +static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf); static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, size_t len); @@ -92,7 +94,6 @@ struct dlm_cluster { unsigned int cl_tcp_port; unsigned int cl_buffer_size; unsigned int cl_rsbtbl_size; - unsigned int cl_lkbtbl_size; unsigned int cl_dirtbl_size; unsigned int cl_recover_timer; unsigned int cl_toss_secs; @@ -101,13 +102,13 @@ struct dlm_cluster { unsigned int cl_protocol; unsigned int cl_timewarn_cs; unsigned int cl_waitwarn_us; + unsigned int cl_new_rsb_count; }; enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, CLUSTER_ATTR_RSBTBL_SIZE, - CLUSTER_ATTR_LKBTBL_SIZE, CLUSTER_ATTR_DIRTBL_SIZE, CLUSTER_ATTR_RECOVER_TIMER, CLUSTER_ATTR_TOSS_SECS, @@ -116,6 +117,7 @@ enum { CLUSTER_ATTR_PROTOCOL, CLUSTER_ATTR_TIMEWARN_CS, CLUSTER_ATTR_WAITWARN_US, + CLUSTER_ATTR_NEW_RSB_COUNT, }; struct cluster_attribute { @@ -160,7 +162,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) CLUSTER_ATTR(tcp_port, 1); CLUSTER_ATTR(buffer_size, 1); CLUSTER_ATTR(rsbtbl_size, 1); -CLUSTER_ATTR(lkbtbl_size, 1); CLUSTER_ATTR(dirtbl_size, 1); CLUSTER_ATTR(recover_timer, 1); CLUSTER_ATTR(toss_secs, 1); @@ -169,12 +170,12 @@ CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); CLUSTER_ATTR(timewarn_cs, 1); CLUSTER_ATTR(waitwarn_us, 0); +CLUSTER_ATTR(new_rsb_count, 0); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, - [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr, [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, @@ -183,6 +184,7 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, + [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, NULL, }; @@ -190,6 +192,7 @@ enum { COMM_ATTR_NODEID = 0, COMM_ATTR_LOCAL, COMM_ATTR_ADDR, + COMM_ATTR_ADDR_LIST, }; struct comm_attribute { @@ -217,14 +220,22 @@ static struct comm_attribute comm_attr_local = { static struct comm_attribute comm_attr_addr = { .attr = { .ca_owner = THIS_MODULE, .ca_name = "addr", - .ca_mode = S_IRUGO | S_IWUSR }, + .ca_mode = S_IWUSR }, .store = comm_addr_write, }; +static struct comm_attribute comm_attr_addr_list = { + .attr = { .ca_owner = THIS_MODULE, + .ca_name = "addr_list", + .ca_mode = S_IRUGO }, + .show = comm_addr_list_read, +}; + static struct configfs_attribute *comm_attrs[] = { [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, [COMM_ATTR_LOCAL] = &comm_attr_local.attr, [COMM_ATTR_ADDR] = &comm_attr_addr.attr, + [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr, NULL, }; @@ -435,7 +446,6 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_tcp_port = dlm_config.ci_tcp_port; cl->cl_buffer_size = dlm_config.ci_buffer_size; cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; - cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size; cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; cl->cl_recover_timer = dlm_config.ci_recover_timer; cl->cl_toss_secs = dlm_config.ci_toss_secs; @@ -444,6 +454,7 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_protocol = dlm_config.ci_protocol; cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; + cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -720,6 +731,50 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return len; } +static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) +{ + ssize_t s; + ssize_t allowance; + int i; + struct sockaddr_storage *addr; + struct sockaddr_in *addr_in; + struct sockaddr_in6 *addr_in6; + + /* Taken from ip6_addr_string() defined in lib/vsprintf.c */ + char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")]; + + + /* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */ + allowance = 4096; + buf[0] = '\0'; + + for (i = 0; i < cm->addr_count; i++) { + addr = cm->addr[i]; + + switch(addr->ss_family) { + case AF_INET: + addr_in = (struct sockaddr_in *)addr; + s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr); + break; + case AF_INET6: + addr_in6 = (struct sockaddr_in6 *)addr; + s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr); + break; + default: + s = sprintf(buf0, "%s\n", "<UNKNOWN>"); + break; + } + allowance -= s; + if (allowance >= 0) + strcat(buf, buf0); + else { + allowance += s; + break; + } + } + return 4096 - allowance; +} + static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, char *buf) { @@ -983,7 +1038,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_TCP_PORT 21064 #define DEFAULT_BUFFER_SIZE 4096 #define DEFAULT_RSBTBL_SIZE 1024 -#define DEFAULT_LKBTBL_SIZE 1024 #define DEFAULT_DIRTBL_SIZE 1024 #define DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 @@ -992,12 +1046,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_PROTOCOL 0 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ #define DEFAULT_WAITWARN_US 0 +#define DEFAULT_NEW_RSB_COUNT 128 struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, .ci_buffer_size = DEFAULT_BUFFER_SIZE, .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, - .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE, .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, .ci_recover_timer = DEFAULT_RECOVER_TIMER, .ci_toss_secs = DEFAULT_TOSS_SECS, @@ -1005,6 +1059,7 @@ struct dlm_config_info dlm_config = { .ci_log_debug = DEFAULT_LOG_DEBUG, .ci_protocol = DEFAULT_PROTOCOL, .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, - .ci_waitwarn_us = DEFAULT_WAITWARN_US + .ci_waitwarn_us = DEFAULT_WAITWARN_US, + .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index dd0ce24d5a80..3099d0dd26c0 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -20,7 +20,6 @@ struct dlm_config_info { int ci_tcp_port; int ci_buffer_size; int ci_rsbtbl_size; - int ci_lkbtbl_size; int ci_dirtbl_size; int ci_recover_timer; int ci_toss_secs; @@ -29,6 +28,7 @@ struct dlm_config_info { int ci_protocol; int ci_timewarn_cs; int ci_waitwarn_us; + int ci_new_rsb_count; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 0262451eb9c6..fe2860c02449 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -37,6 +37,7 @@ #include <linux/jhash.h> #include <linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/idr.h> #include <asm/uaccess.h> #include <linux/dlm.h> @@ -52,7 +53,6 @@ struct dlm_ls; struct dlm_lkb; struct dlm_rsb; struct dlm_member; -struct dlm_lkbtable; struct dlm_rsbtable; struct dlm_dirtable; struct dlm_direntry; @@ -108,11 +108,6 @@ struct dlm_rsbtable { spinlock_t lock; }; -struct dlm_lkbtable { - struct list_head list; - rwlock_t lock; - uint16_t counter; -}; /* * Lockspace member (per node in a ls) @@ -248,17 +243,18 @@ struct dlm_lkb { int8_t lkb_wait_count; int lkb_wait_nodeid; /* for debugging */ - struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_statequeue; /* rsb g/c/w list */ struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ struct list_head lkb_wait_reply; /* waiting for remote reply */ - struct list_head lkb_astqueue; /* need ast to be sent */ struct list_head lkb_ownqueue; /* list of locks for a process */ struct list_head lkb_time_list; ktime_t lkb_timestamp; ktime_t lkb_wait_time; unsigned long lkb_timeout_cs; + struct mutex lkb_cb_mutex; + struct work_struct lkb_cb_work; + struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */ struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; struct dlm_callback lkb_last_cast; struct dlm_callback lkb_last_bast; @@ -299,7 +295,7 @@ struct dlm_rsb { int res_recover_locks_count; char *res_lvbptr; - char res_name[1]; + char res_name[DLM_RESNAME_MAXLEN+1]; }; /* find_rsb() flags */ @@ -465,12 +461,12 @@ struct dlm_ls { unsigned long ls_scan_time; struct kobject ls_kobj; + struct idr ls_lkbidr; + spinlock_t ls_lkbidr_spin; + struct dlm_rsbtable *ls_rsbtbl; uint32_t ls_rsbtbl_size; - struct dlm_lkbtable *ls_lkbtbl; - uint32_t ls_lkbtbl_size; - struct dlm_dirtable *ls_dirtbl; uint32_t ls_dirtbl_size; @@ -483,6 +479,10 @@ struct dlm_ls { struct mutex ls_timeout_mutex; struct list_head ls_timeout; + spinlock_t ls_new_rsb_spin; + int ls_new_rsb_count; + struct list_head ls_new_rsb; /* new rsb structs */ + struct list_head ls_nodes; /* current nodes in ls */ struct list_head ls_nodes_gone; /* dead node list, recovery */ int ls_num_nodes; /* number of nodes in ls */ @@ -506,8 +506,12 @@ struct dlm_ls { struct miscdevice ls_device; + struct workqueue_struct *ls_callback_wq; + /* recovery related */ + struct mutex ls_cb_mutex; + struct list_head ls_cb_delay; /* save for queue_work later */ struct timer_list ls_timer; struct task_struct *ls_recoverd_task; struct mutex ls_recoverd_active; @@ -544,6 +548,7 @@ struct dlm_ls { #define LSFL_RCOM_WAIT 4 #define LSFL_UEVENT_WAIT 5 #define LSFL_TIMEWARN 6 +#define LSFL_CB_DELAY 7 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index f71d0b5abd95..83b5e32514e1 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) rv = -EDEADLK; } - dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); + dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); } static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) if (is_master_copy(lkb)) { send_bast(r, lkb, rqmode); } else { - dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0); + dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0); } } @@ -327,19 +327,68 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) * Basic operations on rsb's and lkb's */ -static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) +static int pre_rsb_struct(struct dlm_ls *ls) +{ + struct dlm_rsb *r1, *r2; + int count = 0; + + spin_lock(&ls->ls_new_rsb_spin); + if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) { + spin_unlock(&ls->ls_new_rsb_spin); + return 0; + } + spin_unlock(&ls->ls_new_rsb_spin); + + r1 = dlm_allocate_rsb(ls); + r2 = dlm_allocate_rsb(ls); + + spin_lock(&ls->ls_new_rsb_spin); + if (r1) { + list_add(&r1->res_hashchain, &ls->ls_new_rsb); + ls->ls_new_rsb_count++; + } + if (r2) { + list_add(&r2->res_hashchain, &ls->ls_new_rsb); + ls->ls_new_rsb_count++; + } + count = ls->ls_new_rsb_count; + spin_unlock(&ls->ls_new_rsb_spin); + + if (!count) + return -ENOMEM; + return 0; +} + +/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can + unlock any spinlocks, go back and call pre_rsb_struct again. + Otherwise, take an rsb off the list and return it. */ + +static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, + struct dlm_rsb **r_ret) { struct dlm_rsb *r; + int count; - r = dlm_allocate_rsb(ls, len); - if (!r) - return NULL; + spin_lock(&ls->ls_new_rsb_spin); + if (list_empty(&ls->ls_new_rsb)) { + count = ls->ls_new_rsb_count; + spin_unlock(&ls->ls_new_rsb_spin); + log_debug(ls, "find_rsb retry %d %d %s", + count, dlm_config.ci_new_rsb_count, name); + return -EAGAIN; + } + + r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); + list_del(&r->res_hashchain); + ls->ls_new_rsb_count--; + spin_unlock(&ls->ls_new_rsb_spin); r->res_ls = ls; r->res_length = len; memcpy(r->res_name, name, len); mutex_init(&r->res_mutex); + INIT_LIST_HEAD(&r->res_hashchain); INIT_LIST_HEAD(&r->res_lookup); INIT_LIST_HEAD(&r->res_grantqueue); INIT_LIST_HEAD(&r->res_convertqueue); @@ -347,7 +396,8 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) INIT_LIST_HEAD(&r->res_root_list); INIT_LIST_HEAD(&r->res_recover_list); - return r; + *r_ret = r; + return 0; } static int search_rsb_list(struct list_head *head, char *name, int len, @@ -405,16 +455,6 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, return error; } -static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, - unsigned int flags, struct dlm_rsb **r_ret) -{ - int error; - spin_lock(&ls->ls_rsbtbl[b].lock); - error = _search_rsb(ls, name, len, b, flags, r_ret); - spin_unlock(&ls->ls_rsbtbl[b].lock); - return error; -} - /* * Find rsb in rsbtbl and potentially create/add one * @@ -432,35 +472,48 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, static int find_rsb(struct dlm_ls *ls, char *name, int namelen, unsigned int flags, struct dlm_rsb **r_ret) { - struct dlm_rsb *r = NULL, *tmp; + struct dlm_rsb *r = NULL; uint32_t hash, bucket; - int error = -EINVAL; + int error; - if (namelen > DLM_RESNAME_MAXLEN) + if (namelen > DLM_RESNAME_MAXLEN) { + error = -EINVAL; goto out; + } if (dlm_no_directory(ls)) flags |= R_CREATE; - error = 0; hash = jhash(name, namelen, 0); bucket = hash & (ls->ls_rsbtbl_size - 1); - error = search_rsb(ls, name, namelen, bucket, flags, &r); + retry: + if (flags & R_CREATE) { + error = pre_rsb_struct(ls); + if (error < 0) + goto out; + } + + spin_lock(&ls->ls_rsbtbl[bucket].lock); + + error = _search_rsb(ls, name, namelen, bucket, flags, &r); if (!error) - goto out; + goto out_unlock; if (error == -EBADR && !(flags & R_CREATE)) - goto out; + goto out_unlock; /* the rsb was found but wasn't a master copy */ if (error == -ENOTBLK) - goto out; + goto out_unlock; - error = -ENOMEM; - r = create_rsb(ls, name, namelen); - if (!r) - goto out; + error = get_rsb_struct(ls, name, namelen, &r); + if (error == -EAGAIN) { + spin_unlock(&ls->ls_rsbtbl[bucket].lock); + goto retry; + } + if (error) + goto out_unlock; r->res_hash = hash; r->res_bucket = bucket; @@ -474,18 +527,10 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, nodeid = 0; r->res_nodeid = nodeid; } - - spin_lock(&ls->ls_rsbtbl[bucket].lock); - error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); - if (!error) { - spin_unlock(&ls->ls_rsbtbl[bucket].lock); - dlm_free_rsb(r); - r = tmp; - goto out; - } list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); - spin_unlock(&ls->ls_rsbtbl[bucket].lock); error = 0; + out_unlock: + spin_unlock(&ls->ls_rsbtbl[bucket].lock); out: *r_ret = r; return error; @@ -580,9 +625,8 @@ static void detach_lkb(struct dlm_lkb *lkb) static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) { - struct dlm_lkb *lkb, *tmp; - uint32_t lkid = 0; - uint16_t bucket; + struct dlm_lkb *lkb; + int rv, id; lkb = dlm_allocate_lkb(ls); if (!lkb) @@ -594,60 +638,42 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); INIT_LIST_HEAD(&lkb->lkb_time_list); - INIT_LIST_HEAD(&lkb->lkb_astqueue); + INIT_LIST_HEAD(&lkb->lkb_cb_list); + mutex_init(&lkb->lkb_cb_mutex); + INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); - get_random_bytes(&bucket, sizeof(bucket)); - bucket &= (ls->ls_lkbtbl_size - 1); - - write_lock(&ls->ls_lkbtbl[bucket].lock); + retry: + rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS); + if (!rv) + return -ENOMEM; - /* counter can roll over so we must verify lkid is not in use */ + spin_lock(&ls->ls_lkbidr_spin); + rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); + if (!rv) + lkb->lkb_id = id; + spin_unlock(&ls->ls_lkbidr_spin); - while (lkid == 0) { - lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++; + if (rv == -EAGAIN) + goto retry; - list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, - lkb_idtbl_list) { - if (tmp->lkb_id != lkid) - continue; - lkid = 0; - break; - } + if (rv < 0) { + log_error(ls, "create_lkb idr error %d", rv); + return rv; } - lkb->lkb_id = lkid; - list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); - write_unlock(&ls->ls_lkbtbl[bucket].lock); - *lkb_ret = lkb; return 0; } -static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) -{ - struct dlm_lkb *lkb; - uint16_t bucket = (lkid >> 16); - - list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { - if (lkb->lkb_id == lkid) - return lkb; - } - return NULL; -} - static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) { struct dlm_lkb *lkb; - uint16_t bucket = (lkid >> 16); - - if (bucket >= ls->ls_lkbtbl_size) - return -EBADSLT; - read_lock(&ls->ls_lkbtbl[bucket].lock); - lkb = __find_lkb(ls, lkid); + spin_lock(&ls->ls_lkbidr_spin); + lkb = idr_find(&ls->ls_lkbidr, lkid); if (lkb) kref_get(&lkb->lkb_ref); - read_unlock(&ls->ls_lkbtbl[bucket].lock); + spin_unlock(&ls->ls_lkbidr_spin); *lkb_ret = lkb; return lkb ? 0 : -ENOENT; @@ -668,12 +694,12 @@ static void kill_lkb(struct kref *kref) static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) { - uint16_t bucket = (lkb->lkb_id >> 16); + uint32_t lkid = lkb->lkb_id; - write_lock(&ls->ls_lkbtbl[bucket].lock); + spin_lock(&ls->ls_lkbidr_spin); if (kref_put(&lkb->lkb_ref, kill_lkb)) { - list_del(&lkb->lkb_idtbl_list); - write_unlock(&ls->ls_lkbtbl[bucket].lock); + idr_remove(&ls->ls_lkbidr, lkid); + spin_unlock(&ls->ls_lkbidr_spin); detach_lkb(lkb); @@ -683,7 +709,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) dlm_free_lkb(lkb); return 1; } else { - write_unlock(&ls->ls_lkbtbl[bucket].lock); + spin_unlock(&ls->ls_lkbidr_spin); return 0; } } @@ -849,9 +875,7 @@ void dlm_scan_waiters(struct dlm_ls *ls) if (!num_nodes) { num_nodes = ls->ls_num_nodes; - warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); - if (warned) - memset(warned, 0, num_nodes * sizeof(int)); + warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL); } if (!warned) continue; @@ -863,9 +887,7 @@ void dlm_scan_waiters(struct dlm_ls *ls) dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); } mutex_unlock(&ls->ls_waiters_mutex); - - if (warned) - kfree(warned); + kfree(warned); if (debug_expired) log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", @@ -2401,9 +2423,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (deadlk) { /* it's left on the granted queue */ - log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s", - lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status, - lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name); revert_lock(r, lkb); queue_cast(r, lkb, -EDEADLK); error = -EDEADLK; @@ -3993,8 +4012,6 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) default: log_error(ls, "unknown message type %d", ms->m_type); } - - dlm_astd_wake(); } /* If the lockspace is in recovery mode (locking stopped), then normal @@ -4133,7 +4150,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) struct dlm_message *ms_stub; int wait_type, stub_unlock_result, stub_cancel_result; - ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); + ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL); if (!ms_stub) { log_error(ls, "dlm_recover_waiters_pre no mem"); return; @@ -4809,7 +4826,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out_put; spin_lock(&ua->proc->locks_spin); - /* dlm_user_add_ast() may have already taken lkb off the proc list */ + /* dlm_user_add_cb() may have already taken lkb off the proc list */ if (!list_empty(&lkb->lkb_ownqueue)) list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); @@ -4946,7 +4963,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) /* We have to release clear_proc_locks mutex before calling unlock_proc_lock() (which does lock_rsb) due to deadlock with receiving a message that does - lock_rsb followed by dlm_user_add_ast() */ + lock_rsb followed by dlm_user_add_cb() */ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, struct dlm_user_proc *proc) @@ -4969,7 +4986,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, return lkb; } -/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which +/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, which we clear here. */ @@ -5011,10 +5028,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) { memset(&lkb->lkb_callbacks, 0, sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); - list_del_init(&lkb->lkb_astqueue); + list_del_init(&lkb->lkb_cb_list); dlm_put_lkb(lkb); } @@ -5053,10 +5070,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) spin_unlock(&proc->locks_spin); spin_lock(&proc->asts_spin); - list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) { memset(&lkb->lkb_callbacks, 0, sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); - list_del_init(&lkb->lkb_astqueue); + list_del_init(&lkb->lkb_cb_list); dlm_put_lkb(lkb); } spin_unlock(&proc->asts_spin); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 14cbf4099753..a1d8f1af144b 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -15,7 +15,6 @@ #include "lockspace.h" #include "member.h" #include "recoverd.h" -#include "ast.h" #include "dir.h" #include "lowcomms.h" #include "config.h" @@ -24,6 +23,7 @@ #include "recover.h" #include "requestqueue.h" #include "user.h" +#include "ast.h" static int ls_count; static struct mutex ls_lock; @@ -359,17 +359,10 @@ static int threads_start(void) { int error; - /* Thread which process lock requests for all lockspace's */ - error = dlm_astd_start(); - if (error) { - log_print("cannot start dlm_astd thread %d", error); - goto fail; - } - error = dlm_scand_start(); if (error) { log_print("cannot start dlm_scand thread %d", error); - goto astd_fail; + goto fail; } /* Thread for sending/receiving messages for all lockspace's */ @@ -383,8 +376,6 @@ static int threads_start(void) scand_fail: dlm_scand_stop(); - astd_fail: - dlm_astd_stop(); fail: return error; } @@ -393,7 +384,6 @@ static void threads_stop(void) { dlm_scand_stop(); dlm_lowcomms_stop(); - dlm_astd_stop(); } static int new_lockspace(const char *name, int namelen, void **lockspace, @@ -463,7 +453,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, size = dlm_config.ci_rsbtbl_size; ls->ls_rsbtbl_size = size; - ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS); + ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size); if (!ls->ls_rsbtbl) goto out_lsfree; for (i = 0; i < size; i++) { @@ -472,22 +462,13 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, spin_lock_init(&ls->ls_rsbtbl[i].lock); } - size = dlm_config.ci_lkbtbl_size; - ls->ls_lkbtbl_size = size; - - ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS); - if (!ls->ls_lkbtbl) - goto out_rsbfree; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list); - rwlock_init(&ls->ls_lkbtbl[i].lock); - ls->ls_lkbtbl[i].counter = 1; - } + idr_init(&ls->ls_lkbidr); + spin_lock_init(&ls->ls_lkbidr_spin); size = dlm_config.ci_dirtbl_size; ls->ls_dirtbl_size = size; - ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS); + ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size); if (!ls->ls_dirtbl) goto out_lkbfree; for (i = 0; i < size; i++) { @@ -502,6 +483,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, INIT_LIST_HEAD(&ls->ls_timeout); mutex_init(&ls->ls_timeout_mutex); + INIT_LIST_HEAD(&ls->ls_new_rsb); + spin_lock_init(&ls->ls_new_rsb_spin); + INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes_gone); ls->ls_num_nodes = 0; @@ -520,6 +504,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, init_completion(&ls->ls_members_done); ls->ls_members_result = -1; + mutex_init(&ls->ls_cb_mutex); + INIT_LIST_HEAD(&ls->ls_cb_delay); + ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); spin_lock_init(&ls->ls_recover_lock); @@ -553,18 +540,26 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, list_add(&ls->ls_list, &lslist); spin_unlock(&lslist_lock); + if (flags & DLM_LSFL_FS) { + error = dlm_callback_start(ls); + if (error) { + log_error(ls, "can't start dlm_callback %d", error); + goto out_delist; + } + } + /* needs to find ls in lslist */ error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); - goto out_delist; + goto out_callback; } ls->ls_kobj.kset = dlm_kset; error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, "%s", ls->ls_name); if (error) - goto out_stop; + goto out_recoverd; kobject_uevent(&ls->ls_kobj, KOBJ_ADD); /* let kobject handle freeing of ls if there's an error */ @@ -578,7 +573,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, error = do_uevent(ls, 1); if (error) - goto out_stop; + goto out_recoverd; wait_for_completion(&ls->ls_members_done); error = ls->ls_members_result; @@ -595,19 +590,20 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, do_uevent(ls, 0); dlm_clear_members(ls); kfree(ls->ls_node_array); - out_stop: + out_recoverd: dlm_recoverd_stop(ls); + out_callback: + dlm_callback_stop(ls); out_delist: spin_lock(&lslist_lock); list_del(&ls->ls_list); spin_unlock(&lslist_lock); kfree(ls->ls_recover_buf); out_dirfree: - kfree(ls->ls_dirtbl); + vfree(ls->ls_dirtbl); out_lkbfree: - kfree(ls->ls_lkbtbl); - out_rsbfree: - kfree(ls->ls_rsbtbl); + idr_destroy(&ls->ls_lkbidr); + vfree(ls->ls_rsbtbl); out_lsfree: if (do_unreg) kobject_put(&ls->ls_kobj); @@ -641,50 +637,64 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace, return error; } -/* Return 1 if the lockspace still has active remote locks, - * 2 if the lockspace still has active local locks. - */ -static int lockspace_busy(struct dlm_ls *ls) -{ - int i, lkb_found = 0; - struct dlm_lkb *lkb; - - /* NOTE: We check the lockidtbl here rather than the resource table. - This is because there may be LKBs queued as ASTs that have been - unlinked from their RSBs and are pending deletion once the AST has - been delivered */ - - for (i = 0; i < ls->ls_lkbtbl_size; i++) { - read_lock(&ls->ls_lkbtbl[i].lock); - if (!list_empty(&ls->ls_lkbtbl[i].list)) { - lkb_found = 1; - list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, - lkb_idtbl_list) { - if (!lkb->lkb_nodeid) { - read_unlock(&ls->ls_lkbtbl[i].lock); - return 2; - } - } - } - read_unlock(&ls->ls_lkbtbl[i].lock); +static int lkb_idr_is_local(int id, void *p, void *data) +{ + struct dlm_lkb *lkb = p; + + if (!lkb->lkb_nodeid) + return 1; + return 0; +} + +static int lkb_idr_is_any(int id, void *p, void *data) +{ + return 1; +} + +static int lkb_idr_free(int id, void *p, void *data) +{ + struct dlm_lkb *lkb = p; + + if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) + dlm_free_lvb(lkb->lkb_lvbptr); + + dlm_free_lkb(lkb); + return 0; +} + +/* NOTE: We check the lkbidr here rather than the resource table. + This is because there may be LKBs queued as ASTs that have been unlinked + from their RSBs and are pending deletion once the AST has been delivered */ + +static int lockspace_busy(struct dlm_ls *ls, int force) +{ + int rv; + + spin_lock(&ls->ls_lkbidr_spin); + if (force == 0) { + rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls); + } else if (force == 1) { + rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls); + } else { + rv = 0; } - return lkb_found; + spin_unlock(&ls->ls_lkbidr_spin); + return rv; } static int release_lockspace(struct dlm_ls *ls, int force) { - struct dlm_lkb *lkb; struct dlm_rsb *rsb; struct list_head *head; int i, busy, rv; - busy = lockspace_busy(ls); + busy = lockspace_busy(ls, force); spin_lock(&lslist_lock); if (ls->ls_create_count == 1) { - if (busy > force) + if (busy) { rv = -EBUSY; - else { + } else { /* remove_lockspace takes ls off lslist */ ls->ls_create_count = 0; rv = 0; @@ -708,12 +718,12 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_recoverd_stop(ls); + dlm_callback_stop(ls); + remove_lockspace(ls); dlm_delete_debug_file(ls); - dlm_astd_suspend(); - kfree(ls->ls_recover_buf); /* @@ -721,31 +731,15 @@ static int release_lockspace(struct dlm_ls *ls, int force) */ dlm_dir_clear(ls); - kfree(ls->ls_dirtbl); + vfree(ls->ls_dirtbl); /* - * Free all lkb's on lkbtbl[] lists. + * Free all lkb's in idr */ - for (i = 0; i < ls->ls_lkbtbl_size; i++) { - head = &ls->ls_lkbtbl[i].list; - while (!list_empty(head)) { - lkb = list_entry(head->next, struct dlm_lkb, - lkb_idtbl_list); - - list_del(&lkb->lkb_idtbl_list); - - dlm_del_ast(lkb); - - if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) - dlm_free_lvb(lkb->lkb_lvbptr); - - dlm_free_lkb(lkb); - } - } - dlm_astd_resume(); - - kfree(ls->ls_lkbtbl); + idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); + idr_remove_all(&ls->ls_lkbidr); + idr_destroy(&ls->ls_lkbidr); /* * Free all rsb's on rsbtbl[] lists @@ -770,7 +764,14 @@ static int release_lockspace(struct dlm_ls *ls, int force) } } - kfree(ls->ls_rsbtbl); + vfree(ls->ls_rsbtbl); + + while (!list_empty(&ls->ls_new_rsb)) { + rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, + res_hashchain); + list_del(&rsb->res_hashchain); + dlm_free_rsb(rsb); + } /* * Free structures on any other lists diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 5e2c71f05e46..990626e7da80 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -512,12 +512,10 @@ static void process_sctp_notification(struct connection *con, } make_sockaddr(&prim.ssp_addr, 0, &addr_len); if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { - int i; unsigned char *b=(unsigned char *)&prim.ssp_addr; log_print("reject connect from unknown addr"); - for (i=0; i<sizeof(struct sockaddr_storage);i++) - printk("%02x ", b[i]); - printk("\n"); + print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, + b, sizeof(struct sockaddr_storage)); sctp_send_shutdown(prim.ssp_assoc_id); return; } @@ -748,7 +746,10 @@ static int tcp_accept_from_sock(struct connection *con) /* Get the new node's NODEID */ make_sockaddr(&peeraddr, 0, &len); if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { + unsigned char *b=(unsigned char *)&peeraddr; log_print("connect from non cluster node"); + print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, + b, sizeof(struct sockaddr_storage)); sock_release(newsock); mutex_unlock(&con->sock_mutex); return -1; diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 8e0d00db004f..da64df7576e1 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -16,6 +16,7 @@ #include "memory.h" static struct kmem_cache *lkb_cache; +static struct kmem_cache *rsb_cache; int __init dlm_memory_init(void) @@ -26,6 +27,14 @@ int __init dlm_memory_init(void) __alignof__(struct dlm_lkb), 0, NULL); if (!lkb_cache) ret = -ENOMEM; + + rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), + __alignof__(struct dlm_rsb), 0, NULL); + if (!rsb_cache) { + kmem_cache_destroy(lkb_cache); + ret = -ENOMEM; + } + return ret; } @@ -33,6 +42,8 @@ void dlm_memory_exit(void) { if (lkb_cache) kmem_cache_destroy(lkb_cache); + if (rsb_cache) + kmem_cache_destroy(rsb_cache); } char *dlm_allocate_lvb(struct dlm_ls *ls) @@ -48,16 +59,11 @@ void dlm_free_lvb(char *p) kfree(p); } -/* FIXME: have some minimal space built-in to rsb for the name and - kmalloc a separate name if needed, like dentries are done */ - -struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen) +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls) { struct dlm_rsb *r; - DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); - - r = kzalloc(sizeof(*r) + namelen, GFP_NOFS); + r = kmem_cache_zalloc(rsb_cache, GFP_NOFS); return r; } @@ -65,7 +71,7 @@ void dlm_free_rsb(struct dlm_rsb *r) { if (r->res_lvbptr) dlm_free_lvb(r->res_lvbptr); - kfree(r); + kmem_cache_free(rsb_cache, r); } struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h index 485fb29143bd..177c11cbb0a6 100644 --- a/fs/dlm/memory.h +++ b/fs/dlm/memory.h @@ -16,7 +16,7 @@ int dlm_memory_init(void); void dlm_memory_exit(void); -struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls); void dlm_free_rsb(struct dlm_rsb *r); struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); void dlm_free_lkb(struct dlm_lkb *l); diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index fd677c8c3d3b..774da3cf92c6 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -58,13 +58,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) mutex_lock(&ls->ls_recoverd_active); - /* - * Suspending and resuming dlm_astd ensures that no lkb's from this ls - * will be processed by dlm_astd during recovery. - */ - - dlm_astd_suspend(); - dlm_astd_resume(); + dlm_callback_suspend(ls); /* * Free non-master tossed rsb's. Master rsb's are kept on toss @@ -202,6 +196,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_adjust_timeouts(ls); + dlm_callback_resume(ls); + error = enable_locking(ls, rv->seq); if (error) { log_debug(ls, "enable_locking failed %d", error); @@ -222,8 +218,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_grant_after_purge(ls); - dlm_astd_wake(); - log_debug(ls, "recover %llx done: %u ms", (unsigned long long)rv->seq, jiffies_to_msecs(jiffies - start)); diff --git a/fs/dlm/user.c b/fs/dlm/user.c index e96bf3e9be88..d8ea60756403 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, goto out; } - if (list_empty(&lkb->lkb_astqueue)) { + if (list_empty(&lkb->lkb_cb_list)) { kref_get(&lkb->lkb_ref); - list_add_tail(&lkb->lkb_astqueue, &proc->asts); + list_add_tail(&lkb->lkb_cb_list, &proc->asts); wake_up_interruptible(&proc->wait); } spin_unlock(&proc->asts_spin); @@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, } /* if we empty lkb_callbacks, we don't want to unlock the spinlock - without removing lkb_astqueue; so empty lkb_astqueue is always + without removing lkb_cb_list; so empty lkb_cb_list is always consistent with empty lkb_callbacks */ - lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); + lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list); rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid); if (rv < 0) { /* this shouldn't happen; lkb should have been removed from list when resid was zero */ log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id); - list_del_init(&lkb->lkb_astqueue); + list_del_init(&lkb->lkb_cb_list); spin_unlock(&proc->asts_spin); /* removes ref for proc->asts, may cause lkb to be freed */ dlm_put_lkb(lkb); goto try_another; } if (!resid) - list_del_init(&lkb->lkb_astqueue); + list_del_init(&lkb->lkb_cb_list); spin_unlock(&proc->asts_spin); if (cb.flags & DLM_CB_SKIP) { diff --git a/fs/exec.c b/fs/exec.c index f9f12ad299af..842d5700c155 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -963,9 +963,18 @@ static int de_thread(struct task_struct *tsk) leader->group_leader = tsk; tsk->exit_signal = SIGCHLD; + leader->exit_signal = -1; BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; + + /* + * We are going to release_task()->ptrace_unlink() silently, + * the tracer can sleep in do_wait(). EXIT_DEAD guarantees + * the tracer wont't block again waiting for this thread. + */ + if (unlikely(leader->ptrace)) + __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); release_task(leader); @@ -1233,7 +1242,12 @@ int check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; int res = 0; - bprm->unsafe = tracehook_unsafe_exec(p); + if (p->ptrace) { + if (p->ptrace & PT_PTRACE_CAP) + bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; + else + bprm->unsafe |= LSM_UNSAFE_PTRACE; + } n_fs = 1; spin_lock(&p->fs->lock); @@ -1361,6 +1375,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; + pid_t old_pid; retval = security_bprm_check(bprm); if (retval) @@ -1370,6 +1385,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (retval) return retval; + /* Need to fetch pid before load_binary changes it */ + rcu_read_lock(); + old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); + rcu_read_unlock(); + retval = -ENOENT; for (try=0; try<2; try++) { read_lock(&binfmt_lock); @@ -1389,7 +1409,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) - tracehook_report_exec(fmt, bprm, regs); + ptrace_event(PTRACE_EVENT_EXEC, + old_pid); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) @@ -1777,7 +1798,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_group_stop_pending(t); + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 2f343b4d7a7d..3f7a59bfa7ad 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -976,16 +976,12 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, pagevec_init(&pvec, 0); next = 0; - while (next <= (loff_t)-1 && - pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) - ) { + do { + if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) + break; for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - pgoff_t page_index = page->index; - - ASSERTCMP(page_index, >=, next); - next = page_index + 1; - + next = page->index; if (PageFsCache(page)) { __fscache_wait_on_page_write(cookie, page); __fscache_uncache_page(cookie, page); @@ -993,7 +989,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, } pagevec_release(&pvec); cond_resched(); - } + } while (++next); _leave(""); } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 2cd0e56b8893..7878c473ae62 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -854,11 +854,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, blen++; else { if (bstart) { - if (metadata) - __gfs2_free_meta(ip, bstart, blen); - else - __gfs2_free_data(ip, bstart, blen); - + __gfs2_free_blocks(ip, bstart, blen, metadata); btotal += blen; } @@ -870,11 +866,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_add_inode_blocks(&ip->i_inode, -1); } if (bstart) { - if (metadata) - __gfs2_free_meta(ip, bstart, blen); - else - __gfs2_free_data(ip, bstart, blen); - + __gfs2_free_blocks(ip, bstart, blen, metadata); btotal += blen; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 091ee4779538..1cc2f8ec52a2 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -339,6 +339,67 @@ fail: return (copied) ? copied : error; } +/** + * gfs2_dir_get_hash_table - Get pointer to the dir hash table + * @ip: The inode in question + * + * Returns: The hash table or an error + */ + +static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) +{ + struct inode *inode = &ip->i_inode; + int ret; + u32 hsize; + __be64 *hc; + + BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH)); + + hc = ip->i_hash_cache; + if (hc) + return hc; + + hsize = 1 << ip->i_depth; + hsize *= sizeof(__be64); + if (hsize != i_size_read(&ip->i_inode)) { + gfs2_consist_inode(ip); + return ERR_PTR(-EIO); + } + + hc = kmalloc(hsize, GFP_NOFS); + ret = -ENOMEM; + if (hc == NULL) + return ERR_PTR(-ENOMEM); + + ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1); + if (ret < 0) { + kfree(hc); + return ERR_PTR(ret); + } + + spin_lock(&inode->i_lock); + if (ip->i_hash_cache) + kfree(hc); + else + ip->i_hash_cache = hc; + spin_unlock(&inode->i_lock); + + return ip->i_hash_cache; +} + +/** + * gfs2_dir_hash_inval - Invalidate dir hash + * @ip: The directory inode + * + * Must be called with an exclusive glock, or during glock invalidation. + */ +void gfs2_dir_hash_inval(struct gfs2_inode *ip) +{ + __be64 *hc = ip->i_hash_cache; + ip->i_hash_cache = NULL; + kfree(hc); +} + static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) { return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; @@ -686,17 +747,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, static int get_leaf_nr(struct gfs2_inode *dip, u32 index, u64 *leaf_out) { - __be64 leaf_no; - int error; - - error = gfs2_dir_read_data(dip, (char *)&leaf_no, - index * sizeof(__be64), - sizeof(__be64), 0); - if (error != sizeof(u64)) - return (error < 0) ? error : -EIO; - - *leaf_out = be64_to_cpu(leaf_no); + __be64 *hash; + hash = gfs2_dir_get_hash_table(dip); + if (IS_ERR(hash)) + return PTR_ERR(hash); + *leaf_out = be64_to_cpu(*(hash + index)); return 0; } @@ -966,6 +1022,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); + gfs2_dir_hash_inval(dip); + error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), half_len * sizeof(u64)); if (error != half_len * sizeof(u64)) { @@ -1052,70 +1110,54 @@ fail_brelse: static int dir_double_exhash(struct gfs2_inode *dip) { - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *dibh; u32 hsize; - u64 *buf; - u64 *from, *to; - u64 block; - u64 disksize = i_size_read(&dip->i_inode); + u32 hsize_bytes; + __be64 *hc; + __be64 *hc2, *h; int x; int error = 0; hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != disksize) { - gfs2_consist_inode(dip); - return -EIO; - } + hsize_bytes = hsize * sizeof(__be64); - /* Allocate both the "from" and "to" buffers in one big chunk */ + hc = gfs2_dir_get_hash_table(dip); + if (IS_ERR(hc)) + return PTR_ERR(hc); - buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); - if (!buf) + h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); + if (!hc2) return -ENOMEM; - for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { - error = gfs2_dir_read_data(dip, (char *)buf, - block * sdp->sd_hash_bsize, - sdp->sd_hash_bsize, 1); - if (error != sdp->sd_hash_bsize) { - if (error >= 0) - error = -EIO; - goto fail; - } - - from = buf; - to = (u64 *)((char *)buf + sdp->sd_hash_bsize); - - for (x = sdp->sd_hash_ptrs; x--; from++) { - *to++ = *from; /* No endianess worries */ - *to++ = *from; - } + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + goto out_kfree; - error = gfs2_dir_write_data(dip, - (char *)buf + sdp->sd_hash_bsize, - block * sdp->sd_sb.sb_bsize, - sdp->sd_sb.sb_bsize); - if (error != sdp->sd_sb.sb_bsize) { - if (error >= 0) - error = -EIO; - goto fail; - } + for (x = 0; x < hsize; x++) { + *h++ = *hc; + *h++ = *hc; + hc++; } - kfree(buf); - - error = gfs2_meta_inode_buffer(dip, &dibh); - if (!gfs2_assert_withdraw(sdp, !error)) { - dip->i_depth++; - gfs2_dinode_out(dip, dibh->b_data); - brelse(dibh); - } + error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); + if (error != (hsize_bytes * 2)) + goto fail; - return error; + gfs2_dir_hash_inval(dip); + dip->i_hash_cache = hc2; + dip->i_depth++; + gfs2_dinode_out(dip, dibh->b_data); + brelse(dibh); + return 0; fail: - kfree(buf); + /* Replace original hash table & size */ + gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); + i_size_write(&dip->i_inode, hsize_bytes); + gfs2_dinode_out(dip, dibh->b_data); + brelse(dibh); +out_kfree: + kfree(hc2); return error; } @@ -1348,6 +1390,7 @@ out: return error; } + /** * dir_e_read - Reads the entries from a directory into a filldir buffer * @dip: dinode pointer @@ -1362,9 +1405,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); u32 hsize, len = 0; - u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 hash, index; __be64 *lp; int copied = 0; @@ -1372,37 +1413,17 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, unsigned depth = 0; hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != i_size_read(inode)) { - gfs2_consist_inode(dip); - return -EIO; - } - hash = gfs2_dir_offset2hash(*offset); index = hash >> (32 - dip->i_depth); - lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); - if (!lp) - return -ENOMEM; + lp = gfs2_dir_get_hash_table(dip); + if (IS_ERR(lp)) + return PTR_ERR(lp); while (index < hsize) { - lp_offset = index & (sdp->sd_hash_ptrs - 1); - ht_offset = index - lp_offset; - - if (ht_offset_cur != ht_offset) { - error = gfs2_dir_read_data(dip, (char *)lp, - ht_offset * sizeof(__be64), - sdp->sd_hash_bsize, 1); - if (error != sdp->sd_hash_bsize) { - if (error >= 0) - error = -EIO; - goto out; - } - ht_offset_cur = ht_offset; - } - error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, &copied, &depth, - be64_to_cpu(lp[lp_offset])); + be64_to_cpu(lp[index])); if (error) break; @@ -1410,8 +1431,6 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, index = (index & ~(len - 1)) + len; } -out: - kfree(lp); if (error > 0) error = 0; return error; @@ -1914,43 +1933,22 @@ out: int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *bh; struct gfs2_leaf *leaf; u32 hsize, len; - u32 ht_offset, lp_offset, ht_offset_cur = -1; u32 index = 0, next_index; __be64 *lp; u64 leaf_no; int error = 0, last; hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { - gfs2_consist_inode(dip); - return -EIO; - } - lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); - if (!lp) - return -ENOMEM; + lp = gfs2_dir_get_hash_table(dip); + if (IS_ERR(lp)) + return PTR_ERR(lp); while (index < hsize) { - lp_offset = index & (sdp->sd_hash_ptrs - 1); - ht_offset = index - lp_offset; - - if (ht_offset_cur != ht_offset) { - error = gfs2_dir_read_data(dip, (char *)lp, - ht_offset * sizeof(__be64), - sdp->sd_hash_bsize, 1); - if (error != sdp->sd_hash_bsize) { - if (error >= 0) - error = -EIO; - goto out; - } - ht_offset_cur = ht_offset; - } - - leaf_no = be64_to_cpu(lp[lp_offset]); + leaf_no = be64_to_cpu(lp[index]); if (leaf_no) { error = get_leaf(dip, leaf_no, &bh); if (error) @@ -1976,7 +1974,6 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) } out: - kfree(lp); return error; } diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index e686af11becd..ff5772fbf024 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -35,6 +35,7 @@ extern int gfs2_diradd_alloc_required(struct inode *dir, const struct qstr *filename); extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp); +extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); static inline u32 gfs2_disk_hash(const char *data, int len) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f82cb5e1cb6b..edeb9e802903 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -174,7 +174,9 @@ void gfs2_set_inode_flags(struct inode *inode) struct gfs2_inode *ip = GFS2_I(inode); unsigned int flags = inode->i_flags; - flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); + if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) flags |= S_IMMUTABLE; if (ip->i_diskflags & GFS2_DIF_APPENDONLY) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1c1336e7b3b2..88e8a23d0026 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -409,6 +409,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); + if (new_state != gl->gl_target) + /* shorten our minimum hold time */ + gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, + GL_GLOCK_MIN_HOLD); gl->gl_state = new_state; gl->gl_tchange = jiffies; } @@ -668,7 +672,7 @@ static void glock_work_func(struct work_struct *work) gl->gl_demote_state != LM_ST_EXCLUSIVE) { unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + holdtime = gl->gl_tchange + gl->gl_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; @@ -679,9 +683,14 @@ static void glock_work_func(struct work_struct *work) } run_queue(gl, 0); spin_unlock(&gl->gl_spin); - if (!delay || - queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + if (!delay) gfs2_glock_put(gl); + else { + if (gl->gl_name.ln_type != LM_TYPE_INODE) + delay = 0; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); + } if (drop_ref) gfs2_glock_put(gl); } @@ -743,6 +752,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; + gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); @@ -855,8 +865,15 @@ static int gfs2_glock_demote_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { + unsigned long time1 = jiffies; + might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); + if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ + /* Lengthen the minimum hold time. */ + gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + + GL_GLOCK_HOLD_INCR, + GL_GLOCK_MAX_HOLD); } static void wait_on_demote(struct gfs2_glock *gl) @@ -1093,8 +1110,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && - !test_bit(GLF_DEMOTE, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; + !test_bit(GLF_DEMOTE, &gl->gl_flags) && + gl->gl_name.ln_type == LM_TYPE_INODE) + delay = gl->gl_hold_time; if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1273,12 +1291,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) unsigned long now = jiffies; gfs2_glock_hold(gl); - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; - if (test_bit(GLF_QUEUED, &gl->gl_flags)) { + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (test_bit(GLF_QUEUED, &gl->gl_flags) && + gl->gl_name.ln_type == LM_TYPE_INODE) { if (time_before(now, holdtime)) delay = holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; + delay = gl->gl_hold_time; } spin_lock(&gl->gl_spin); @@ -1667,7 +1686,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, @@ -1676,7 +1695,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - atomic_read(&gl->gl_ref)); + atomic_read(&gl->gl_ref), gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 6b2f757b9281..66707118af25 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -113,6 +113,12 @@ enum { #define GLR_TRYFAILED 13 +#define GL_GLOCK_MAX_HOLD (long)(HZ / 5) +#define GL_GLOCK_DFT_HOLD (long)(HZ / 5) +#define GL_GLOCK_MIN_HOLD (long)(10) +#define GL_GLOCK_HOLD_INCR (long)(HZ / 20) +#define GL_GLOCK_HOLD_DECR (long)(HZ / 40) + struct lm_lockops { const char *lm_proto_name; int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 2cca29316bd6..da21ecaafcc2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -26,6 +26,7 @@ #include "rgrp.h" #include "util.h" #include "trans.h" +#include "dir.h" /** * __gfs2_ail_flush - remove all buffers for a given lock from the AIL @@ -218,6 +219,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) if (ip) { set_bit(GIF_INVALID, &ip->i_flags); forget_all_cached_acls(&ip->i_inode); + gfs2_dir_hash_inval(ip); } } @@ -316,6 +318,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_generation = be64_to_cpu(str->di_generation); ip->i_diskflags = be32_to_cpu(str->di_flags); + ip->i_eattr = be64_to_cpu(str->di_eattr); + /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ gfs2_set_inode_flags(&ip->i_inode); height = be16_to_cpu(str->di_height); if (unlikely(height > GFS2_MAX_META_HEIGHT)) @@ -328,7 +332,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); - ip->i_eattr = be64_to_cpu(str->di_eattr); if (S_ISREG(ip->i_inode.i_mode)) gfs2_set_aops(&ip->i_inode); @@ -549,7 +552,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_lock = inode_go_lock, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, - .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; @@ -560,7 +562,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, - .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 81206e70cbf6..892ac37de8ae 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -163,7 +163,6 @@ struct gfs2_glock_operations { int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); void (*go_callback) (struct gfs2_glock *gl); const int go_type; - const unsigned long go_min_hold_time; const unsigned long go_flags; #define GLOF_ASPACE 1 }; @@ -221,6 +220,7 @@ struct gfs2_glock { unsigned int gl_hash; unsigned long gl_demote_time; /* time of first demote request */ + long gl_hold_time; struct list_head gl_holders; const struct gfs2_glock_operations *gl_ops; @@ -285,6 +285,7 @@ struct gfs2_inode { u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; struct list_head i_trunc_list; + __be64 *i_hash_cache; u32 i_entries; u32 i_diskflags; u8 i_height; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c2b34cd2abe0..29e1ace7953d 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -41,6 +41,7 @@ static void gfs2_init_inode_once(void *foo) init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); ip->i_alloc = NULL; + ip->i_hash_cache = NULL; } static void gfs2_init_glock_once(void *foo) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 2a77071fb7b6..516516e0c2a2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1094,6 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_nobarrier) set_bit(SDF_NOBARRIERS, &sdp->sd_flags); + sb->s_flags |= MS_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; sb->s_d_op = &gfs2_dops; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9b780df3fd54..7f8af1eb02de 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1607,14 +1607,15 @@ rgrp_error: } /** - * gfs2_free_data - free a contiguous run of data block(s) + * __gfs2_free_blocks - free a contiguous run of block(s) * @ip: the inode these blocks are being freed from * @bstart: first block of a run of contiguous blocks * @blen: the length of the block run + * @meta: 1 if the blocks represent metadata * */ -void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) +void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1631,54 +1632,11 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_trans_add_rg(rgd); /* Directories keep their data in the metadata address space */ - if (ip->i_depth) + if (meta || ip->i_depth) gfs2_meta_wipe(ip, bstart, blen); } /** - * gfs2_free_data - free a contiguous run of data block(s) - * @ip: the inode these blocks are being freed from - * @bstart: first block of a run of contiguous blocks - * @blen: the length of the block run - * - */ - -void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - - __gfs2_free_data(ip, bstart, blen); - gfs2_statfs_change(sdp, 0, +blen, 0); - gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); -} - -/** - * gfs2_free_meta - free a contiguous run of data block(s) - * @ip: the inode these blocks are being freed from - * @bstart: first block of a run of contiguous blocks - * @blen: the length of the block run - * - */ - -void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd; - - rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); - if (!rgd) - return; - trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); - rgd->rd_free += blen; - - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - - gfs2_trans_add_rg(rgd); - gfs2_meta_wipe(ip, bstart, blen); -} - -/** * gfs2_free_meta - free a contiguous run of data block(s) * @ip: the inode these blocks are being freed from * @bstart: first block of a run of contiguous blocks @@ -1690,7 +1648,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - __gfs2_free_meta(ip, bstart, blen); + __gfs2_free_blocks(ip, bstart, blen, 1); gfs2_statfs_change(sdp, 0, +blen, 0); gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index a80e3034ac47..d253f9a8c70e 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -52,9 +52,7 @@ extern int gfs2_ri_update(struct gfs2_inode *ip); extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); -extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); -extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); -extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); extern void gfs2_unlink_di(struct inode *inode); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fb0edf735483..b7beadd9ba4c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1533,7 +1533,7 @@ out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); - + gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 2312de34bd42..2a734cfccc92 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -43,6 +43,10 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) node->tree->node_size - (rec + 1) * 2); if (!recoff) return 0; + if (recoff > node->tree->node_size - 2) { + printk(KERN_ERR "hfs: recoff %d too large\n", recoff); + return 0; + } retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index b4ba1b319333..4dfbfec357e8 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -212,7 +212,9 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, @@ -269,7 +271,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return err; if (!str) { int len; @@ -347,12 +351,14 @@ int hfsplus_rename_cat(u32 cnid, struct hfs_find_data src_fd, dst_fd; hfsplus_cat_entry entry; int entry_size, type; - int err = 0; + int err; dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); + if (err) + return err; dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 4df5059c25da..25b2443a004c 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -38,7 +38,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; dentry->d_fsdata = NULL; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return ERR_PTR(err); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); @@ -132,7 +134,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index b1991a2a08e0..5849e3ef35cc 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -119,22 +119,31 @@ static void __hfsplus_ext_write_extent(struct inode *inode, set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); } -static void hfsplus_ext_write_extent_locked(struct inode *inode) +static int hfsplus_ext_write_extent_locked(struct inode *inode) { + int res; + if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; - hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + if (res) + return res; __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } + return 0; } -void hfsplus_ext_write_extent(struct inode *inode) +int hfsplus_ext_write_extent(struct inode *inode) { + int res; + mutex_lock(&HFSPLUS_I(inode)->extents_lock); - hfsplus_ext_write_extent_locked(inode); + res = hfsplus_ext_write_extent_locked(inode); mutex_unlock(&HFSPLUS_I(inode)->extents_lock); + + return res; } static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, @@ -194,9 +203,11 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) block < hip->cached_start + hip->cached_blocks) return 0; - hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); - res = __hfsplus_ext_cache_extent(&fd, inode, block); - hfs_find_exit(&fd); + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + if (!res) { + res = __hfsplus_ext_cache_extent(&fd, inode, block); + hfs_find_exit(&fd); + } return res; } @@ -209,6 +220,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res = -EIO; u32 ablock, dblock, mask; + sector_t sector; int was_dirty = 0; int shift; @@ -255,10 +267,12 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); + mask = (1 << sbi->fs_shift) - 1; - map_bh(bh_result, sb, - (dblock << sbi->fs_shift) + sbi->blockoffset + - (iblock & mask)); + sector = ((sector_t)dblock << sbi->fs_shift) + + sbi->blockoffset + (iblock & mask); + map_bh(bh_result, sb, sector); + if (create) { set_buffer_new(bh_result); hip->phys_size += sb->s_blocksize; @@ -371,7 +385,9 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, if (total_blocks == blocks) return 0; - hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + if (res) + return res; do { res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, total_blocks, type); @@ -469,7 +485,9 @@ out: insert_extent: dprint(DBG_EXTENT, "insert new extent\n"); - hfsplus_ext_write_extent_locked(inode); + res = hfsplus_ext_write_extent_locked(inode); + if (res) + goto out; memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->cached_extents[0].start_block = cpu_to_be32(start); @@ -500,7 +518,6 @@ void hfsplus_file_truncate(struct inode *inode) struct page *page; void *fsdata; u32 size = inode->i_size; - int res; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, @@ -523,7 +540,12 @@ void hfsplus_file_truncate(struct inode *inode) goto out; mutex_lock(&hip->extents_lock); - hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + if (res) { + mutex_unlock(&hip->extents_lock); + /* XXX: We lack error handling of hfsplus_file_truncate() */ + return; + } while (1) { if (alloc_cnt == hip->first_blocks) { hfsplus_free_extents(sb, hip->first_extents, diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 38184e360932..d7674d051f52 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -13,6 +13,7 @@ #include <linux/fs.h> #include <linux/mutex.h> #include <linux/buffer_head.h> +#include <linux/blkdev.h> #include "hfsplus_raw.h" #define DBG_BNODE_REFS 0x00000001 @@ -110,7 +111,9 @@ struct hfsplus_vh; struct hfs_btree; struct hfsplus_sb_info { + void *s_vhdr_buf; struct hfsplus_vh *s_vhdr; + void *s_backup_vhdr_buf; struct hfsplus_vh *s_backup_vhdr; struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; @@ -258,6 +261,15 @@ struct hfsplus_readdir_data { struct hfsplus_cat_key key; }; +/* + * Find minimum acceptible I/O size for an hfsplus sb. + */ +static inline unsigned short hfsplus_min_io_size(struct super_block *sb) +{ + return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev), + HFSPLUS_SECTOR_SIZE); +} + #define hfs_btree_open hfsplus_btree_open #define hfs_btree_close hfsplus_btree_close #define hfs_btree_write hfsplus_btree_write @@ -374,7 +386,7 @@ extern const struct file_operations hfsplus_dir_operations; /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -void hfsplus_ext_write_extent(struct inode *); +int hfsplus_ext_write_extent(struct inode *); int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); @@ -437,8 +449,8 @@ int hfsplus_compare_dentry(const struct dentry *parent, /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); int hfs_part_find(struct super_block *, sector_t *, sector_t *); -int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, - void *data, int rw); +int hfsplus_submit_bio(struct super_block *sb, sector_t sector, + void *buf, void **data, int rw); /* time macros */ #define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 30486e01d003..4cc1e3a36ec7 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -195,11 +195,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, hip->flags = 0; set_bit(HFSPLUS_I_RSRC, &hip->flags); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - err = hfsplus_find_cat(sb, dir->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (!err) { + err = hfsplus_find_cat(sb, dir->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); + } if (err) { iput(inode); return ERR_PTR(err); diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 40ad88c12c64..eb355d81e279 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -88,11 +88,12 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm, return -ENOENT; } -static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, - sector_t *part_start, sector_t *part_size) +static int hfs_parse_new_pmap(struct super_block *sb, void *buf, + struct new_pmap *pm, sector_t *part_start, sector_t *part_size) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); int size = be32_to_cpu(pm->pmMapBlkCnt); + int buf_size = hfsplus_min_io_size(sb); int res; int i = 0; @@ -107,11 +108,14 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, if (++i >= size) return -ENOENT; - res = hfsplus_submit_bio(sb->s_bdev, - *part_start + HFS_PMAP_BLK + i, - pm, READ); - if (res) - return res; + pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE); + if ((u8 *)pm - (u8 *)buf >= buf_size) { + res = hfsplus_submit_bio(sb, + *part_start + HFS_PMAP_BLK + i, + buf, (void **)&pm, READ); + if (res) + return res; + } } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); return -ENOENT; @@ -124,15 +128,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, int hfs_part_find(struct super_block *sb, sector_t *part_start, sector_t *part_size) { - void *data; + void *buf, *data; int res; - data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); - if (!data) + buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); + if (!buf) return -ENOMEM; - res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, - data, READ); + res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK, + buf, &data, READ); if (res) goto out; @@ -141,13 +145,13 @@ int hfs_part_find(struct super_block *sb, res = hfs_parse_old_pmap(sb, data, part_start, part_size); break; case HFS_NEW_PMAP_MAGIC: - res = hfs_parse_new_pmap(sb, data, part_start, part_size); + res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size); break; default: res = -ENOENT; break; } out: - kfree(data); + kfree(buf); return res; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 84a47b709f51..c106ca22e812 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -73,11 +73,13 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) { - hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + if (!err) { + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); + } } else { err = hfsplus_system_read_inode(inode); } @@ -133,9 +135,13 @@ static int hfsplus_system_write_inode(struct inode *inode) static int hfsplus_write_inode(struct inode *inode, struct writeback_control *wbc) { + int err; + dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); - hfsplus_ext_write_extent(inode); + err = hfsplus_ext_write_extent(inode); + if (err) + return err; if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) @@ -197,17 +203,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait) write_backup = 1; } - error2 = hfsplus_submit_bio(sb->s_bdev, + error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr, WRITE_SYNC); + sbi->s_vhdr_buf, NULL, WRITE_SYNC); if (!error) error = error2; if (!write_backup) goto out; - error2 = hfsplus_submit_bio(sb->s_bdev, + error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, - sbi->s_backup_vhdr, WRITE_SYNC); + sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC); if (!error) error2 = error; out: @@ -251,8 +257,8 @@ static void hfsplus_put_super(struct super_block *sb) hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); iput(sbi->hidden_dir); - kfree(sbi->s_vhdr); - kfree(sbi->s_backup_vhdr); + kfree(sbi->s_vhdr_buf); + kfree(sbi->s_backup_vhdr_buf); unload_nls(sbi->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -393,6 +399,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->rsrc_clump_blocks) sbi->rsrc_clump_blocks = 1; + err = generic_check_addressable(sbi->alloc_blksz_shift, + sbi->total_blocks); + if (err) { + printk(KERN_ERR "hfs: filesystem size too large.\n"); + goto out_free_vhdr; + } + /* Set up operations so we can load metadata */ sb->s_op = &hfsplus_sops; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -417,6 +430,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY; } + err = -EINVAL; + /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { @@ -447,7 +462,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; - hfs_find_init(sbi->cat_tree, &fd); + err = hfs_find_init(sbi->cat_tree, &fd); + if (err) + goto out_put_root; hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index a3f0bfcc881e..a32998f29f0b 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -142,7 +142,11 @@ int hfsplus_uni2asc(struct super_block *sb, /* search for single decomposed char */ if (likely(compose)) ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); - if (ce1 && (cc = ce1[0])) { + if (ce1) + cc = ce1[0]; + else + cc = 0; + if (cc) { /* start of a possibly decomposed Hangul char */ if (cc != 0xffff) goto done; @@ -209,7 +213,8 @@ int hfsplus_uni2asc(struct super_block *sb, i++; ce2 = ce1; } - if ((cc = ce2[0])) { + cc = ce2[0]; + if (cc) { ip += i; ustrlen -= i; goto done; @@ -301,7 +306,11 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { size = asc2unichar(sb, astr, len, &c); - if (decompose && (dstr = decompose_unichar(c, &dsize))) { + if (decompose) + dstr = decompose_unichar(c, &dsize); + else + dstr = NULL; + if (dstr) { if (outlen + dsize > HFSPLUS_MAX_STRLEN) break; do { @@ -346,15 +355,23 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, astr += size; len -= size; - if (decompose && (dstr = decompose_unichar(c, &dsize))) { + if (decompose) + dstr = decompose_unichar(c, &dsize); + else + dstr = NULL; + if (dstr) { do { c2 = *dstr++; - if (!casefold || (c2 = case_fold(c2))) + if (casefold) + c2 = case_fold(c2); + if (!casefold || c2) hash = partial_name_hash(c2, hash); } while (--dsize > 0); } else { c2 = c; - if (!casefold || (c2 = case_fold(c2))) + if (casefold) + c2 = case_fold(c2); + if (!casefold || c2) hash = partial_name_hash(c2, hash); } } @@ -422,12 +439,14 @@ int hfsplus_compare_dentry(const struct dentry *parent, c1 = *dstr1; c2 = *dstr2; if (casefold) { - if (!(c1 = case_fold(c1))) { + c1 = case_fold(c1); + if (!c1) { dstr1++; dsize1--; continue; } - if (!(c2 = case_fold(c2))) { + c2 = case_fold(c2); + if (!c2) { dstr2++; dsize2--; continue; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 4ac88ff79aa6..10e515a0d452 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -31,25 +31,67 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) complete(bio->bi_private); } -int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, - void *data, int rw) +/* + * hfsplus_submit_bio - Perfrom block I/O + * @sb: super block of volume for I/O + * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes + * @buf: buffer for I/O + * @data: output pointer for location of requested data + * @rw: direction of I/O + * + * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than + * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads + * @data will return a pointer to the start of the requested sector, + * which may not be the same location as @buf. + * + * If @sector is not aligned to the bdev logical block size it will + * be rounded down. For writes this means that @buf should contain data + * that starts at the rounded-down address. As long as the data was + * read using hfsplus_submit_bio() and the same buffer is used things + * will work correctly. + */ +int hfsplus_submit_bio(struct super_block *sb, sector_t sector, + void *buf, void **data, int rw) { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; + unsigned int io_size; + loff_t start; + int offset; + + /* + * Align sector to hardware sector size and find offset. We + * assume that io_size is a power of two, which _should_ + * be true. + */ + io_size = hfsplus_min_io_size(sb); + start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT; + offset = start & (io_size - 1); + sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; - bio->bi_bdev = bdev; + bio->bi_bdev = sb->s_bdev; bio->bi_end_io = hfsplus_end_io_sync; bio->bi_private = &wait; - /* - * We always submit one sector at a time, so bio_add_page must not fail. - */ - if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, - offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) - BUG(); + if (!(rw & WRITE) && data) + *data = (u8 *)buf + offset; + + while (io_size > 0) { + unsigned int page_offset = offset_in_page(buf); + unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset, + io_size); + + ret = bio_add_page(bio, virt_to_page(buf), len, page_offset); + if (ret != len) { + ret = -EIO; + goto out; + } + io_size -= len; + buf = (u8 *)buf + len; + } submit_bio(rw, bio); wait_for_completion(&wait); @@ -57,8 +99,9 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; +out: bio_put(bio); - return ret; + return ret < 0 ? ret : 0; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) @@ -141,23 +184,19 @@ int hfsplus_read_wrapper(struct super_block *sb) if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; - if ((u64)part_start + part_size > 0x100000000ULL) { - pr_err("hfs: volumes larger than 2TB are not supported yet\n"); - goto out; - } error = -ENOMEM; - sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); - if (!sbi->s_vhdr) + sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); + if (!sbi->s_vhdr_buf) goto out; - sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); - if (!sbi->s_backup_vhdr) + sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); + if (!sbi->s_backup_vhdr_buf) goto out_free_vhdr; reread: - error = hfsplus_submit_bio(sb->s_bdev, - part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr, READ); + error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, + sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, + READ); if (error) goto out_free_backup_vhdr; @@ -172,8 +211,9 @@ reread: if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; - part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; - part_size = wd.embed_count * wd.ablk_size; + part_start += (sector_t)wd.ablk_start + + (sector_t)wd.embed_start * wd.ablk_size; + part_size = (sector_t)wd.embed_count * wd.ablk_size; goto reread; default: /* @@ -186,9 +226,9 @@ reread: goto reread; } - error = hfsplus_submit_bio(sb->s_bdev, - part_start + part_size - 2, - sbi->s_backup_vhdr, READ); + error = hfsplus_submit_bio(sb, part_start + part_size - 2, + sbi->s_backup_vhdr_buf, + (void **)&sbi->s_backup_vhdr, READ); if (error) goto out_free_backup_vhdr; diff --git a/fs/proc/array.c b/fs/proc/array.c index 9b45ee84fbcc..3a1dafd228d1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -172,7 +172,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; tpid = 0; if (pid_alive(p)) { - struct task_struct *tracer = tracehook_tracer_task(p); + struct task_struct *tracer = ptrace_parent(p); if (tracer) tpid = task_pid_nr_ns(tracer, ns); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 3dc5e2a5cc38..91fb655a5cbf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -216,7 +216,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task) if (task_is_stopped_or_traced(task)) { int match; rcu_read_lock(); - match = (tracehook_tracer_task(task) == current); + match = (ptrace_parent(task) == current); rcu_read_unlock(); if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) return mm; diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 87cd0ead8633..fb3b5c813a30 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c) * If the root TNC node is dirty, we definitely have something to * commit. */ - if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) + if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) return 0; /* @@ -418,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c) spin_lock(&c->cs_lock); if (c->cmt_state == COMMIT_BROKEN) { - err = -EINVAL; + err = -EROFS; goto out; } @@ -444,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c) * re-check it. */ if (c->cmt_state == COMMIT_BROKEN) { - err = -EINVAL; + err = -EROFS; goto out_cmt_unlock; } @@ -576,7 +576,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) struct idx_node *i; size_t sz; - if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) + if (!dbg_is_chk_index(c)) return 0; INIT_LIST_HEAD(&list); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 0bb2bcef0de9..eef109a1a927 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -27,13 +27,12 @@ * various local functions of those subsystems. */ -#define UBIFS_DBG_PRESERVE_UBI - -#include "ubifs.h" #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/debugfs.h> #include <linux/math64.h> +#include <linux/uaccess.h> +#include <linux/random.h> +#include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -42,15 +41,6 @@ DEFINE_SPINLOCK(dbg_lock); static char dbg_key_buf0[128]; static char dbg_key_buf1[128]; -unsigned int ubifs_chk_flags; -unsigned int ubifs_tst_flags; - -module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); - -MODULE_PARM_DESC(debug_chks, "Debug check flags"); -MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); - static const char *get_key_fmt(int fmt) { switch (fmt) { @@ -91,6 +81,28 @@ static const char *get_key_type(int type) } } +static const char *get_dent_type(int type) +{ + switch (type) { + case UBIFS_ITYPE_REG: + return "file"; + case UBIFS_ITYPE_DIR: + return "dir"; + case UBIFS_ITYPE_LNK: + return "symlink"; + case UBIFS_ITYPE_BLK: + return "blkdev"; + case UBIFS_ITYPE_CHR: + return "char dev"; + case UBIFS_ITYPE_FIFO: + return "fifo"; + case UBIFS_ITYPE_SOCK: + return "socket"; + default: + return "unknown/invalid type"; + } +} + static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer) { @@ -234,9 +246,13 @@ static void dump_ch(const struct ubifs_ch *ch) printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); } -void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) +void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) { const struct ubifs_inode *ui = ubifs_inode(inode); + struct qstr nm = { .name = NULL }; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + int count = 2; printk(KERN_DEBUG "Dump in-memory inode:"); printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); @@ -270,6 +286,32 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); + + if (!S_ISDIR(inode->i_mode)) + return; + + printk(KERN_DEBUG "List of directory entries:\n"); + ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); + + lowest_dent_key(c, &key, inode->i_ino); + while (1) { + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + if (PTR_ERR(dent) != -ENOENT) + printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); + break; + } + + printk(KERN_DEBUG "\t%d: %s (%s)\n", + count++, dent->name, get_dent_type(dent->type)); + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); } void dbg_dump_node(const struct ubifs_info *c, const void *node) @@ -278,7 +320,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) union ubifs_key key; const struct ubifs_ch *ch = node; - if (dbg_failure_mode) + if (dbg_is_tst_rcvry(c)) return; /* If the magic is incorrect, just hexdump the first bytes */ @@ -834,7 +876,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) struct ubifs_scan_node *snod; void *buf; - if (dbg_failure_mode) + if (dbg_is_tst_rcvry(c)) return; printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", @@ -1080,6 +1122,7 @@ out: /** * dbg_check_synced_i_size - check synchronized inode size. + * @c: UBIFS file-system description object * @inode: inode to check * * If inode is clean, synchronized inode size has to be equivalent to current @@ -1087,12 +1130,12 @@ out: * has to be locked). Returns %0 if synchronized inode size if correct, and * %-EINVAL if not. */ -int dbg_check_synced_i_size(struct inode *inode) +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) { int err = 0; struct ubifs_inode *ui = ubifs_inode(inode); - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (!S_ISREG(inode->i_mode)) return 0; @@ -1125,7 +1168,7 @@ int dbg_check_synced_i_size(struct inode *inode) * Note, it is good idea to make sure the @dir->i_mutex is locked before * calling this function. */ -int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) { unsigned int nlink = 2; union ubifs_key key; @@ -1133,7 +1176,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) struct qstr nm = { .name = NULL }; loff_t size = UBIFS_INO_NODE_SZ; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (!S_ISDIR(dir->i_mode)) @@ -1167,12 +1210,14 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) "but calculated size is %llu", dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); + dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } if (dir->i_nlink != nlink) { ubifs_err("directory inode %lu has nlink %u, but calculated " "nlink is %u", dir->i_ino, dir->i_nlink, nlink); + dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } @@ -1489,7 +1534,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) long clean_cnt = 0, dirty_cnt = 0; int err, last; - if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) + if (!dbg_is_chk_index(c)) return 0; ubifs_assert(mutex_is_locked(&c->tnc_mutex)); @@ -1736,7 +1781,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) int err; long long calc = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) + if (!dbg_is_chk_index(c)) return 0; err = dbg_walk_index(c, NULL, add_size, &calc); @@ -2312,7 +2357,7 @@ int dbg_check_filesystem(struct ubifs_info *c) int err; struct fsck_data fsckd; - if (!(ubifs_chk_flags & UBIFS_CHK_FS)) + if (!dbg_is_chk_fs(c)) return 0; fsckd.inodes = RB_ROOT; @@ -2347,7 +2392,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2414,7 +2459,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2491,214 +2536,141 @@ error_dump: return 0; } -int dbg_force_in_the_gaps(void) +static inline int chance(unsigned int n, unsigned int out_of) { - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) - return 0; + return !!((random32() % out_of) + 1 <= n); - return !(random32() & 7); } -/* Failure mode for recovery testing */ - -#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) - -struct failure_mode_info { - struct list_head list; - struct ubifs_info *c; -}; - -static LIST_HEAD(fmi_list); -static DEFINE_SPINLOCK(fmi_lock); - -static unsigned int next; - -static int simple_rand(void) -{ - if (next == 0) - next = current->pid; - next = next * 1103515245 + 12345; - return (next >> 16) & 32767; -} - -static void failure_mode_init(struct ubifs_info *c) -{ - struct failure_mode_info *fmi; - - fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); - if (!fmi) { - ubifs_err("Failed to register failure mode - no memory"); - return; - } - fmi->c = c; - spin_lock(&fmi_lock); - list_add_tail(&fmi->list, &fmi_list); - spin_unlock(&fmi_lock); -} - -static void failure_mode_exit(struct ubifs_info *c) +static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) { - struct failure_mode_info *fmi, *tmp; - - spin_lock(&fmi_lock); - list_for_each_entry_safe(fmi, tmp, &fmi_list, list) - if (fmi->c == c) { - list_del(&fmi->list); - kfree(fmi); - } - spin_unlock(&fmi_lock); -} - -static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) -{ - struct failure_mode_info *fmi; - - spin_lock(&fmi_lock); - list_for_each_entry(fmi, &fmi_list, list) - if (fmi->c->ubi == desc) { - struct ubifs_info *c = fmi->c; - - spin_unlock(&fmi_lock); - return c; - } - spin_unlock(&fmi_lock); - return NULL; -} - -static int in_failure_mode(struct ubi_volume_desc *desc) -{ - struct ubifs_info *c = dbg_find_info(desc); - - if (c && dbg_failure_mode) - return c->dbg->failure_mode; - return 0; -} + struct ubifs_debug_info *d = c->dbg; -static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) -{ - struct ubifs_info *c = dbg_find_info(desc); - struct ubifs_debug_info *d; + ubifs_assert(dbg_is_tst_rcvry(c)); - if (!c || !dbg_failure_mode) - return 0; - d = c->dbg; - if (d->failure_mode) - return 1; - if (!d->fail_cnt) { - /* First call - decide delay to failure */ + if (!d->pc_cnt) { + /* First call - decide delay to the power cut */ if (chance(1, 2)) { - unsigned int delay = 1 << (simple_rand() >> 11); + unsigned long delay; if (chance(1, 2)) { - d->fail_delay = 1; - d->fail_timeout = jiffies + - msecs_to_jiffies(delay); - dbg_rcvry("failing after %ums", delay); + d->pc_delay = 1; + /* Fail withing 1 minute */ + delay = random32() % 60000; + d->pc_timeout = jiffies; + d->pc_timeout += msecs_to_jiffies(delay); + ubifs_warn("failing after %lums", delay); } else { - d->fail_delay = 2; - d->fail_cnt_max = delay; - dbg_rcvry("failing after %u calls", delay); + d->pc_delay = 2; + delay = random32() % 10000; + /* Fail within 10000 operations */ + d->pc_cnt_max = delay; + ubifs_warn("failing after %lu calls", delay); } } - d->fail_cnt += 1; + + d->pc_cnt += 1; } + /* Determine if failure delay has expired */ - if (d->fail_delay == 1) { - if (time_before(jiffies, d->fail_timeout)) + if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) return 0; - } else if (d->fail_delay == 2) - if (d->fail_cnt++ < d->fail_cnt_max) + if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) return 0; + if (lnum == UBIFS_SB_LNUM) { - if (write) { - if (chance(1, 2)) - return 0; - } else if (chance(19, 20)) + if (write && chance(1, 2)) + return 0; + if (chance(19, 20)) return 0; - dbg_rcvry("failing in super block LEB %d", lnum); + ubifs_warn("failing in super block LEB %d", lnum); } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { if (chance(19, 20)) return 0; - dbg_rcvry("failing in master LEB %d", lnum); + ubifs_warn("failing in master LEB %d", lnum); } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { - if (write) { - if (chance(99, 100)) - return 0; - } else if (chance(399, 400)) + if (write && chance(99, 100)) return 0; - dbg_rcvry("failing in log LEB %d", lnum); + if (chance(399, 400)) + return 0; + ubifs_warn("failing in log LEB %d", lnum); } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { - if (write) { - if (chance(7, 8)) - return 0; - } else if (chance(19, 20)) + if (write && chance(7, 8)) return 0; - dbg_rcvry("failing in LPT LEB %d", lnum); + if (chance(19, 20)) + return 0; + ubifs_warn("failing in LPT LEB %d", lnum); } else if (lnum >= c->orph_first && lnum <= c->orph_last) { - if (write) { - if (chance(1, 2)) - return 0; - } else if (chance(9, 10)) + if (write && chance(1, 2)) + return 0; + if (chance(9, 10)) return 0; - dbg_rcvry("failing in orphan LEB %d", lnum); + ubifs_warn("failing in orphan LEB %d", lnum); } else if (lnum == c->ihead_lnum) { if (chance(99, 100)) return 0; - dbg_rcvry("failing in index head LEB %d", lnum); + ubifs_warn("failing in index head LEB %d", lnum); } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { if (chance(9, 10)) return 0; - dbg_rcvry("failing in GC head LEB %d", lnum); + ubifs_warn("failing in GC head LEB %d", lnum); } else if (write && !RB_EMPTY_ROOT(&c->buds) && !ubifs_search_bud(c, lnum)) { if (chance(19, 20)) return 0; - dbg_rcvry("failing in non-bud LEB %d", lnum); + ubifs_warn("failing in non-bud LEB %d", lnum); } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || c->cmt_state == COMMIT_RUNNING_REQUIRED) { if (chance(999, 1000)) return 0; - dbg_rcvry("failing in bud LEB %d commit running", lnum); + ubifs_warn("failing in bud LEB %d commit running", lnum); } else { if (chance(9999, 10000)) return 0; - dbg_rcvry("failing in bud LEB %d commit not running", lnum); + ubifs_warn("failing in bud LEB %d commit not running", lnum); } - ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); - d->failure_mode = 1; + + d->pc_happened = 1; + ubifs_warn("========== Power cut emulated =========="); dump_stack(); return 1; } -static void cut_data(const void *buf, int len) +static void cut_data(const void *buf, unsigned int len) { - int flen, i; + unsigned int from, to, i, ffs = chance(1, 2); unsigned char *p = (void *)buf; - flen = (len * (long long)simple_rand()) >> 15; - for (i = flen; i < len; i++) - p[i] = 0xff; -} + from = random32() % (len + 1); + if (chance(1, 2)) + to = random32() % (len - from + 1); + else + to = len; -int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, - int len, int check) -{ - if (in_failure_mode(desc)) - return -EROFS; - return ubi_leb_read(desc, lnum, buf, offset, len, check); + if (from < to) + ubifs_warn("filled bytes %u-%u with %s", from, to - 1, + ffs ? "0xFFs" : "random data"); + + if (ffs) + for (i = from; i < to; i++) + p[i] = 0xFF; + else + for (i = from; i < to; i++) + p[i] = random32() % 0x100; } -int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype) +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, + int offs, int len, int dtype) { int err, failing; - if (in_failure_mode(desc)) + if (c->dbg->pc_happened) return -EROFS; - failing = do_fail(desc, lnum, 1); + + failing = power_cut_emulated(c, lnum, 1); if (failing) cut_data(buf, len); - err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); if (err) return err; if (failing) @@ -2706,162 +2678,207 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, return 0; } -int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, int dtype) { int err; - if (do_fail(desc, lnum, 1)) + if (c->dbg->pc_happened) return -EROFS; - err = ubi_leb_change(desc, lnum, buf, len, dtype); + if (power_cut_emulated(c, lnum, 1)) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); if (err) return err; - if (do_fail(desc, lnum, 1)) + if (power_cut_emulated(c, lnum, 1)) return -EROFS; return 0; } -int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) +int dbg_leb_unmap(struct ubifs_info *c, int lnum) { int err; - if (do_fail(desc, lnum, 0)) + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 0)) return -EROFS; - err = ubi_leb_erase(desc, lnum); + err = ubi_leb_unmap(c->ubi, lnum); if (err) return err; - if (do_fail(desc, lnum, 0)) + if (power_cut_emulated(c, lnum, 0)) return -EROFS; return 0; } -int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) +int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype) { int err; - if (do_fail(desc, lnum, 0)) + if (c->dbg->pc_happened) return -EROFS; - err = ubi_leb_unmap(desc, lnum); + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + err = ubi_leb_map(c->ubi, lnum, dtype); if (err) return err; - if (do_fail(desc, lnum, 0)) + if (power_cut_emulated(c, lnum, 0)) return -EROFS; return 0; } -int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) -{ - if (in_failure_mode(desc)) - return -EROFS; - return ubi_is_mapped(desc, lnum); -} +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *dfs_rootdir; -int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +static int dfs_file_open(struct inode *inode, struct file *file) { - int err; - - if (do_fail(desc, lnum, 0)) - return -EROFS; - err = ubi_leb_map(desc, lnum, dtype); - if (err) - return err; - if (do_fail(desc, lnum, 0)) - return -EROFS; - return 0; + file->private_data = inode->i_private; + return nonseekable_open(inode, file); } /** - * ubifs_debugging_init - initialize UBIFS debugging. - * @c: UBIFS file-system description object + * provide_user_output - provide output to the user reading a debugfs file. + * @val: boolean value for the answer + * @u: the buffer to store the answer at + * @count: size of the buffer + * @ppos: position in the @u output buffer * - * This function initializes debugging-related data for the file system. - * Returns zero in case of success and a negative error code in case of + * This is a simple helper function which stores @val boolean value in the user + * buffer when the user reads one of UBIFS debugfs files. Returns amount of + * bytes written to @u in case of success and a negative error code in case of * failure. */ -int ubifs_debugging_init(struct ubifs_info *c) +static int provide_user_output(int val, char __user *u, size_t count, + loff_t *ppos) { - c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); - if (!c->dbg) - return -ENOMEM; + char buf[3]; - failure_mode_init(c); - return 0; + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; + + return simple_read_from_buffer(u, count, ppos, buf, 2); } -/** - * ubifs_debugging_exit - free debugging data. - * @c: UBIFS file-system description object - */ -void ubifs_debugging_exit(struct ubifs_info *c) +static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, + loff_t *ppos) { - failure_mode_exit(c); - kfree(c->dbg); -} + struct dentry *dent = file->f_path.dentry; + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + int val; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_index) + val = d->chk_index; + else if (dent == d->dfs_chk_orph) + val = d->chk_orph; + else if (dent == d->dfs_chk_lprops) + val = d->chk_lprops; + else if (dent == d->dfs_chk_fs) + val = d->chk_fs; + else if (dent == d->dfs_tst_rcvry) + val = d->tst_rcvry; + else + return -EINVAL; -/* - * Root directory for UBIFS stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular file-system mounts. - */ -static struct dentry *dfs_rootdir; + return provide_user_output(val, u, count, ppos); +} /** - * dbg_debugfs_init - initialize debugfs file-system. + * interpret_user_input - interpret user debugfs file input. + * @u: user-provided buffer with the input + * @count: buffer size * - * UBIFS uses debugfs file-system to expose various debugging knobs to - * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. + * This is a helper function which interpret user input to a boolean UBIFS + * debugfs file. Returns %0 or %1 in case of success and a negative error code + * in case of failure. */ -int dbg_debugfs_init(void) +static int interpret_user_input(const char __user *u, size_t count) { - dfs_rootdir = debugfs_create_dir("ubifs", NULL); - if (IS_ERR(dfs_rootdir)) { - int err = PTR_ERR(dfs_rootdir); - ubifs_err("cannot create \"ubifs\" debugfs directory, " - "error %d\n", err); - return err; - } + size_t buf_size; + char buf[8]; - return 0; -} + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, u, buf_size)) + return -EFAULT; -/** - * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. - */ -void dbg_debugfs_exit(void) -{ - debugfs_remove(dfs_rootdir); -} + if (buf[0] == '1') + return 1; + else if (buf[0] == '0') + return 0; -static int open_debugfs_file(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return nonseekable_open(inode, file); + return -EINVAL; } -static ssize_t write_debugfs_file(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t dfs_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) { struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; + struct dentry *dent = file->f_path.dentry; + int val; - if (file->f_path.dentry == d->dfs_dump_lprops) + /* + * TODO: this is racy - the file-system might have already been + * unmounted and we'd oops in this case. The plan is to fix it with + * help of 'iterate_supers_type()' which we should have in v3.0: when + * a debugfs opened, we rember FS's UUID in file->private_data. Then + * whenever we access the FS via a debugfs file, we iterate all UBIFS + * superblocks and fine the one with the same UUID, and take the + * locking right. + * + * The other way to go suggested by Al Viro is to create a separate + * 'ubifs-debug' file-system instead. + */ + if (file->f_path.dentry == d->dfs_dump_lprops) { dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dfs_dump_budg) + return count; + } + if (file->f_path.dentry == d->dfs_dump_budg) { dbg_dump_budg(c, &c->bi); - else if (file->f_path.dentry == d->dfs_dump_tnc) { + return count; + } + if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); - } else + return count; + } + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_index) + d->chk_index = val; + else if (dent == d->dfs_chk_orph) + d->chk_orph = val; + else if (dent == d->dfs_chk_lprops) + d->chk_lprops = val; + else if (dent == d->dfs_chk_fs) + d->chk_fs = val; + else if (dent == d->dfs_tst_rcvry) + d->tst_rcvry = val; + else return -EINVAL; return count; } static const struct file_operations dfs_fops = { - .open = open_debugfs_file, - .write = write_debugfs_file, + .open = dfs_file_open, + .read = dfs_file_read, + .write = dfs_file_write, .owner = THIS_MODULE, .llseek = no_llseek, }; @@ -2880,12 +2897,20 @@ static const struct file_operations dfs_fops = { */ int dbg_debugfs_init_fs(struct ubifs_info *c) { - int err; + int err, n; const char *fname; struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); + if (n == UBIFS_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBIFS_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); + goto out; + } + fname = d->dfs_dir_name; dent = debugfs_create_dir(fname, dfs_rootdir); if (IS_ERR_OR_NULL(dent)) @@ -2910,13 +2935,55 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) goto out_remove; d->dfs_dump_tnc = dent; + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_tst_rcvry = dent; + return 0; out_remove: debugfs_remove_recursive(d->dfs_dir); out: err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", fname, err); return err; } @@ -2930,4 +2997,179 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c) debugfs_remove_recursive(c->dbg->dfs_dir); } +struct ubifs_global_debug_info ubifs_dbg; + +static struct dentry *dfs_chk_gen; +static struct dentry *dfs_chk_index; +static struct dentry *dfs_chk_orph; +static struct dentry *dfs_chk_lprops; +static struct dentry *dfs_chk_fs; +static struct dentry *dfs_tst_rcvry; + +static ssize_t dfs_global_file_read(struct file *file, char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + if (dent == dfs_chk_gen) + val = ubifs_dbg.chk_gen; + else if (dent == dfs_chk_index) + val = ubifs_dbg.chk_index; + else if (dent == dfs_chk_orph) + val = ubifs_dbg.chk_orph; + else if (dent == dfs_chk_lprops) + val = ubifs_dbg.chk_lprops; + else if (dent == dfs_chk_fs) + val = ubifs_dbg.chk_fs; + else if (dent == dfs_tst_rcvry) + val = ubifs_dbg.tst_rcvry; + else + return -EINVAL; + + return provide_user_output(val, u, count, ppos); +} + +static ssize_t dfs_global_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == dfs_chk_gen) + ubifs_dbg.chk_gen = val; + else if (dent == dfs_chk_index) + ubifs_dbg.chk_index = val; + else if (dent == dfs_chk_orph) + ubifs_dbg.chk_orph = val; + else if (dent == dfs_chk_lprops) + ubifs_dbg.chk_lprops = val; + else if (dent == dfs_chk_fs) + ubifs_dbg.chk_fs = val; + else if (dent == dfs_tst_rcvry) + ubifs_dbg.tst_rcvry = val; + else + return -EINVAL; + + return count; +} + +static const struct file_operations dfs_global_fops = { + .read = dfs_global_file_read, + .write = dfs_global_file_write, + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +/** + * dbg_debugfs_init - initialize debugfs file-system. + * + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. + */ +int dbg_debugfs_init(void) +{ + int err; + const char *fname; + struct dentry *dent; + + fname = "ubifs"; + dent = debugfs_create_dir(fname, NULL); + if (IS_ERR_OR_NULL(dent)) + goto out; + dfs_rootdir = dent; + + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_tst_rcvry = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(dfs_rootdir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + debugfs_remove_recursive(dfs_rootdir); +} + +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + return 0; +} + +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ + kfree(c->dbg); +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index a811ac4a26bb..45174b534377 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -31,18 +31,25 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, #ifdef CONFIG_UBIFS_FS_DEBUG -#include <linux/random.h> +/* + * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" + * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + */ +#define UBIFS_DFS_DIR_NAME "ubi%d_%d" +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) /** * ubifs_debug_info - per-FS debugging information. * @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * + * @pc_happened: non-zero if an emulated power cut happened + * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @pc_timeout: time in jiffies when delay of failure mode expires + * @pc_cnt: current number of calls to failure mode I/O functions + * @pc_cnt_max: number of calls by which to delay failure mode + * * @chk_lpt_sz: used by LPT tree size checker * @chk_lpt_sz2: used by LPT tree size checker * @chk_lpt_wastage: used by LPT tree size checker @@ -56,21 +63,36 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @saved_free: saved amount of free space * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled + * * @dfs_dir_name: name of debugfs directory containing this file-system's files * @dfs_dir: direntry object of the file-system debugfs directory * @dfs_dump_lprops: "dump lprops" debugfs knob * @dfs_dump_budg: "dump budgeting information" debugfs knob * @dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks + * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks + * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks + * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks + * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks + * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing */ struct ubifs_debug_info { struct ubifs_zbranch old_zroot; int old_zroot_level; unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; + + int pc_happened; + int pc_delay; + unsigned long pc_timeout; + unsigned int pc_cnt; + unsigned int pc_cnt_max; + long long chk_lpt_sz; long long chk_lpt_sz2; long long chk_lpt_wastage; @@ -84,11 +106,43 @@ struct ubifs_debug_info { long long saved_free; int saved_idx_gc_cnt; - char dfs_dir_name[100]; + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; + + char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; struct dentry *dfs_dump_tnc; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_index; + struct dentry *dfs_chk_orph; + struct dentry *dfs_chk_lprops; + struct dentry *dfs_chk_fs; + struct dentry *dfs_tst_rcvry; +}; + +/** + * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. + * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled + */ +struct ubifs_global_debug_info { + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; }; #define ubifs_assert(expr) do { \ @@ -127,6 +181,8 @@ const char *dbg_key_str1(const struct ubifs_info *c, #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) +extern spinlock_t dbg_lock; + #define ubifs_dbg_msg(type, fmt, ...) do { \ spin_lock(&dbg_lock); \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ @@ -162,41 +218,36 @@ const char *dbg_key_str1(const struct ubifs_info *c, /* Additional recovery messages */ #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) -/* - * Debugging check flags. - * - * UBIFS_CHK_GEN: general checks - * UBIFS_CHK_TNC: check TNC - * UBIFS_CHK_IDX_SZ: check index size - * UBIFS_CHK_ORPH: check orphans - * UBIFS_CHK_OLD_IDX: check the old index - * UBIFS_CHK_LPROPS: check lprops - * UBIFS_CHK_FS: check the file-system - */ -enum { - UBIFS_CHK_GEN = 0x1, - UBIFS_CHK_TNC = 0x2, - UBIFS_CHK_IDX_SZ = 0x4, - UBIFS_CHK_ORPH = 0x8, - UBIFS_CHK_OLD_IDX = 0x10, - UBIFS_CHK_LPROPS = 0x20, - UBIFS_CHK_FS = 0x40, -}; - -/* - * Special testing flags. - * - * UBIFS_TST_RCVRY: failure mode for recovery testing - */ -enum { - UBIFS_TST_RCVRY = 0x4, -}; - -extern spinlock_t dbg_lock; +extern struct ubifs_global_debug_info ubifs_dbg; -extern unsigned int ubifs_msg_flags; -extern unsigned int ubifs_chk_flags; -extern unsigned int ubifs_tst_flags; +static inline int dbg_is_chk_gen(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); +} +static inline int dbg_is_chk_index(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_index || c->dbg->chk_index); +} +static inline int dbg_is_chk_orph(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); +} +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); +} +static inline int dbg_is_chk_fs(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); +} +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); +} +static inline int dbg_is_power_cut(const struct ubifs_info *c) +{ + return !!c->dbg->pc_happened; +} int ubifs_debugging_init(struct ubifs_info *c); void ubifs_debugging_exit(struct ubifs_info *c); @@ -207,7 +258,7 @@ const char *dbg_cstate(int cmt_state); const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); -void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); +void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, int offs); @@ -240,8 +291,8 @@ int dbg_check_cats(struct ubifs_info *c); int dbg_check_ltab(struct ubifs_info *c); int dbg_chk_lpt_free_spc(struct ubifs_info *c); int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); -int dbg_check_synced_i_size(struct inode *inode); -int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); int dbg_check_tnc(struct ubifs_info *c, int extra); int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); int dbg_check_filesystem(struct ubifs_info *c); @@ -254,54 +305,12 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); -/* Force the use of in-the-gaps method for testing */ -static inline int dbg_force_in_the_gaps_enabled(void) -{ - return ubifs_chk_flags & UBIFS_CHK_GEN; -} -int dbg_force_in_the_gaps(void); - -/* Failure mode for recovery testing */ -#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) - -#ifndef UBIFS_DBG_PRESERVE_UBI -#define ubi_leb_read dbg_leb_read -#define ubi_leb_write dbg_leb_write -#define ubi_leb_change dbg_leb_change -#define ubi_leb_erase dbg_leb_erase -#define ubi_leb_unmap dbg_leb_unmap -#define ubi_is_mapped dbg_is_mapped -#define ubi_leb_map dbg_leb_map -#endif - -int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, - int len, int check); -int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype); -int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, - int len, int dtype); -int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); -int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); -int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); -int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); - -static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, - int offset, int len) -{ - return dbg_leb_read(desc, lnum, buf, offset, len, 0); -} - -static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, - const void *buf, int offset, int len) -{ - return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); -} - -static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, - const void *buf, int len) -{ - return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); -} +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len, int dtype); +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, + int dtype); +int dbg_leb_unmap(struct ubifs_info *c, int lnum); +int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype); /* Debugfs-related stuff */ int dbg_debugfs_init(void); @@ -313,7 +322,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); /* Use "if (0)" to make compiler check arguments even if debugging is off */ #define ubifs_assert(expr) do { \ - if (0 && (expr)) \ + if (0) \ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ } while (0) @@ -323,6 +332,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); ubifs_err(fmt, ##__VA_ARGS__); \ } while (0) +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + #define ubifs_dbg_msg(fmt, ...) do { \ if (0) \ pr_debug(fmt "\n", ##__VA_ARGS__); \ @@ -346,9 +358,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) -#define DBGKEY(key) ((char *)(key)) -#define DBGKEY1(key) ((char *)(key)) - static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } static inline const char *dbg_ntype(int type) { return ""; } @@ -357,7 +366,7 @@ static inline const char *dbg_jhead(int jhead) { return ""; } static inline const char * dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key) { return ""; } -static inline void dbg_dump_inode(const struct ubifs_info *c, +static inline void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) { return; } static inline void dbg_dump_node(const struct ubifs_info *c, const void *node) { return; } @@ -409,9 +418,11 @@ static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { return 0; } -static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } -static inline int dbg_check_dir_size(struct ubifs_info *c, - const struct inode *dir) { return 0; } +static inline int +dbg_check_synced_i_size(const struct ubifs_info *c, + struct inode *inode) { return 0; } +static inline int dbg_check_dir(struct ubifs_info *c, + const struct inode *dir) { return 0; } static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } static inline int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) { return 0; } @@ -431,9 +442,23 @@ static inline int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) { return 0; } -static inline int dbg_force_in_the_gaps(void) { return 0; } -#define dbg_force_in_the_gaps_enabled() 0 -#define dbg_failure_mode 0 +static inline int dbg_leb_write(struct ubifs_info *c, int lnum, + const void *buf, int offset, + int len, int dtype) { return 0; } +static inline int dbg_leb_change(struct ubifs_info *c, int lnum, + const void *buf, int len, + int dtype) { return 0; } +static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; } +static inline int dbg_leb_map(struct ubifs_info *c, int lnum, + int dtype) { return 0; } + +static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; } static inline int dbg_debugfs_init(void) { return 0; } static inline void dbg_debugfs_exit(void) { return; } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef5abd38f0bf..683492043317 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, * UBIFS has to fully control "clean <-> dirty" transitions of inodes * to make budgeting work. */ - inode->i_flags |= (S_NOCMTIME); + inode->i_flags |= S_NOCMTIME; inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = @@ -172,9 +172,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, #ifdef CONFIG_UBIFS_FS_DEBUG -static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) +static int dbg_check_name(const struct ubifs_info *c, + const struct ubifs_dent_node *dent, + const struct qstr *nm) { - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (le16_to_cpu(dent->nlen) != nm->len) return -EINVAL; @@ -185,7 +187,7 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) #else -#define dbg_check_name(dent, nm) 0 +#define dbg_check_name(c, dent, nm) 0 #endif @@ -219,7 +221,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if (dbg_check_name(dent, &dentry->d_name)) { + if (dbg_check_name(c, dent, &dentry->d_name)) { err = -EINVAL; goto out; } @@ -522,7 +524,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -577,7 +579,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 89ef9a2f7837..f9c234bf33d3 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3be645e012c9..9228950a658f 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -86,8 +86,125 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) c->no_chk_data_crc = 0; c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); + dump_stack(); + } +} + +/* + * Below are simple wrappers over UBI I/O functions which include some + * additional checks and UBIFS debugging stuff. See corresponding UBI function + * for more information. + */ + +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg) +{ + int err; + + err = ubi_read(c->ubi, lnum, buf, offs, len); + /* + * In case of %-EBADMSG print the error message only if the + * @even_ebadmsg is true. + */ + if (err && (err != -EBADMSG || even_ebadmsg)) { + ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", + len, lnum, offs, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + else + err = dbg_leb_write(c, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", + len, lnum, offs, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, + int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + else + err = dbg_leb_change(c, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d failed, error %d", + len, lnum, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_unmap(struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_unmap(c->ubi, lnum); + else + err = dbg_leb_unmap(c, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_map(c->ubi, lnum, dtype); + else + err = dbg_leb_map(c, lnum, dtype); + if (err) { + ubifs_err("mapping LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_is_mapped(const struct ubifs_info *c, int lnum) +{ + int err; + + err = ubi_is_mapped(c->ubi, lnum); + if (err < 0) { + ubifs_err("ubi_is_mapped failed for LEB %d, error %d", + lnum, err); dbg_dump_stack(); } + return err; } /** @@ -406,14 +523,10 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) dirt = sync_len - wbuf->used; if (dirt) ubifs_pad(c, wbuf->buf + wbuf->used, dirt); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, - sync_len, wbuf->dtype); - if (err) { - ubifs_err("cannot write %d bytes to LEB %d:%d", - sync_len, wbuf->lnum, wbuf->offs); - dbg_dump_stack(); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len, + wbuf->dtype); + if (err) return err; - } spin_lock(&wbuf->lock); wbuf->offs += sync_len; @@ -605,9 +718,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, - wbuf->offs, wbuf->size, - wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, + wbuf->offs, wbuf->size, + wbuf->dtype); if (err) goto out; @@ -642,8 +755,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -661,8 +774,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ dbg_io("write %d bytes to LEB %d:%d", wbuf->size, wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -683,8 +796,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) n <<= c->max_write_shift; dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, - wbuf->offs, n, wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, n, wbuf->dtype); if (err) goto out; wbuf->offs += n; @@ -766,13 +879,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, return -EROFS; ubifs_prepare_node(c, buf, len, 1); - err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); - if (err) { - ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", - buf_len, lnum, offs, err); + err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype); + if (err) dbg_dump_node(c, buf); - dbg_dump_stack(); - } return err; } @@ -824,13 +933,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, if (rlen > 0) { /* Read everything that goes before write-buffer */ - err = ubi_read(c->ubi, lnum, buf, offs, rlen); - if (err && err != -EBADMSG) { - ubifs_err("failed to read node %d from LEB %d:%d, " - "error %d", type, lnum, offs, err); - dbg_dump_stack(); + err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + if (err && err != -EBADMSG) return err; - } } if (type != ch->node_type) { @@ -885,12 +990,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); - err = ubi_read(c->ubi, lnum, buf, offs, len); - if (err && err != -EBADMSG) { - ubifs_err("cannot read node %d from LEB %d:%d, error %d", - type, lnum, offs, err); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + if (err && err != -EBADMSG) return err; - } if (type != ch->node_type) { ubifs_err("bad node type (%d but expected %d)", diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index affea9494ae2..f9fd068d1ae0 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -262,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) * an unclean reboot, because the target LEB might have been * unmapped, but not yet physically erased. */ - err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); + err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM); if (err) goto out_unlock; } @@ -283,8 +283,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: - if (err != -EAGAIN) - ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -752,7 +750,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) struct ubifs_bud *bud; long long bud_bytes = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; spin_lock(&c->buds_lock); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 667884f4a615..f8a181e647cc 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) pnode = (struct ubifs_pnode *)container_of(lprops - pos, struct ubifs_pnode, lprops[0]); - return !test_bit(COW_ZNODE, &pnode->flags) && + return !test_bit(COW_CNODE, &pnode->flags) && test_bit(DIRTY_CNODE, &pnode->flags); } @@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c) struct list_head *pos; int i, cat; - if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) return 0; list_for_each_entry(lprops, &c->empty_list, list) { @@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, { int i = 0, j, err = 0; - if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) return; for (i = 0; i < heap->cnt; i++) { @@ -1262,7 +1262,7 @@ int dbg_check_lprops(struct ubifs_info *c) int i, err; struct ubifs_lp_stats lst; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; /* diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ef5155e109a2..6189c74d97f0 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM); if (err) goto out; p = buf; @@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, /* Write remaining buffer */ memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM); if (err) goto out; @@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); if (err) goto out; err = ubifs_unpack_nnode(c, buf, nnode); @@ -1247,6 +1247,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + dbg_dump_stack(); kfree(nnode); return err; } @@ -1290,7 +1291,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lprops->flags = ubifs_categorize_lprops(c, lprops); } } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); if (err) goto out; err = unpack_pnode(c, buf, pnode); @@ -1312,6 +1313,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); dbg_dump_pnode(c, pnode, parent, iip); + dbg_dump_stack(); dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; @@ -1331,7 +1333,7 @@ static int read_ltab(struct ubifs_info *c) buf = vmalloc(c->ltab_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); if (err) goto out; err = unpack_ltab(c, buf); @@ -1354,7 +1356,8 @@ static int read_lsave(struct ubifs_info *c) buf = vmalloc(c->lsave_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); + err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, + c->lsave_sz, 1); if (err) goto out; err = unpack_lsave(c, buf); @@ -1814,8 +1817,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, - c->nnode_sz); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->nnode_sz, 1); if (err) return ERR_PTR(err); err = ubifs_unpack_nnode(c, buf, nnode); @@ -1883,8 +1886,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, ubifs_assert(branch->lnum >= c->lpt_first && branch->lnum <= c->lpt_last); ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); - err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, - c->pnode_sz); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->pnode_sz, 1); if (err) return ERR_PTR(err); err = unpack_pnode(c, buf, pnode); @@ -2224,7 +2227,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, struct ubifs_cnode *cn; int num, iip = 0, err; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; while (cnode) { diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index dfcb5748a7dc..cddd6bd214f4 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -27,6 +27,7 @@ #include <linux/crc16.h> #include <linux/slab.h> +#include <linux/random.h> #include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -116,8 +117,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) return 0; cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); - __set_bit(COW_ZNODE, &cnode->flags); + ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); + __set_bit(COW_CNODE, &cnode->flags); cnext = next_dirty_cnode(cnode); if (!cnext) { cnode->cnext = c->lpt_cnext; @@ -465,7 +466,7 @@ static int write_cnodes(struct ubifs_info *c) */ clear_bit(DIRTY_CNODE, &cnode->flags); smp_mb__before_clear_bit(); - clear_bit(COW_ZNODE, &cnode->flags); + clear_bit(COW_CNODE, &cnode->flags); smp_mb__after_clear_bit(); offs += len; dbg_chk_lpt_sz(c, 1, len); @@ -1160,11 +1161,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) void *buf = c->lpt_buf; dbg_lp("LEB %d", lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - ubifs_err("cannot read LEB %d, error %d", lnum, err); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) return err; - } + while (1) { if (!is_a_node(c, buf, len)) { int pad_len; @@ -1640,7 +1641,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) int ret; void *buf, *p; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); @@ -1650,11 +1651,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) } dbg_lp("LEB %d", lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) goto out; - } + while (1) { if (!is_a_node(c, p, len)) { int i, pad_len; @@ -1711,7 +1712,7 @@ int dbg_check_ltab(struct ubifs_info *c) { int lnum, err, i, cnt; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; /* Bring the entire tree into memory */ @@ -1754,7 +1755,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; for (i = 0; i < c->lpt_lebs; i++) { @@ -1796,7 +1797,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) long long chk_lpt_sz, lpt_sz; int err = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; switch (action) { @@ -1901,11 +1902,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) return; } - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - ubifs_err("cannot read LEB %d, error %d", lnum, err); + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) goto out; - } + while (1) { offs = c->leb_size - len; if (!is_a_node(c, p, len)) { @@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c) struct ubifs_lpt_heap *heap; int i; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (random32() & 3) return 0; diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 0b5296a9a4c5..ee7cb5ebb6e8 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) } /** + * ubifs_zn_obsolete - check if znode is obsolete. + * @znode: znode to check + * + * This helper function returns %1 if @znode is obsolete and %0 otherwise. + */ +static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) +{ + return !!test_bit(OBSOLETE_ZNODE, &znode->flags); +} + +/** + * ubifs_zn_cow - check if znode has to be copied on write. + * @znode: znode to check + * + * This helper function returns %1 if @znode is has COW flag set and %0 + * otherwise. + */ +static inline int ubifs_zn_cow(const struct ubifs_znode *znode) +{ + return !!test_bit(COW_ZNODE, &znode->flags); +} + +/** * ubifs_wake_up_bgt - wake up background thread. * @c: UBIFS file-system description object */ @@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) } /** - * ubifs_leb_unmap - unmap an LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to unmap - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_unmap(c->ubi, lnum); - if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_write - write to a LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @offs: offset within LEB to write to - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, - const void *buf, int offs, int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); - if (err) { - ubifs_err("writing %d bytes at %d:%d, error %d", - len, lnum, offs, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_change - atomic LEB change. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, - const void *buf, int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); - if (err) { - ubifs_err("changing %d bytes in LEB %d, error %d", - len, lnum, err); - return err; - } - - return 0; -} - -/** * ubifs_encode_dev - encode device node IDs. * @dev: UBIFS device node information * @rdev: device IDs to encode diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index a5422fffbd69..c542c73cfa3c 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -929,7 +929,7 @@ static int dbg_check_orphans(struct ubifs_info *c) struct check_info ci; int err; - if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) + if (!dbg_is_chk_orph(c)) return 0; ci.last_ino = 0; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 783d8e0beb76..af02790d9328 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, if (!sbuf) return -ENOMEM; - err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); if (err && err != -EBADMSG) goto out_free; @@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); - err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM); if (err) goto out; - err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM); if (err) goto out; out: @@ -274,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c) if (cor1) goto out_err; mst = mst1; - } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { + } else if (offs1 == 0 && + c->leb_size - offs2 - sz < sz) { /* 1st LEB was unmapped and written, 2nd not */ if (cor1) goto out_err; @@ -539,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, int len = ALIGN(endpt, c->min_io_size); if (start) { - err = ubi_read(c->ubi, lnum, sleb->buf, 0, - start); + err = ubifs_leb_read(c, lnum, sleb->buf, 0, + start, 1); if (err) return err; } @@ -554,8 +555,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ubifs_pad(c, buf, pad_len); } } - err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, - UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sleb->buf, len, + UBI_UNKNOWN); if (err) return err; } @@ -819,7 +820,8 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, return -ENOMEM; if (c->leb_size - offs < UBIFS_CS_NODE_SZ) goto out_err; - err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); + err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, + UBIFS_CS_NODE_SZ, 0); if (err && err != -EBADMSG) goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); @@ -919,8 +921,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * * This function returns %0 on success and a negative error code on failure. */ -static int recover_head(const struct ubifs_info *c, int lnum, int offs, - void *sbuf) +static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) { int len = c->max_write_size, err; @@ -931,15 +932,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, return 0; /* Read at the head location and check it is empty flash */ - err = ubi_read(c->ubi, lnum, sbuf, offs, len); + err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); - err = ubi_read(c->ubi, lnum, sbuf, 0, offs); + err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); if (err) return err; - return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); + return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN); } return 0; @@ -962,7 +963,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) { int err; @@ -993,7 +994,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) * * This function returns %0 on success and a negative error code on failure. */ -static int clean_an_unclean_leb(const struct ubifs_info *c, +static int clean_an_unclean_leb(struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf) { int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; @@ -1009,7 +1010,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, return 0; } - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); if (err && err != -EBADMSG) return err; @@ -1069,7 +1070,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, } /* Write back the LEB atomically */ - err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN); if (err) return err; @@ -1089,7 +1090,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) { dbg_rcvry("recovery"); while (!list_empty(&c->unclean_leb_list)) { @@ -1454,7 +1455,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) if (i_size >= e->d_size) return 0; /* Read the LEB */ - err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); if (err) goto out; /* Change the size field and recalculate the CRC */ @@ -1470,7 +1471,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) len -= 1; len = ALIGN(len + 1, c->min_io_size); /* Atomically write the fixed LEB back again */ - err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); if (err) goto out; dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 5e97161ce4d3..ccabaf1164b3 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -523,8 +523,7 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) if (!list_is_last(&next->list, &jh->buds_list)) return 0; - err = ubi_read(c->ubi, next->lnum, (char *)&data, - next->start, 4); + err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); if (err) return 0; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index c606f010e8df..93d938ad3d2a 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len) if (len == 0) { dbg_mnt("unmap empty LEB %d", lnum); - return ubi_leb_unmap(c->ubi, lnum); + return ubifs_leb_unmap(c, lnum); } dbg_mnt("fixup LEB %d, data len %d", lnum, len); - err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); if (err) return err; - return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); } /** diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 36216b46f772..37383e8011b1 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, INIT_LIST_HEAD(&sleb->nodes); sleb->buf = sbuf; - err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); if (err && err != -EBADMSG) { ubifs_err("cannot read %d bytes from LEB %d:%d," " error %d", c->leb_size - offs, lnum, offs, err); @@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, int len; ubifs_err("corruption at LEB %d:%d", lnum, offs); - if (dbg_failure_mode) + if (dbg_is_tst_rcvry(c)) return; len = c->leb_size - offs; if (len > 8192) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 529be0582029..b28121278d46 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) return 4; - if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) + if (ui->xattr && !S_ISREG(inode->i_mode)) return 5; if (!ubifs_compr_present(ui->compr_type)) { @@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) ubifs_compr_name(ui->compr_type)); } - err = dbg_check_dir_size(c, inode); + err = dbg_check_dir(c, inode); return err; } @@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c) c->empty = 1; for (lnum = 0; lnum < c->leb_cnt; lnum++) { - err = ubi_is_mapped(c->ubi, lnum); + err = ubifs_is_mapped(c, lnum); if (unlikely(err < 0)) return err; if (err == 1) { diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 91b4213dde84..066738647685 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); __set_bit(OBSOLETE_ZNODE, &znode->flags); if (znode->level != 0) { @@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, struct ubifs_znode *zn; int err; - if (!test_bit(COW_ZNODE, &znode->flags)) { + if (!ubifs_zn_cow(znode)) { /* znode is not being committed */ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { atomic_long_inc(&c->dirty_zn_cnt); @@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 1); if (err) { ubifs_err("cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); @@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (!overlap) { /* We may safely unlock the write-buffer and read the data */ spin_unlock(&wbuf->lock); - return ubi_read(c->ubi, lnum, buf, offs, len); + return ubifs_leb_read(c, lnum, buf, offs, len, 0); } /* Don't read under wbuf */ @@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (rlen > 0) /* Read everything that goes before write-buffer */ - return ubi_read(c->ubi, lnum, buf, offs, rlen); + return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); return 0; } @@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (wbuf) err = read_wbuf(wbuf, bu->buf, len, lnum, offs); else - err = ubi_read(c->ubi, lnum, bu->buf, offs, len); + err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); /* Check for a race with GC */ if (maybe_leb_gced(c, lnum, bu->gc_seq)) @@ -2423,7 +2423,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) */ do { - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); ubifs_assert(ubifs_zn_dirty(znode)); zp = znode->parent; @@ -2479,9 +2479,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) c->zroot.offs = zbr->offs; c->zroot.len = zbr->len; c->zroot.znode = znode; - ubifs_assert(!test_bit(OBSOLETE_ZNODE, - &zp->flags)); - ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + ubifs_assert(!ubifs_zn_obsolete(zp)); + ubifs_assert(ubifs_zn_dirty(zp)); atomic_long_dec(&c->dirty_zn_cnt); if (zp->cnext) { @@ -2865,7 +2864,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) struct ubifs_znode *znode = cnext; cnext = cnext->cnext; - if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + if (ubifs_zn_obsolete(znode)) kfree(znode); } while (cnext && cnext != c->cnext); } @@ -3301,7 +3300,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, if (!S_ISREG(inode->i_mode)) return 0; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; @@ -3337,9 +3336,10 @@ out_dump: ubifs_err("inode %lu has size %lld, but there are data at offset %lld " "(data key %s)", (unsigned long)inode->i_ino, size, ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); + mutex_unlock(&c->tnc_mutex); dbg_dump_inode(c, inode); dbg_dump_stack(); - err = -EINVAL; + return -EINVAL; out_unlock: mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 41920f357bbf..4c15f07a8bb2 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -22,6 +22,7 @@ /* This file implements TNC functions for committing */ +#include <linux/random.h> #include "ubifs.h" /** @@ -87,8 +88,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, atomic_long_dec(&c->dirty_zn_cnt); ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_cow(znode)); + /* + * Note, unlike 'write_index()' we do not add memory barriers here + * because this function is called with @c->tnc_mutex locked. + */ __clear_bit(DIRTY_ZNODE, &znode->flags); __clear_bit(COW_ZNODE, &znode->flags); @@ -377,7 +382,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) c->gap_lebs = NULL; return err; } - if (dbg_force_in_the_gaps_enabled()) { + if (!dbg_is_chk_index(c)) { /* * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. @@ -491,25 +496,6 @@ static int layout_in_empty_space(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - if (c->min_io_size == 1) { - buf_offs += ALIGN(len, 8); - if (next_len) { - if (buf_offs + next_len <= c->leb_size) - continue; - err = ubifs_update_one_lp(c, lnum, 0, - c->leb_size - buf_offs, 0, 0); - if (err) - return err; - lnum = -1; - continue; - } - err = ubifs_update_one_lp(c, lnum, - c->leb_size - buf_offs, 0, 0, 0); - if (err) - return err; - break; - } - /* Update buffer positions */ wlen = used + len; used += ALIGN(len, 8); @@ -658,7 +644,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) } cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_cow(znode)); __set_bit(COW_ZNODE, &znode->flags); znode->alt = 0; cnext = find_next_dirty(znode); @@ -704,7 +690,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_force_in_the_gaps()) + if (dbg_is_chk_index(c) && !(random32() & 7)) return -ENOSPC; return 0; } @@ -830,7 +816,7 @@ static int write_index(struct ubifs_info *c) struct ubifs_idx_node *idx; struct ubifs_znode *znode, *cnext; int i, lnum, offs, len, next_len, buf_len, buf_offs, used; - int avail, wlen, err, lnum_pos = 0; + int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; cnext = c->enext; if (!cnext) @@ -907,7 +893,7 @@ static int write_index(struct ubifs_info *c) cnext = znode->cnext; ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_cow(znode)); /* * It is important that other threads should see %DIRTY_ZNODE @@ -922,6 +908,28 @@ static int write_index(struct ubifs_info *c) clear_bit(COW_ZNODE, &znode->flags); smp_mb__after_clear_bit(); + /* + * We have marked the znode as clean but have not updated the + * @c->clean_zn_cnt counter. If this znode becomes dirty again + * before 'free_obsolete_znodes()' is called, then + * @c->clean_zn_cnt will be decremented before it gets + * incremented (resulting in 2 decrements for the same znode). + * This means that @c->clean_zn_cnt may become negative for a + * while. + * + * Q: why we cannot increment @c->clean_zn_cnt? + * A: because we do not have the @c->tnc_mutex locked, and the + * following code would be racy and buggy: + * + * if (!ubifs_zn_obsolete(znode)) { + * atomic_long_inc(&c->clean_zn_cnt); + * atomic_long_inc(&ubifs_clean_zn_cnt); + * } + * + * Thus, we just delay the @c->clean_zn_cnt update until we + * have the mutex locked. + */ + /* Do not access znode from this point on */ /* Update buffer positions */ @@ -938,65 +946,38 @@ static int write_index(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - if (c->min_io_size == 1) { - /* - * Write the prepared index node immediately if there is - * no minimum IO size - */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, - wlen, UBI_SHORTTERM); - if (err) - return err; - buf_offs += ALIGN(wlen, 8); - if (next_len) { - used = 0; - avail = buf_len; - if (buf_offs + next_len > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, - LPROPS_NC, 0, 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; - } + nxt_offs = buf_offs + used + next_len; + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) continue; - } + else + blen = buf_len; } else { - int blen, nxt_offs = buf_offs + used + next_len; - - if (next_len && nxt_offs <= c->leb_size) { - if (avail > 0) - continue; - else - blen = buf_len; - } else { - wlen = ALIGN(wlen, 8); - blen = ALIGN(wlen, c->min_io_size); - ubifs_pad(c, c->cbuf + wlen, blen - wlen); - } - /* - * The buffer is full or there are no more znodes - * to do - */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, - blen, UBI_SHORTTERM); - if (err) - return err; - buf_offs += blen; - if (next_len) { - if (nxt_offs > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, - LPROPS_NC, 0, 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; - } - used -= blen; - if (used < 0) - used = 0; - avail = buf_len - used; - memmove(c->cbuf, c->cbuf + blen, used); - continue; + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + + /* The buffer is full or there are no more znodes to do */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen, + UBI_SHORTTERM); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, + 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + blen, used); + continue; } break; } @@ -1029,7 +1010,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) do { znode = cnext; cnext = znode->cnext; - if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + if (ubifs_zn_obsolete(znode)) kfree(znode); else { znode->cnext = NULL; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4cd648501fa4..27f22551f805 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -230,14 +230,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * COW_CNODE: cnode is being committed and must be copied before writing * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished + * so it can (and must) be freed when the commit is finished + * COW_CNODE: cnode is being committed and must be copied before writing */ enum { DIRTY_CNODE = 0, - COW_CNODE = 1, - OBSOLETE_CNODE = 2, + OBSOLETE_CNODE = 1, + COW_CNODE = 2, }; /* @@ -1468,6 +1468,15 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg); +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len, int dtype); +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, + int dtype); +int ubifs_leb_unmap(struct ubifs_info *c, int lnum); +int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype); +int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype); @@ -1747,8 +1756,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 284a7c89697e..75bb316529dd 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -88,8 +88,6 @@ xfs-y += xfs_alloc.o \ xfs_vnodeops.o \ xfs_rw.o -xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o - # Objects in linux/ xfs-y += $(addprefix $(XFS_LINUX)/, \ kmem.o \ diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index a5dcd6a0f1b5..cac48fe22ad5 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -264,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode) iattr.ia_mode = mode; iattr.ia_ctime = current_fs_time(inode->i_sb); - error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } return error; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index b3b418f519f3..63e971e2b837 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -181,6 +181,7 @@ xfs_setfilesize( isize = xfs_ioend_new_eof(ioend); if (isize) { + trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); ip->i_d.di_size = isize; xfs_mark_inode_dirty(ip); } @@ -894,11 +895,6 @@ out_invalidate: * For unwritten space on the page we need to start the conversion to * regular allocated space. * For any other dirty buffer heads on the page we should flush them. - * - * If we detect that a transaction would be required to flush the page, we - * have to check the process flags first, if we are already in a transaction - * or disk I/O during allocations is off, we need to fail the writepage and - * redirty the page. */ STATIC int xfs_vm_writepage( @@ -906,7 +902,6 @@ xfs_vm_writepage( struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - int delalloc, unwritten; struct buffer_head *bh, *head; struct xfs_bmbt_irec imap; xfs_ioend_t *ioend = NULL, *iohead = NULL; @@ -938,15 +933,10 @@ xfs_vm_writepage( goto redirty; /* - * We need a transaction if there are delalloc or unwritten buffers - * on the page. - * - * If we need a transaction and the process flags say we are already - * in a transaction, or no IO is allowed then mark the page dirty - * again and leave the page as is. + * Given that we do not allow direct reclaim to call us, we should + * never be called while in a filesystem transaction. */ - xfs_count_page_state(page, &delalloc, &unwritten); - if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) + if (WARN_ON(current->flags & PF_FSTRANS)) goto redirty; /* Is this page beyond the end of the file? */ @@ -970,7 +960,7 @@ xfs_vm_writepage( offset = page_offset(page); type = IO_OVERWRITE; - if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) + if (wbc->sync_mode == WB_SYNC_NONE) nonblocking = 1; do { diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5e68099db2a5..b2b411985591 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -499,16 +499,14 @@ found: spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); - if (xfs_buf_cond_lock(bp)) { - /* failed, so wait for the lock if requested. */ - if (!(flags & XBF_TRYLOCK)) { - xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); - } else { + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) { xfs_buf_rele(bp); XFS_STATS_INC(xb_busy_locked); return NULL; } + xfs_buf_lock(bp); + XFS_STATS_INC(xb_get_locked_waited); } /* @@ -594,10 +592,8 @@ _xfs_buf_read( ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); status = xfs_buf_iorequest(bp); if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) @@ -681,7 +677,6 @@ xfs_buf_read_uncached( return NULL; /* set up the buffer for a read IO */ - xfs_buf_lock(bp); XFS_BUF_SET_ADDR(bp, daddr); XFS_BUF_READ(bp); XFS_BUF_BUSY(bp); @@ -816,8 +811,6 @@ xfs_buf_get_uncached( goto fail_free_mem; } - xfs_buf_unlock(bp); - trace_xfs_buf_get_uncached(bp, _RET_IP_); return bp; @@ -896,8 +889,8 @@ xfs_buf_rele( * to push on stale inode buffers. */ int -xfs_buf_cond_lock( - xfs_buf_t *bp) +xfs_buf_trylock( + struct xfs_buf *bp) { int locked; @@ -907,15 +900,8 @@ xfs_buf_cond_lock( else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); - trace_xfs_buf_cond_lock(bp, _RET_IP_); - return locked ? 0 : -EBUSY; -} - -int -xfs_buf_lock_value( - xfs_buf_t *bp) -{ - return bp->b_sema.count; + trace_xfs_buf_trylock(bp, _RET_IP_); + return locked; } /* @@ -929,7 +915,7 @@ xfs_buf_lock_value( */ void xfs_buf_lock( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_lock(bp, _RET_IP_); @@ -950,7 +936,7 @@ xfs_buf_lock( */ void xfs_buf_unlock( - xfs_buf_t *bp) + struct xfs_buf *bp) { if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { atomic_inc(&bp->b_hold); @@ -1121,7 +1107,7 @@ xfs_bioerror_relse( XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_DONE(bp); XFS_BUF_STALE(bp); - XFS_BUF_CLR_IODONE_FUNC(bp); + bp->b_iodone = NULL; if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. @@ -1223,23 +1209,21 @@ _xfs_buf_ioapply( total_nr_pages = bp->b_page_count; map_i = 0; - if (bp->b_flags & XBF_ORDERED) { - ASSERT(!(bp->b_flags & XBF_READ)); - rw = WRITE_FLUSH_FUA; - } else if (bp->b_flags & XBF_LOG_BUFFER) { - ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); - bp->b_flags &= ~_XBF_RUN_QUEUES; - rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; - } else if (bp->b_flags & _XBF_RUN_QUEUES) { - ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); - bp->b_flags &= ~_XBF_RUN_QUEUES; - rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; + if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_SYNCIO) + rw = WRITE_SYNC; + else + rw = WRITE; + if (bp->b_flags & XBF_FUA) + rw |= REQ_FUA; + if (bp->b_flags & XBF_FLUSH) + rw |= REQ_FLUSH; + } else if (bp->b_flags & XBF_READ_AHEAD) { + rw = READA; } else { - rw = (bp->b_flags & XBF_WRITE) ? WRITE : - (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; + rw = READ; } - next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); @@ -1694,15 +1678,14 @@ xfs_buf_delwri_split( list_for_each_entry_safe(bp, n, dwq, b_list) { ASSERT(bp->b_flags & XBF_DELWRI); - if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { + if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) { if (!force && time_before(jiffies, bp->b_queuetime + age)) { xfs_buf_unlock(bp); break; } - bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| - _XBF_RUN_QUEUES); + bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); bp->b_flags |= XBF_WRITE; list_move_tail(&bp->b_list, list); trace_xfs_buf_delwri_split(bp, _RET_IP_); @@ -1738,14 +1721,6 @@ xfs_buf_cmp( return 0; } -void -xfs_buf_delwri_sort( - xfs_buftarg_t *target, - struct list_head *list) -{ - list_sort(NULL, list, xfs_buf_cmp); -} - STATIC int xfsbufd( void *data) diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 50a7d5fb3b73..6a83b46b4bcf 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -46,43 +46,46 @@ typedef enum { #define XBF_READ (1 << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ -#define XBF_ORDERED (1 << 11)/* use ordered writes */ -#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ -#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ + +/* I/O hints for the BIO layer */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ /* flags used only as arguments to access routines */ -#define XBF_LOCK (1 << 14)/* lock requested */ -#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ -#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ +#define XBF_LOCK (1 << 15)/* lock requested */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ /* flags used only internally */ -#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ -#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ -#define _XBF_KMEM (1 << 20)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ { XBF_READ, "READ" }, \ { XBF_WRITE, "WRITE" }, \ + { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_MAPPED, "MAPPED" }, \ { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_DELWRI, "DELWRI" }, \ { XBF_STALE, "STALE" }, \ - { XBF_ORDERED, "ORDERED" }, \ - { XBF_READ_AHEAD, "READ_AHEAD" }, \ + { XBF_SYNCIO, "SYNCIO" }, \ + { XBF_FUA, "FUA" }, \ + { XBF_FLUSH, "FLUSH" }, \ { XBF_LOCK, "LOCK" }, /* should never be set */\ { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ - { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" } @@ -91,11 +94,6 @@ typedef enum { XBT_FORCE_FLUSH = 1, } xfs_buftarg_flags_t; -typedef struct xfs_bufhash { - struct list_head bh_list; - spinlock_t bh_lock; -} xfs_bufhash_t; - typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; @@ -151,7 +149,7 @@ typedef struct xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; - void *b_fspriv2; + struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ unsigned long b_queuetime; /* time buffer was queued */ @@ -192,10 +190,11 @@ extern void xfs_buf_free(xfs_buf_t *); extern void xfs_buf_rele(xfs_buf_t *); /* Locking and Unlocking Buffers */ -extern int xfs_buf_cond_lock(xfs_buf_t *); -extern int xfs_buf_lock_value(xfs_buf_t *); +extern int xfs_buf_trylock(xfs_buf_t *); extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); +#define xfs_buf_islocked(bp) \ + ((bp)->b_sema.count <= 0) /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); @@ -234,8 +233,9 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) -#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ - ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) +#define XFS_BUF_ZEROFLAGS(bp) \ + ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ + XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_STALE(bp) xfs_buf_stale(bp); @@ -267,10 +267,6 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) -#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED) -#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) -#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) - #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) @@ -280,14 +276,6 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) -#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) -#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) -#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) - -#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) -#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) -#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) -#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) @@ -313,10 +301,6 @@ xfs_buf_set_ref( #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) -#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) -#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) -#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) -#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index f4f878fc0083..75e5d322e48f 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -151,14 +151,14 @@ xfs_nfs_get_inode( * We don't use ESTALE directly down the chain to not * confuse applications using bulkstat that expect EINVAL. */ - if (error == EINVAL) + if (error == EINVAL || error == ENOENT) error = ESTALE; return ERR_PTR(-error); } if (ip->i_d.di_gen != generation) { IRELE(ip); - return ERR_PTR(-ENOENT); + return ERR_PTR(-ESTALE); } return VFS_I(ip); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index fbbf657df0cd..cca00f49e092 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -943,7 +943,7 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); + error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); } out_unlock: diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index d44d92cd12b1..501e4f630548 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -39,6 +39,7 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" +#include "xfs_inode_item.h" #include "xfs_trace.h" #include <linux/capability.h> @@ -497,12 +498,442 @@ xfs_vn_getattr( return 0; } +int +xfs_setattr_nonsize( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + xfs_mount_t *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + xfs_trans_t *tp; + int error; + uid_t uid = 0, iuid = 0; + gid_t gid = 0, igid = 0; + struct xfs_dquot *udqp = NULL, *gdqp = NULL; + struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + + ASSERT((mask & ATTR_SIZE) == 0); + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. + */ + if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { + uint qflags = 0; + + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { + uid = iattr->ia_uid; + qflags |= XFS_QMOPT_UQUOTA; + } else { + uid = ip->i_d.di_uid; + } + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { + gid = iattr->ia_gid; + qflags |= XFS_QMOPT_GQUOTA; + } else { + gid = ip->i_d.di_gid; + } + + /* + * We take a reference when we initialize udqp and gdqp, + * so it is important that we never blindly double trip on + * the same variable. See xfs_create() for an example. + */ + ASSERT(udqp == NULL); + ASSERT(gdqp == NULL); + error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), + qflags, &udqp, &gdqp); + if (error) + return error; + } + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) + goto out_dqrele; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * These IDs could have changed since we last looked at them. + * But, we're assured that if the ownership did change + * while we didn't have the inode locked, inode's dquot(s) + * would have changed also. + */ + iuid = ip->i_d.di_uid; + igid = ip->i_d.di_gid; + gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; + uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; + + /* + * Do a quota reservation only if uid/gid is actually + * going to change. + */ + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { + ASSERT(tp); + error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (error) /* out of quota */ + goto out_trans_cancel; + } + } + + xfs_trans_ijoin(tp, ip); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + + /* + * Change the ownerships and register quota modifications + * in the transaction. + */ + if (iuid != uid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { + ASSERT(mask & ATTR_UID); + ASSERT(udqp); + olddquot1 = xfs_qm_vop_chown(tp, ip, + &ip->i_udquot, udqp); + } + ip->i_d.di_uid = uid; + inode->i_uid = uid; + } + if (igid != gid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(mask & ATTR_GID); + ASSERT(gdqp); + olddquot2 = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + ip->i_d.di_gid = gid; + inode->i_gid = gid; + } + } + + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + } + + /* + * Change file access or modified times. + */ + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + if (error) + return XFS_ERROR(error); + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + error = -xfs_acl_chmod(inode); + if (error) + return XFS_ERROR(error); + } + + return 0; + +out_trans_cancel: + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out_dqrele: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + return error; +} + +/* + * Truncate file. Must have write permission and not be a directory. + */ +int +xfs_setattr_size( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + struct xfs_trans *tp; + int error; + uint lock_flags; + uint commit_flags = 0; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + + ASSERT(S_ISREG(ip->i_d.di_mode)); + ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| + ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + + lock_flags = XFS_ILOCK_EXCL; + if (!(flags & XFS_ATTR_NOLOCK)) + lock_flags |= XFS_IOLOCK_EXCL; + xfs_ilock(ip, lock_flags); + + /* + * Short circuit the truncate case for zero length files. + */ + if (iattr->ia_size == 0 && + ip->i_size == 0 && ip->i_d.di_nextents == 0) { + if (!(mask & (ATTR_CTIME|ATTR_MTIME))) + goto out_unlock; + + /* + * Use the regular setattr path to update the timestamps. + */ + xfs_iunlock(ip, lock_flags); + iattr->ia_valid &= ~ATTR_SIZE; + return xfs_setattr_nonsize(ip, iattr, 0); + } + + /* + * Make sure that the dquots are attached to the inode. + */ + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out_unlock; + + /* + * Now we can make the changes. Before we join the inode to the + * transaction, take care of the part of the truncation that must be + * done without the inode lock. This needs to be done before joining + * the inode to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ + if (iattr->ia_size > ip->i_size) { + /* + * Do the first part of growing a file: zero any data in the + * last block that is beyond the old EOF. We need to do this + * before the inode is joined to the transaction to modify + * i_size. + */ + error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (error) + goto out_unlock; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + + /* + * We are going to log the inode size change in this transaction so + * any previous writes that are beyond the on disk EOF and the new + * EOF that have not been written out need to be written here. If we + * do not write the data out, we expose ourselves to the null files + * problem. + * + * Only flush from the on disk size to the smaller of the in memory + * file size or the new size as that's the range we really care about + * here and prevents waiting for other data not within the range we + * care about here. + */ + if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { + error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, + XBF_ASYNC, FI_NONE); + if (error) + goto out_unlock; + } + + /* + * Wait for all I/O to complete. + */ + xfs_ioend_wait(ip); + + error = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (error) + goto out_unlock; + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) + goto out_trans_cancel; + + truncate_setsize(inode, iattr->ia_size); + + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + + /* + * Only change the c/mtime if we are changing the size or we are + * explicitly asked to change it. This handles the semantic difference + * between truncate() and ftruncate() as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a + * special case where we need to update the times despite not having + * these flags set. For all other operations the VFS set these flags + * explicitly if it wants a timestamp update. + */ + if (iattr->ia_size != ip->i_size && + (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + mask |= ATTR_CTIME | ATTR_MTIME; + } + + if (iattr->ia_size > ip->i_size) { + ip->i_d.di_size = iattr->ia_size; + ip->i_size = iattr->ia_size; + } else if (iattr->ia_size <= ip->i_size || + (iattr->ia_size == 0 && ip->i_d.di_nextents)) { + error = xfs_itruncate_data(&tp, ip, iattr->ia_size); + if (error) + goto out_trans_abort; + + /* + * Truncated "down", so we're removing references to old data + * here - if we delay flushing for a long time, we expose + * ourselves unduly to the notorious NULL files problem. So, + * we mark this inode and flush it when the file is closed, + * and do not wait the usual (long) time for writeout. + */ + xfs_iflags_set(ip, XFS_ITRUNCATED); + } + + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +out_unlock: + if (lock_flags) + xfs_iunlock(ip, lock_flags); + return error; + +out_trans_abort: + commit_flags |= XFS_TRANS_ABORT; +out_trans_cancel: + xfs_trans_cancel(tp, commit_flags); + goto out_unlock; +} + STATIC int xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); + if (iattr->ia_valid & ATTR_SIZE) + return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); + return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); } #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 8633521b3b2e..d42f814e4d35 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -33,7 +33,6 @@ #endif #include <xfs_types.h> -#include <xfs_arch.h> #include <kmem.h> #include <mrlock.h> @@ -88,6 +87,12 @@ #include <xfs_buf.h> #include <xfs_message.h> +#ifdef __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + /* * Feature macros (disable/enable) */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a9c6ccff7b48..9a72dda58bd0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -33,7 +33,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -1407,33 +1406,31 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - error = xfs_syncd_init(mp); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; - error = xfs_mountfs(mp); + error = xfs_syncd_init(mp); if (error) - goto out_syncd_stop; + goto out_unmount; root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; - goto fail_unmount; + goto out_syncd_stop; } if (is_bad_inode(root)) { error = EINVAL; - goto fail_vnrele; + goto out_syncd_stop; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { error = ENOMEM; - goto fail_vnrele; + goto out_iput; } return 0; - out_syncd_stop: - xfs_syncd_stop(mp); out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: @@ -1448,17 +1445,11 @@ xfs_fs_fill_super( out: return -error; - fail_vnrele: - if (sb->s_root) { - dput(sb->s_root); - sb->s_root = NULL; - } else { - iput(root); - } - - fail_unmount: + out_iput: + iput(root); + out_syncd_stop: xfs_syncd_stop(mp); - + out_unmount: /* * Blow away any referenced inode in the filestreams cache. * This can and will cause log traffic as inodes go inactive diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 9bd7e895a4e2..e4c938afb910 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -361,14 +361,12 @@ xfs_quiesce_data( { int error, error2 = 0; - /* push non-blocking */ - xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - - /* push and block till complete */ - xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); + /* force out the newly dirtied log buffers */ + xfs_log_force(mp, XFS_LOG_SYNC); + /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp); @@ -438,7 +436,7 @@ xfs_quiesce_attr( WARN_ON(atomic_read(&mp->m_active_trans) != 0); /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp, 1); + error = xfs_log_sbcount(mp); if (error) xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " "Frozen image may not be consistent."); diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2e1568597764..941202e7ac6e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -21,14 +21,6 @@ struct xfs_mount; struct xfs_perag; -typedef struct xfs_sync_work { - struct list_head w_list; - struct xfs_mount *w_mount; - void *w_data; /* syncer routine argument */ - void (*w_syncer)(struct xfs_mount *, void *); - struct completion *w_completion; -} xfs_sync_work_t; - #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index d48b7a579ae1..fda0708ef2ea 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->buffer_length = bp->b_buffer_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = xfs_buf_lock_value(bp); + __entry->lockval = bp->b_sema.count; __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; ), @@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite); DEFINE_BUF_EVENT(xfs_buf_bdwrite); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); -DEFINE_BUF_EVENT(xfs_buf_cond_lock); +DEFINE_BUF_EVENT(xfs_buf_trylock); DEFINE_BUF_EVENT(xfs_buf_unlock); DEFINE_BUF_EVENT(xfs_buf_iowait); DEFINE_BUF_EVENT(xfs_buf_iowait_done); @@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, __entry->flags = flags; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = xfs_buf_lock_value(bp); + __entry->lockval = bp->b_sema.count; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " @@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->buffer_length = bp->b_buffer_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = xfs_buf_lock_value(bp); + __entry->lockval = bp->b_sema.count; __entry->error = error; __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; @@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, __entry->buf_flags = bip->bli_buf->b_flags; __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); - __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); + __entry->buf_lockval = bip->bli_buf->b_sema.count; __entry->li_desc = bip->bli_item.li_desc; __entry->li_flags = bip->bli_item.li_flags; ), @@ -998,7 +998,8 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) - __field(loff_t, size) + __field(loff_t, isize) + __field(loff_t, disize) __field(loff_t, new_size) __field(loff_t, offset) __field(size_t, count) @@ -1006,16 +1007,18 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; + __entry->isize = ip->i_size; + __entry->disize = ip->i_d.di_size; __entry->new_size = ip->i_new_size; __entry->offset = offset; __entry->count = count; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " "offset 0x%llx count %zd", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->size, + __entry->isize, + __entry->disize, __entry->new_size, __entry->offset, __entry->count) @@ -1028,40 +1031,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); - - -TRACE_EVENT(xfs_itruncate_start, - TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag, - xfs_off_t toss_start, xfs_off_t toss_finish), - TP_ARGS(ip, new_size, flag, toss_start, toss_finish), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fsize_t, new_size) - __field(xfs_off_t, toss_start) - __field(xfs_off_t, toss_finish) - __field(int, flag) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = new_size; - __entry->toss_start = toss_start; - __entry->toss_finish = toss_finish; - __entry->flag = flag; - ), - TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx " - "toss start 0x%llx toss finish 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS), - __entry->size, - __entry->new_size, - __entry->toss_start, - __entry->toss_finish) -); +DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), @@ -1089,8 +1059,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, DEFINE_EVENT(xfs_itrunc_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ TP_ARGS(ip, new_size)) -DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); -DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); +DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); +DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); TRACE_EVENT(xfs_pagecache_inval, TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 6fa214603819..837f31158d43 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -220,7 +220,7 @@ xfs_qm_adjust_dqtimers( { ASSERT(d->d_id); -#ifdef QUOTADEBUG +#ifdef DEBUG if (d->d_blk_hardlimit) ASSERT(be64_to_cpu(d->d_blk_softlimit) <= be64_to_cpu(d->d_blk_hardlimit)); @@ -231,6 +231,7 @@ xfs_qm_adjust_dqtimers( ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= be64_to_cpu(d->d_rtb_hardlimit)); #endif + if (!d->d_btimer) { if ((d->d_blk_softlimit && (be64_to_cpu(d->d_bcount) >= @@ -318,7 +319,7 @@ xfs_qm_init_dquot_blk( ASSERT(tp); ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); @@ -534,7 +535,7 @@ xfs_qm_dqtobp( } ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); /* * calculate the location of the dquot inside the buffer. @@ -622,7 +623,7 @@ xfs_qm_dqread( * brelse it because we have the changes incore. */ ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); xfs_trans_brelse(tp, bp); return (error); @@ -1423,45 +1424,6 @@ xfs_qm_dqpurge( } -#ifdef QUOTADEBUG -void -xfs_qm_dqprint(xfs_dquot_t *dqp) -{ - struct xfs_mount *mp = dqp->q_mount; - - xfs_debug(mp, "-----------KERNEL DQUOT----------------"); - xfs_debug(mp, "---- dquotID = %d", - (int)be32_to_cpu(dqp->q_core.d_id)); - xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); - xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount); - xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno); - xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset); - xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)", - be64_to_cpu(dqp->q_core.d_blk_hardlimit), - (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); - xfs_debug(mp, "---- blkslimit = %Lu (0x%x)", - be64_to_cpu(dqp->q_core.d_blk_softlimit), - (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); - xfs_debug(mp, "---- inohlimit = %Lu (0x%x)", - be64_to_cpu(dqp->q_core.d_ino_hardlimit), - (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); - xfs_debug(mp, "---- inoslimit = %Lu (0x%x)", - be64_to_cpu(dqp->q_core.d_ino_softlimit), - (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); - xfs_debug(mp, "---- bcount = %Lu (0x%x)", - be64_to_cpu(dqp->q_core.d_bcount), - (int)be64_to_cpu(dqp->q_core.d_bcount)); - xfs_debug(mp, "---- icount = %Lu (0x%x)", - be64_to_cpu(dqp->q_core.d_icount), - (int)be64_to_cpu(dqp->q_core.d_icount)); - xfs_debug(mp, "---- btimer = %d", - (int)be32_to_cpu(dqp->q_core.d_btimer)); - xfs_debug(mp, "---- itimer = %d", - (int)be32_to_cpu(dqp->q_core.d_itimer)); - xfs_debug(mp, "---------------------------"); -} -#endif - /* * Give the buffer a little push if it is incore and * wait on the flush lock. diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 5da3a23b820d..34b7e945dbfa 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -116,12 +116,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp) (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ (XFS_IS_OQUOTA_ON((d)->q_mount)))) -#ifdef QUOTADEBUG -extern void xfs_qm_dqprint(xfs_dquot_t *); -#else -#define xfs_qm_dqprint(a) -#endif - extern void xfs_qm_dqdestroy(xfs_dquot_t *); extern int xfs_qm_dqflush(xfs_dquot_t *, uint); extern int xfs_qm_dqpurge(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index b94dace4e785..46e54ad9a2dc 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -67,32 +67,6 @@ static struct shrinker xfs_qm_shaker = { .seeks = DEFAULT_SEEKS, }; -#ifdef DEBUG -extern struct mutex qcheck_lock; -#endif - -#ifdef QUOTADEBUG -static void -xfs_qm_dquot_list_print( - struct xfs_mount *mp) -{ - xfs_dquot_t *dqp; - int i = 0; - - list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { - xfs_debug(mp, " %d. \"%d (%s)\" " - "bcnt = %lld, icnt = %lld, refs = %d", - i++, be32_to_cpu(dqp->q_core.d_id), - DQFLAGTO_TYPESTR(dqp), - (long long)be64_to_cpu(dqp->q_core.d_bcount), - (long long)be64_to_cpu(dqp->q_core.d_icount), - dqp->q_nrefs); - } -} -#else -static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { } -#endif - /* * Initialize the XQM structure. * Note that there is not one quota manager per file system. @@ -165,9 +139,6 @@ xfs_Gqm_init(void) atomic_set(&xqm->qm_totaldquots, 0); xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; -#ifdef DEBUG - mutex_init(&qcheck_lock); -#endif return xqm; out_free_udqhash: @@ -204,9 +175,6 @@ xfs_qm_destroy( mutex_lock(&xqm->qm_dqfrlist_lock); list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { xfs_dqlock(dqp); -#ifdef QUOTADEBUG - xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp); -#endif list_del_init(&dqp->q_freelist); xfs_Gqm->qm_dqfrlist_cnt--; xfs_dqunlock(dqp); @@ -214,9 +182,6 @@ xfs_qm_destroy( } mutex_unlock(&xqm->qm_dqfrlist_lock); mutex_destroy(&xqm->qm_dqfrlist_lock); -#ifdef DEBUG - mutex_destroy(&qcheck_lock); -#endif kmem_free(xqm); } @@ -409,11 +374,6 @@ xfs_qm_mount_quotas( xfs_warn(mp, "Failed to initialize disk quotas."); return; } - -#ifdef QUOTADEBUG - if (XFS_IS_QUOTA_ON(mp)) - xfs_qm_internalqcheck(mp); -#endif } /* @@ -866,8 +826,8 @@ xfs_qm_dqattach_locked( } done: -#ifdef QUOTADEBUG - if (! error) { +#ifdef DEBUG + if (!error) { if (XFS_IS_UQUOTA_ON(mp)) ASSERT(ip->i_udquot); if (XFS_IS_OQUOTA_ON(mp)) @@ -1733,8 +1693,6 @@ xfs_qm_quotacheck( mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); mp->m_qflags |= flags; - xfs_qm_dquot_list_print(mp); - error_return: if (error) { xfs_warn(mp, @@ -2096,9 +2054,6 @@ xfs_qm_write_sb_changes( xfs_trans_t *tp; int error; -#ifdef QUOTADEBUG - xfs_notice(mp, "Writing superblock quota changes"); -#endif tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 567b29b9f1b3..43b9abe1052c 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -163,10 +163,4 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); -#ifdef DEBUG -extern int xfs_qm_internalqcheck(xfs_mount_t *); -#else -#define xfs_qm_internalqcheck(mp) (0) -#endif - #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 2dadb15d5ca9..609246f42e6c 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); + error = xfs_itruncate_data(&tp, ip, 0); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -622,7 +622,6 @@ xfs_qm_scall_setqlim( xfs_trans_log_dquot(tp, dqp); error = xfs_trans_commit(tp, 0); - xfs_qm_dqprint(dqp); xfs_qm_dqrele(dqp); out_unlock: @@ -657,7 +656,6 @@ xfs_qm_scall_getquota( xfs_qm_dqput(dqp); return XFS_ERROR(ENOENT); } - /* xfs_qm_dqprint(dqp); */ /* * Convert the disk dquot to the exportable format */ @@ -906,354 +904,3 @@ xfs_qm_dqrele_all_inodes( ASSERT(mp->m_quotainfo); xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); } - -/*------------------------------------------------------------------------*/ -#ifdef DEBUG -/* - * This contains all the test functions for XFS disk quotas. - * Currently it does a quota accounting check. ie. it walks through - * all inodes in the file system, calculating the dquot accounting fields, - * and prints out any inconsistencies. - */ -xfs_dqhash_t *qmtest_udqtab; -xfs_dqhash_t *qmtest_gdqtab; -int qmtest_hashmask; -int qmtest_nfails; -struct mutex qcheck_lock; - -#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ - (__psunsigned_t)(id)) & \ - (qmtest_hashmask - 1)) - -#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \ - (qmtest_udqtab + \ - DQTEST_HASHVAL(mp, id)) : \ - (qmtest_gdqtab + \ - DQTEST_HASHVAL(mp, id))) - -#define DQTEST_LIST_PRINT(l, NXT, title) \ -{ \ - xfs_dqtest_t *dqp; int i = 0;\ - xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \ - for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ - dqp = (xfs_dqtest_t *)dqp->NXT) { \ - xfs_debug(dqp->q_mount, \ - " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ - ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ - dqp->d_bcount, dqp->d_icount); } \ -} - -typedef struct dqtest { - uint dq_flags; /* various flags (XFS_DQ_*) */ - struct list_head q_hashlist; - xfs_dqhash_t *q_hash; /* the hashchain header */ - xfs_mount_t *q_mount; /* filesystem this relates to */ - xfs_dqid_t d_id; /* user id or group id */ - xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */ - xfs_qcnt_t d_icount; /* # inodes owned by the user */ -} xfs_dqtest_t; - -STATIC void -xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) -{ - list_add(&dqp->q_hashlist, &h->qh_list); - h->qh_version++; - h->qh_nelems++; -} -STATIC void -xfs_qm_dqtest_print( - struct xfs_mount *mp, - struct dqtest *d) -{ - xfs_debug(mp, "-----------DQTEST DQUOT----------------"); - xfs_debug(mp, "---- dquot ID = %d", d->d_id); - xfs_debug(mp, "---- fs = 0x%p", d->q_mount); - xfs_debug(mp, "---- bcount = %Lu (0x%x)", - d->d_bcount, (int)d->d_bcount); - xfs_debug(mp, "---- icount = %Lu (0x%x)", - d->d_icount, (int)d->d_icount); - xfs_debug(mp, "---------------------------"); -} - -STATIC void -xfs_qm_dqtest_failed( - xfs_dqtest_t *d, - xfs_dquot_t *dqp, - char *reason, - xfs_qcnt_t a, - xfs_qcnt_t b, - int error) -{ - qmtest_nfails++; - if (error) - xfs_debug(dqp->q_mount, - "quotacheck failed id=%d, err=%d\nreason: %s", - d->d_id, error, reason); - else - xfs_debug(dqp->q_mount, - "quotacheck failed id=%d (%s) [%d != %d]", - d->d_id, reason, (int)a, (int)b); - xfs_qm_dqtest_print(dqp->q_mount, d); - if (dqp) - xfs_qm_dqprint(dqp); -} - -STATIC int -xfs_dqtest_cmp2( - xfs_dqtest_t *d, - xfs_dquot_t *dqp) -{ - int err = 0; - if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) { - xfs_qm_dqtest_failed(d, dqp, "icount mismatch", - be64_to_cpu(dqp->q_core.d_icount), - d->d_icount, 0); - err++; - } - if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) { - xfs_qm_dqtest_failed(d, dqp, "bcount mismatch", - be64_to_cpu(dqp->q_core.d_bcount), - d->d_bcount, 0); - err++; - } - if (dqp->q_core.d_blk_softlimit && - be64_to_cpu(dqp->q_core.d_bcount) >= - be64_to_cpu(dqp->q_core.d_blk_softlimit)) { - if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { - xfs_debug(dqp->q_mount, - "%d [%s] BLK TIMER NOT STARTED", - d->d_id, DQFLAGTO_TYPESTR(d)); - err++; - } - } - if (dqp->q_core.d_ino_softlimit && - be64_to_cpu(dqp->q_core.d_icount) >= - be64_to_cpu(dqp->q_core.d_ino_softlimit)) { - if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { - xfs_debug(dqp->q_mount, - "%d [%s] INO TIMER NOT STARTED", - d->d_id, DQFLAGTO_TYPESTR(d)); - err++; - } - } -#ifdef QUOTADEBUG - if (!err) { - xfs_debug(dqp->q_mount, "%d [%s] qchecked", - d->d_id, DQFLAGTO_TYPESTR(d)); - } -#endif - return (err); -} - -STATIC void -xfs_dqtest_cmp( - xfs_dqtest_t *d) -{ - xfs_dquot_t *dqp; - int error; - - /* xfs_qm_dqtest_print(d); */ - if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0, - &dqp))) { - xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error); - return; - } - xfs_dqtest_cmp2(d, dqp); - xfs_qm_dqput(dqp); -} - -STATIC int -xfs_qm_internalqcheck_dqget( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - xfs_dqtest_t **O_dq) -{ - xfs_dqtest_t *d; - xfs_dqhash_t *h; - - h = DQTEST_HASH(mp, id, type); - list_for_each_entry(d, &h->qh_list, q_hashlist) { - if (d->d_id == id && mp == d->q_mount) { - *O_dq = d; - return (0); - } - } - d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP); - d->dq_flags = type; - d->d_id = id; - d->q_mount = mp; - d->q_hash = h; - INIT_LIST_HEAD(&d->q_hashlist); - xfs_qm_hashinsert(h, d); - *O_dq = d; - return (0); -} - -STATIC void -xfs_qm_internalqcheck_get_dquots( - xfs_mount_t *mp, - xfs_dqid_t uid, - xfs_dqid_t projid, - xfs_dqid_t gid, - xfs_dqtest_t **ud, - xfs_dqtest_t **gd) -{ - if (XFS_IS_UQUOTA_ON(mp)) - xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud); - if (XFS_IS_GQUOTA_ON(mp)) - xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd); - else if (XFS_IS_PQUOTA_ON(mp)) - xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd); -} - - -STATIC void -xfs_qm_internalqcheck_dqadjust( - xfs_inode_t *ip, - xfs_dqtest_t *d) -{ - d->d_icount++; - d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks; -} - -STATIC int -xfs_qm_internalqcheck_adjust( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* not used */ - int ubsize, /* not used */ - int *ubused, /* not used */ - int *res) /* bulkstat result code */ -{ - xfs_inode_t *ip; - xfs_dqtest_t *ud, *gd; - uint lock_flags; - boolean_t ipreleased; - int error; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { - *res = BULKSTAT_RV_NOTHING; - xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n", - __func__, (unsigned long long) ino, - (unsigned long long) mp->m_sb.sb_uquotino, - (unsigned long long) mp->m_sb.sb_gquotino); - return XFS_ERROR(EINVAL); - } - ipreleased = B_FALSE; - again: - lock_flags = XFS_ILOCK_SHARED; - if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) { - *res = BULKSTAT_RV_NOTHING; - return (error); - } - - /* - * This inode can have blocks after eof which can get released - * when we send it to inactive. Since we don't check the dquot - * until the after all our calculations are done, we must get rid - * of those now. - */ - if (! ipreleased) { - xfs_iunlock(ip, lock_flags); - IRELE(ip); - ipreleased = B_TRUE; - goto again; - } - xfs_qm_internalqcheck_get_dquots(mp, - (xfs_dqid_t) ip->i_d.di_uid, - (xfs_dqid_t) xfs_get_projid(ip), - (xfs_dqid_t) ip->i_d.di_gid, - &ud, &gd); - if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(ud); - xfs_qm_internalqcheck_dqadjust(ip, ud); - } - if (XFS_IS_OQUOTA_ON(mp)) { - ASSERT(gd); - xfs_qm_internalqcheck_dqadjust(ip, gd); - } - xfs_iunlock(ip, lock_flags); - IRELE(ip); - *res = BULKSTAT_RV_DIDONE; - return (0); -} - - -/* PRIVATE, debugging */ -int -xfs_qm_internalqcheck( - xfs_mount_t *mp) -{ - xfs_ino_t lastino; - int done, count; - int i; - int error; - - lastino = 0; - qmtest_hashmask = 32; - count = 5; - done = 0; - qmtest_nfails = 0; - - if (! XFS_IS_QUOTA_ON(mp)) - return XFS_ERROR(ESRCH); - - xfs_log_force(mp, XFS_LOG_SYNC); - XFS_bflush(mp->m_ddev_targp); - xfs_log_force(mp, XFS_LOG_SYNC); - XFS_bflush(mp->m_ddev_targp); - - mutex_lock(&qcheck_lock); - /* There should be absolutely no quota activity while this - is going on. */ - qmtest_udqtab = kmem_zalloc(qmtest_hashmask * - sizeof(xfs_dqhash_t), KM_SLEEP); - qmtest_gdqtab = kmem_zalloc(qmtest_hashmask * - sizeof(xfs_dqhash_t), KM_SLEEP); - do { - /* - * Iterate thru all the inodes in the file system, - * adjusting the corresponding dquot counters - */ - error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_internalqcheck_adjust, - 0, NULL, &done); - if (error) { - xfs_debug(mp, "Bulkstat returned error 0x%x", error); - break; - } - } while (!done); - - xfs_debug(mp, "Checking results against system dquots"); - for (i = 0; i < qmtest_hashmask; i++) { - xfs_dqtest_t *d, *n; - xfs_dqhash_t *h; - - h = &qmtest_udqtab[i]; - list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { - xfs_dqtest_cmp(d); - kmem_free(d); - } - h = &qmtest_gdqtab[i]; - list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { - xfs_dqtest_cmp(d); - kmem_free(d); - } - } - - if (qmtest_nfails) { - xfs_debug(mp, "******** quotacheck failed ********"); - xfs_debug(mp, "failures = %d", qmtest_nfails); - } else { - xfs_debug(mp, "******** quotacheck successful! ********"); - } - kmem_free(qmtest_udqtab); - kmem_free(qmtest_gdqtab); - mutex_unlock(&qcheck_lock); - return (qmtest_nfails); -} - -#endif /* DEBUG */ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 2a3648731331..4d00ee67792d 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -59,7 +59,7 @@ xfs_trans_dqjoin( xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); /* - * Initialize i_transp so we can later determine if this dquot is + * Initialize d_transp so we can later determine if this dquot is * associated with this transaction. */ dqp->q_transp = tp; @@ -387,18 +387,18 @@ xfs_trans_apply_dquot_deltas( qtrx->qt_delbcnt_delta; totalrtbdelta = qtrx->qt_rtbcount_delta + qtrx->qt_delrtb_delta; -#ifdef QUOTADEBUG +#ifdef DEBUG if (totalbdelta < 0) ASSERT(be64_to_cpu(d->d_bcount) >= - (xfs_qcnt_t) -totalbdelta); + -totalbdelta); if (totalrtbdelta < 0) ASSERT(be64_to_cpu(d->d_rtbcount) >= - (xfs_qcnt_t) -totalrtbdelta); + -totalrtbdelta); if (qtrx->qt_icount_delta < 0) ASSERT(be64_to_cpu(d->d_icount) >= - (xfs_qcnt_t) -qtrx->qt_icount_delta); + -qtrx->qt_icount_delta); #endif if (totalbdelta) be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); @@ -642,11 +642,6 @@ xfs_trans_dqresv( ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { -#ifdef QUOTADEBUG - xfs_debug(mp, - "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?", - nblks, *resbcountp, hardlimit); -#endif if (nblks > 0) { /* * dquot is locked already. See if we'd go over the diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 5ad8ad3a1dcd..53ec3ea9a625 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -22,7 +22,6 @@ #define STATIC #define DEBUG 1 #define XFS_BUF_LOCK_TRACKING 1 -/* #define QUOTADEBUG 1 */ #endif #include <linux-2.6/xfs_linux.h> diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 95862bbff56b..1e00b3ef6274 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -570,9 +570,7 @@ xfs_alloc_ag_vextent_exact( xfs_agblock_t tbno; /* start block of trimmed extent */ xfs_extlen_t tlen; /* length of trimmed extent */ xfs_agblock_t tend; /* end block of trimmed extent */ - xfs_agblock_t end; /* end of allocated extent */ int i; /* success/failure of operation */ - xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); @@ -625,18 +623,16 @@ xfs_alloc_ag_vextent_exact( * * Fix the length according to mod and prod if given. */ - end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); - args->len = end - args->agbno; + args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) + - args->agbno; xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) goto not_found; - rlen = args->len; - ASSERT(args->agbno + rlen <= tend); - end = args->agbno + rlen; + ASSERT(args->agbno + args->len <= tend); /* - * We are allocating agbno for rlen [agbno .. end] + * We are allocating agbno for args->len * Allocate/initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, @@ -2127,7 +2123,7 @@ xfs_read_agf( * Validate the magic number of the agf block. */ agf_ok = - be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && + agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 2b3518826a69..ffb3386e45c1 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -31,7 +31,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -311,72 +310,6 @@ xfs_allocbt_recs_inorder( } #endif /* DEBUG */ -#ifdef XFS_BTREE_TRACE -ktrace_t *xfs_allocbt_trace_buf; - -STATIC void -xfs_allocbt_trace_enter( - struct xfs_btree_cur *cur, - const char *func, - char *s, - int type, - int line, - __psunsigned_t a0, - __psunsigned_t a1, - __psunsigned_t a2, - __psunsigned_t a3, - __psunsigned_t a4, - __psunsigned_t a5, - __psunsigned_t a6, - __psunsigned_t a7, - __psunsigned_t a8, - __psunsigned_t a9, - __psunsigned_t a10) -{ - ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type, - (void *)func, (void *)s, NULL, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); -} - -STATIC void -xfs_allocbt_trace_cursor( - struct xfs_btree_cur *cur, - __uint32_t *s0, - __uint64_t *l0, - __uint64_t *l1) -{ - *s0 = cur->bc_private.a.agno; - *l0 = cur->bc_rec.a.ar_startblock; - *l1 = cur->bc_rec.a.ar_blockcount; -} - -STATIC void -xfs_allocbt_trace_key( - struct xfs_btree_cur *cur, - union xfs_btree_key *key, - __uint64_t *l0, - __uint64_t *l1) -{ - *l0 = be32_to_cpu(key->alloc.ar_startblock); - *l1 = be32_to_cpu(key->alloc.ar_blockcount); -} - -STATIC void -xfs_allocbt_trace_record( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, - __uint64_t *l0, - __uint64_t *l1, - __uint64_t *l2) -{ - *l0 = be32_to_cpu(rec->alloc.ar_startblock); - *l1 = be32_to_cpu(rec->alloc.ar_blockcount); - *l2 = 0; -} -#endif /* XFS_BTREE_TRACE */ - static const struct xfs_btree_ops xfs_allocbt_ops = { .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -393,18 +326,10 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, - #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, #endif - -#ifdef XFS_BTREE_TRACE - .trace_enter = xfs_allocbt_trace_enter, - .trace_cursor = xfs_allocbt_trace_cursor, - .trace_key = xfs_allocbt_trace_key, - .trace_record = xfs_allocbt_trace_record, -#endif }; /* @@ -427,13 +352,16 @@ xfs_allocbt_init_cursor( cur->bc_tp = tp; cur->bc_mp = mp; - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]); cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - if (btnum == XFS_BTNUM_CNT) + + if (btnum == XFS_BTNUM_CNT) { + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; + } else { + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + } cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h deleted file mode 100644 index 0902249354a0..000000000000 --- a/fs/xfs/xfs_arch.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_ARCH_H__ -#define __XFS_ARCH_H__ - -#ifndef XFS_BIG_INUMS -# error XFS_BIG_INUMS must be defined true or false -#endif - -#ifdef __KERNEL__ - -#include <asm/byteorder.h> - -#ifdef __BIG_ENDIAN -#define XFS_NATIVE_HOST 1 -#else -#undef XFS_NATIVE_HOST -#endif - -#else /* __KERNEL__ */ - -#if __BYTE_ORDER == __BIG_ENDIAN -#define XFS_NATIVE_HOST 1 -#else -#undef XFS_NATIVE_HOST -#endif - -#ifdef XFS_NATIVE_HOST -#define cpu_to_be16(val) ((__force __be16)(__u16)(val)) -#define cpu_to_be32(val) ((__force __be32)(__u32)(val)) -#define cpu_to_be64(val) ((__force __be64)(__u64)(val)) -#define be16_to_cpu(val) ((__force __u16)(__be16)(val)) -#define be32_to_cpu(val) ((__force __u32)(__be32)(val)) -#define be64_to_cpu(val) ((__force __u64)(__be64)(val)) -#else -#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val))) -#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val))) -#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val))) -#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val))) -#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val))) -#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val))) -#endif - -static inline void be16_add_cpu(__be16 *a, __s16 b) -{ - *a = cpu_to_be16(be16_to_cpu(*a) + b); -} - -static inline void be32_add_cpu(__be32 *a, __s32 b) -{ - *a = cpu_to_be32(be32_to_cpu(*a) + b); -} - -static inline void be64_add_cpu(__be64 *a, __s64 b) -{ - *a = cpu_to_be64(be64_to_cpu(*a) + b); -} - -#endif /* __KERNEL__ */ - -/* - * get and set integers from potentially unaligned locations - */ - -#define INT_GET_UNALIGNED_16_BE(pointer) \ - ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1]))) -#define INT_SET_UNALIGNED_16_BE(pointer,value) \ - { \ - ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \ - ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ - } - -/* - * In directories inode numbers are stored as unaligned arrays of unsigned - * 8bit integers on disk. - * - * For v1 directories or v2 directories that contain inode numbers that - * do not fit into 32bit the array has eight members, but the first member - * is always zero: - * - * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7| - * - * For v2 directories that only contain entries with inode numbers that fit - * into 32bits a four-member array is used: - * - * |24-31|16-23| 8-15| 0- 7| - */ - -#define XFS_GET_DIR_INO4(di) \ - (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) - -#define XFS_PUT_DIR_INO4(from, di) \ -do { \ - (di).i[0] = (((from) & 0xff000000ULL) >> 24); \ - (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \ - (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \ - (di).i[3] = ((from) & 0x000000ffULL); \ -} while (0) - -#define XFS_DI_HI(di) \ - (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) -#define XFS_DI_LO(di) \ - (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7])) - -#define XFS_GET_DIR_INO8(di) \ - (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \ - ((xfs_ino_t)XFS_DI_HI(di) << 32)) - -#define XFS_PUT_DIR_INO8(from, di) \ -do { \ - (di).i[0] = 0; \ - (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \ - (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \ - (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \ - (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \ - (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \ - (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \ - (di).i[7] = ((from) & 0x00000000000000ffULL); \ -} while (0) - -#endif /* __XFS_ARCH_H__ */ diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 01d2072fb6d4..cbae424fe1ba 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -822,17 +822,21 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_attr_root_inactive(&trans, dp); if (error) goto out; + /* - * signal synchronous inactive transactions unless this - * is a synchronous mount filesystem in which case we - * know that we're here because we've been called out of - * xfs_inactive which means that the last reference is gone - * and the unlink transaction has already hit the disk so - * async inactive transactions are safe. + * Signal synchronous inactive transactions unless this is a + * synchronous mount filesystem in which case we know that we're here + * because we've been called out of xfs_inactive which means that the + * last reference is gone and the unlink transaction has already hit + * the disk so async inactive transactions are safe. */ - if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, - (!(mp->m_flags & XFS_MOUNT_WSYNC) - ? 1 : 0)))) + if (!(mp->m_flags & XFS_MOUNT_WSYNC)) { + if (dp->i_d.di_anextents > 0) + xfs_trans_set_sync(trans); + } + + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); + if (error) goto out; /* @@ -1199,7 +1203,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) return XFS_ERROR(error); ASSERT(bp != NULL); leaf = bp->data; - if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { + if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); xfs_da_brelse(NULL, bp); @@ -1606,9 +1610,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) XFS_ATTR_FORK); if (error) goto out; - ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) - bp->data)->hdr.info.magic) - == XFS_ATTR_LEAF_MAGIC); + ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) == + cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { xfs_bmap_init(args->flist, args->firstblock); @@ -1873,11 +1876,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return(XFS_ERROR(EFSCORRUPTED)); } node = bp->data; - if (be16_to_cpu(node->hdr.info.magic) - == XFS_ATTR_LEAF_MAGIC) + if (node->hdr.info.magic == + cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) break; - if (unlikely(be16_to_cpu(node->hdr.info.magic) - != XFS_DA_NODE_MAGIC)) { + if (unlikely(node->hdr.info.magic != + cpu_to_be16(XFS_DA_NODE_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", XFS_ERRLEVEL_LOW, context->dp->i_mount, @@ -1912,8 +1915,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ for (;;) { leaf = bp->data; - if (unlikely(be16_to_cpu(leaf->hdr.info.magic) - != XFS_ATTR_LEAF_MAGIC)) { + if (unlikely(leaf->hdr.info.magic != + cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", XFS_ERRLEVEL_LOW, context->dp->i_mount, leaf); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 71e90dc2aeb1..8fad9602542b 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -731,7 +731,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) int bytes, i; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); entry = &leaf->entries[0]; bytes = sizeof(struct xfs_attr_sf_hdr); @@ -777,7 +777,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) ASSERT(bp != NULL); memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); leaf = (xfs_attr_leafblock_t *)tmpbuffer; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); /* @@ -872,7 +872,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) goto out; node = bp1->data; leaf = bp2->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); /* both on-disk, don't endian-flip twice */ node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; @@ -997,7 +997,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) int tablesize, entsize, sum, tmp, i; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(leaf->hdr.count))); hdr = &leaf->hdr; @@ -1070,7 +1070,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) int tmp, i; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr = &leaf->hdr; ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); @@ -1256,8 +1256,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); leaf1 = blk1->bp->data; leaf2 = blk2->bp->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); args = state->args; /* @@ -1533,7 +1533,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); leaf = (xfs_attr_leafblock_t *)info; count = be16_to_cpu(leaf->hdr.count); bytes = sizeof(xfs_attr_leaf_hdr_t) + @@ -1596,7 +1596,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) bytes = state->blocksize - (state->blocksize>>2); bytes -= be16_to_cpu(leaf->hdr.usedbytes); leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); count += be16_to_cpu(leaf->hdr.count); bytes -= be16_to_cpu(leaf->hdr.usedbytes); bytes -= count * sizeof(xfs_attr_leaf_entry_t); @@ -1650,7 +1650,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_mount_t *mp; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr = &leaf->hdr; mp = args->trans->t_mountp; ASSERT((be16_to_cpu(hdr->count) > 0) @@ -1813,8 +1813,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); drop_hdr = &drop_leaf->hdr; save_hdr = &save_leaf->hdr; @@ -1915,7 +1915,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_dahash_t hashval; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); @@ -2019,7 +2019,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) xfs_attr_leaf_name_remote_t *name_rmt; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); @@ -2087,8 +2087,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, /* * Set up environment. */ - ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); - ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); hdr_s = &leaf_s->hdr; hdr_d = &leaf_d->hdr; ASSERT((be16_to_cpu(hdr_s->count) > 0) && @@ -2222,8 +2222,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && - (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); + ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) && + (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC))); if ((be16_to_cpu(leaf1->hdr.count) > 0) && (be16_to_cpu(leaf2->hdr.count) > 0) && ((be32_to_cpu(leaf2->entries[0].hashval) < @@ -2246,7 +2246,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) xfs_attr_leafblock_t *leaf; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -2265,7 +2265,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) xfs_attr_leaf_name_remote_t *name_rmt; int size; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr_leaf_name_local(leaf, index); size = xfs_attr_leaf_entsize_local(name_loc->namelen, @@ -2451,7 +2451,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2515,7 +2515,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) ASSERT(bp != NULL); leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); ASSERT(args->index >= 0); entry = &leaf->entries[ args->index ]; @@ -2585,13 +2585,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) } leaf1 = bp1->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); ASSERT(args->index >= 0); entry1 = &leaf1->entries[ args->index ]; leaf2 = bp2->data; - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); ASSERT(args->index2 >= 0); entry2 = &leaf2->entries[ args->index2 ]; @@ -2689,9 +2689,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * This is a depth-first traversal! */ info = bp->data; - if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { + if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { error = xfs_attr_node_inactive(trans, dp, bp, 1); - } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { + } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { error = xfs_attr_leaf_inactive(trans, dp, bp); } else { error = XFS_ERROR(EIO); @@ -2739,7 +2739,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, } node = bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ count = be16_to_cpu(node->hdr.count); if (!count) { @@ -2773,10 +2773,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * Invalidate the subtree, however we have to. */ info = child_bp->data; - if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { + if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { error = xfs_attr_node_inactive(trans, dp, child_bp, level+1); - } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { + } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { error = xfs_attr_leaf_inactive(trans, dp, child_bp); } else { @@ -2836,7 +2836,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) int error, count, size, tmp, i; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); /* * Count the number of "remote" value extents. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index e546a33214c9..c51a3f903633 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -29,15 +29,11 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_mount.h" #include "xfs_itable.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" #include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" @@ -94,6 +90,7 @@ xfs_bmap_add_attrfork_local( */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( + struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -439,6 +436,7 @@ xfs_bmap_add_attrfork_local( */ STATIC int /* error */ xfs_bmap_add_extent( + struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -524,7 +522,7 @@ xfs_bmap_add_extent( if (cur) ASSERT(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL); - error = xfs_bmap_add_extent_delay_real(ip, + error = xfs_bmap_add_extent_delay_real(tp, ip, idx, &cur, new, &da_new, first, flist, &logflags); } else { @@ -561,7 +559,7 @@ xfs_bmap_add_extent( int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, da_old > 0, &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) @@ -604,6 +602,7 @@ done: */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( + struct xfs_trans *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ @@ -901,7 +900,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -984,7 +983,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -1052,7 +1051,7 @@ xfs_bmap_add_extent_delay_real( } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { - error = xfs_bmap_extents_to_btree(ip->i_transp, ip, + error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, 1, &tmp_rval, XFS_DATA_FORK); rval |= tmp_rval; @@ -2871,8 +2870,8 @@ xfs_bmap_del_extent( len = del->br_blockcount; do_div(bno, mp->m_sb.sb_rextsize); do_div(len, mp->m_sb.sb_rextsize); - if ((error = xfs_rtfree_extent(ip->i_transp, bno, - (xfs_extlen_t)len))) + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) goto done; do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; @@ -4080,7 +4079,7 @@ xfs_bmap_sanity_check( { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || + if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || be16_to_cpu(block->bb_level) != level || be16_to_cpu(block->bb_numrecs) == 0 || be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) @@ -4662,7 +4661,7 @@ xfs_bmapi( if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) got.br_state = XFS_EXT_UNWRITTEN; } - error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, + error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got, firstblock, flist, &tmp_logflags, whichfork); logflags |= tmp_logflags; @@ -4763,7 +4762,7 @@ xfs_bmapi( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, + error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval, firstblock, flist, &tmp_logflags, whichfork); logflags |= tmp_logflags; @@ -5117,7 +5116,7 @@ xfs_bunmapi( del.br_blockcount = mod; } del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, + error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del, firstblock, flist, &logflags, XFS_DATA_FORK); if (error) @@ -5175,18 +5174,18 @@ xfs_bunmapi( } prev.br_state = XFS_EXT_UNWRITTEN; lastx--; - error = xfs_bmap_add_extent(ip, &lastx, &cur, - &prev, firstblock, flist, &logflags, - XFS_DATA_FORK); + error = xfs_bmap_add_extent(tp, ip, &lastx, + &cur, &prev, firstblock, flist, + &logflags, XFS_DATA_FORK); if (error) goto error0; goto nodelete; } else { ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, &lastx, &cur, - &del, firstblock, flist, &logflags, - XFS_DATA_FORK); + error = xfs_bmap_add_extent(tp, ip, &lastx, + &cur, &del, firstblock, flist, + &logflags, XFS_DATA_FORK); if (error) goto error0; goto nodelete; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 87d3c10b6954..e2f5d59cbeaf 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -33,7 +33,6 @@ #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" @@ -425,10 +424,10 @@ xfs_bmbt_to_bmdr( xfs_bmbt_key_t *tkp; __be64 *tpp; - ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); - ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); - ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); - ASSERT(be16_to_cpu(rblock->bb_level) > 0); + ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); + ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_level != 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); @@ -732,95 +731,6 @@ xfs_bmbt_recs_inorder( } #endif /* DEBUG */ -#ifdef XFS_BTREE_TRACE -ktrace_t *xfs_bmbt_trace_buf; - -STATIC void -xfs_bmbt_trace_enter( - struct xfs_btree_cur *cur, - const char *func, - char *s, - int type, - int line, - __psunsigned_t a0, - __psunsigned_t a1, - __psunsigned_t a2, - __psunsigned_t a3, - __psunsigned_t a4, - __psunsigned_t a5, - __psunsigned_t a6, - __psunsigned_t a7, - __psunsigned_t a8, - __psunsigned_t a9, - __psunsigned_t a10) -{ - struct xfs_inode *ip = cur->bc_private.b.ip; - int whichfork = cur->bc_private.b.whichfork; - - ktrace_enter(xfs_bmbt_trace_buf, - (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), - (void *)func, (void *)s, (void *)ip, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); -} - -STATIC void -xfs_bmbt_trace_cursor( - struct xfs_btree_cur *cur, - __uint32_t *s0, - __uint64_t *l0, - __uint64_t *l1) -{ - struct xfs_bmbt_rec_host r; - - xfs_bmbt_set_all(&r, &cur->bc_rec.b); - - *s0 = (cur->bc_nlevels << 24) | - (cur->bc_private.b.flags << 16) | - cur->bc_private.b.allocated; - *l0 = r.l0; - *l1 = r.l1; -} - -STATIC void -xfs_bmbt_trace_key( - struct xfs_btree_cur *cur, - union xfs_btree_key *key, - __uint64_t *l0, - __uint64_t *l1) -{ - *l0 = be64_to_cpu(key->bmbt.br_startoff); - *l1 = 0; -} - -/* Endian flipping versions of the bmbt extraction functions */ -STATIC void -xfs_bmbt_disk_get_all( - xfs_bmbt_rec_t *r, - xfs_bmbt_irec_t *s) -{ - __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), - get_unaligned_be64(&r->l1), s); -} - -STATIC void -xfs_bmbt_trace_record( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, - __uint64_t *l0, - __uint64_t *l1, - __uint64_t *l2) -{ - struct xfs_bmbt_irec irec; - - xfs_bmbt_disk_get_all(&rec->bmbt, &irec); - *l0 = irec.br_startoff; - *l1 = irec.br_startblock; - *l2 = irec.br_blockcount; -} -#endif /* XFS_BTREE_TRACE */ - static const struct xfs_btree_ops xfs_bmbt_ops = { .rec_len = sizeof(xfs_bmbt_rec_t), .key_len = sizeof(xfs_bmbt_key_t), @@ -837,18 +747,10 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, - #ifdef DEBUG .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, #endif - -#ifdef XFS_BTREE_TRACE - .trace_enter = xfs_bmbt_trace_enter, - .trace_cursor = xfs_bmbt_trace_cursor, - .trace_key = xfs_bmbt_trace_key, - .trace_record = xfs_bmbt_trace_record, -#endif }; /* diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 2f9e97c128a0..cabf4b5604aa 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -32,7 +32,6 @@ #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -66,11 +65,11 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, @@ -105,10 +104,10 @@ xfs_btree_check_sblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && block->bb_u.s.bb_leftsib && - (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, @@ -511,9 +510,9 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); else - return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; + return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } /* @@ -777,14 +776,14 @@ xfs_btree_setbuf( b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) + if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) + if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { - if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) + if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) + if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } @@ -795,9 +794,9 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return be64_to_cpu(ptr->l) == NULLDFSBNO; + return ptr->l == cpu_to_be64(NULLDFSBNO); else - return be32_to_cpu(ptr->s) == NULLAGBLOCK; + return ptr->s == cpu_to_be32(NULLAGBLOCK); } STATIC void @@ -923,12 +922,12 @@ xfs_btree_ptr_to_daddr( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); + ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); } else { ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); + ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(ptr->s)); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 82fafc66bd1f..8d05a6a46ce3 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -199,25 +199,6 @@ struct xfs_btree_ops { union xfs_btree_rec *r1, union xfs_btree_rec *r2); #endif - - /* btree tracing */ -#ifdef XFS_BTREE_TRACE - void (*trace_enter)(struct xfs_btree_cur *, const char *, - char *, int, int, __psunsigned_t, - __psunsigned_t, __psunsigned_t, - __psunsigned_t, __psunsigned_t, - __psunsigned_t, __psunsigned_t, - __psunsigned_t, __psunsigned_t, - __psunsigned_t, __psunsigned_t); - void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *, - __uint64_t *, __uint64_t *); - void (*trace_key)(struct xfs_btree_cur *, - union xfs_btree_key *, __uint64_t *, - __uint64_t *); - void (*trace_record)(struct xfs_btree_cur *, - union xfs_btree_rec *, __uint64_t *, - __uint64_t *, __uint64_t *); -#endif }; /* @@ -452,4 +433,23 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) +/* + * Trace hooks. Currently not implemented as they need to be ported + * over to the generic tracing functionality, which is some effort. + * + * i,j = integer (32 bit) + * b = btree block buffer (xfs_buf_t) + * p = btree ptr + * r = btree record + * k = btree key + */ +#define XFS_BTREE_TRACE_ARGBI(c, b, i) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) +#define XFS_BTREE_TRACE_ARGI(c, i) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) +#define XFS_BTREE_TRACE_ARGR(c, r) +#define XFS_BTREE_TRACE_CURSOR(c, t) + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c deleted file mode 100644 index 44ff942a0fda..000000000000 --- a/fs/xfs/xfs_btree_trace.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2008 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_types.h" -#include "xfs_inum.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_btree_trace.h" - -STATIC void -xfs_btree_trace_ptr( - struct xfs_btree_cur *cur, - union xfs_btree_ptr ptr, - __psunsigned_t *high, - __psunsigned_t *low) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - __u64 val = be64_to_cpu(ptr.l); - *high = val >> 32; - *low = (int)val; - } else { - *high = 0; - *low = be32_to_cpu(ptr.s); - } -} - -/* - * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. - */ -void -xfs_btree_trace_argbi( - const char *func, - struct xfs_btree_cur *cur, - struct xfs_buf *b, - int i, - int line) -{ - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI, - line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0, - 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for a buffer & 2 integer args. - */ -void -xfs_btree_trace_argbii( - const char *func, - struct xfs_btree_cur *cur, - struct xfs_buf *b, - int i0, - int i1, - int line) -{ - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII, - line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0, - 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for 3 block-length args - * and an integer arg. - */ -void -xfs_btree_trace_argfffi( - const char *func, - struct xfs_btree_cur *cur, - xfs_dfiloff_t o, - xfs_dfsbno_t b, - xfs_dfilblks_t i, - int j, - int line) -{ - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI, - line, - o >> 32, (int)o, - b >> 32, (int)b, - i >> 32, (int)i, - (int)j, 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for one integer arg. - */ -void -xfs_btree_trace_argi( - const char *func, - struct xfs_btree_cur *cur, - int i, - int line) -{ - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI, - line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, fsblock, key. - */ -void -xfs_btree_trace_argipk( - const char *func, - struct xfs_btree_cur *cur, - int i, - union xfs_btree_ptr ptr, - union xfs_btree_key *key, - int line) -{ - __psunsigned_t high, low; - __uint64_t l0, l1; - - xfs_btree_trace_ptr(cur, ptr, &high, &low); - cur->bc_ops->trace_key(cur, key, &l0, &l1); - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK, - line, i, high, low, - l0 >> 32, (int)l0, - l1 >> 32, (int)l1, - 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, fsblock, rec. - */ -void -xfs_btree_trace_argipr( - const char *func, - struct xfs_btree_cur *cur, - int i, - union xfs_btree_ptr ptr, - union xfs_btree_rec *rec, - int line) -{ - __psunsigned_t high, low; - __uint64_t l0, l1, l2; - - xfs_btree_trace_ptr(cur, ptr, &high, &low); - cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR, - line, i, - high, low, - l0 >> 32, (int)l0, - l1 >> 32, (int)l1, - l2 >> 32, (int)l2, - 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, key. - */ -void -xfs_btree_trace_argik( - const char *func, - struct xfs_btree_cur *cur, - int i, - union xfs_btree_key *key, - int line) -{ - __uint64_t l0, l1; - - cur->bc_ops->trace_key(cur, key, &l0, &l1); - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK, - line, i, - l0 >> 32, (int)l0, - l1 >> 32, (int)l1, - 0, 0, 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for record. - */ -void -xfs_btree_trace_argr( - const char *func, - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, - int line) -{ - __uint64_t l0, l1, l2; - - cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); - cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR, - line, - l0 >> 32, (int)l0, - l1 >> 32, (int)l1, - l2 >> 32, (int)l2, - 0, 0, 0, 0, 0); -} - -/* - * Add a trace buffer entry for the cursor/operation. - */ -void -xfs_btree_trace_cursor( - const char *func, - struct xfs_btree_cur *cur, - int type, - int line) -{ - __uint32_t s0; - __uint64_t l0, l1; - char *s; - - switch (type) { - case XBT_ARGS: - s = "args"; - break; - case XBT_ENTRY: - s = "entry"; - break; - case XBT_ERROR: - s = "error"; - break; - case XBT_EXIT: - s = "exit"; - break; - default: - s = "unknown"; - break; - } - - cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1); - cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line, - s0, - l0 >> 32, (int)l0, - l1 >> 32, (int)l1, - (__psunsigned_t)cur->bc_bufs[0], - (__psunsigned_t)cur->bc_bufs[1], - (__psunsigned_t)cur->bc_bufs[2], - (__psunsigned_t)cur->bc_bufs[3], - (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], - (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); -} diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h deleted file mode 100644 index 2d8a309873ea..000000000000 --- a/fs/xfs/xfs_btree_trace.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2008 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_BTREE_TRACE_H__ -#define __XFS_BTREE_TRACE_H__ - -struct xfs_btree_cur; -struct xfs_buf; - - -/* - * Trace hooks. - * i,j = integer (32 bit) - * b = btree block buffer (xfs_buf_t) - * p = btree ptr - * r = btree record - * k = btree key - */ - -#ifdef XFS_BTREE_TRACE - -/* - * Trace buffer entry types. - */ -#define XFS_BTREE_KTRACE_ARGBI 1 -#define XFS_BTREE_KTRACE_ARGBII 2 -#define XFS_BTREE_KTRACE_ARGFFFI 3 -#define XFS_BTREE_KTRACE_ARGI 4 -#define XFS_BTREE_KTRACE_ARGIPK 5 -#define XFS_BTREE_KTRACE_ARGIPR 6 -#define XFS_BTREE_KTRACE_ARGIK 7 -#define XFS_BTREE_KTRACE_ARGR 8 -#define XFS_BTREE_KTRACE_CUR 9 - -/* - * Sub-types for cursor traces. - */ -#define XBT_ARGS 0 -#define XBT_ENTRY 1 -#define XBT_ERROR 2 -#define XBT_EXIT 3 - -void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, - struct xfs_buf *, int, int); -void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, - struct xfs_buf *, int, int, int); -void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); -void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, - union xfs_btree_ptr, union xfs_btree_key *, int); -void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int, - union xfs_btree_ptr, union xfs_btree_rec *, int); -void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int, - union xfs_btree_key *, int); -void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, - union xfs_btree_rec *, int); -void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); - -#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ - xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ - xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) -#define XFS_BTREE_TRACE_ARGI(c, i) \ - xfs_btree_trace_argi(__func__, c, i, __LINE__) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ - xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \ - xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) \ - xfs_btree_trace_argik(__func__, c, i, k, __LINE__) -#define XFS_BTREE_TRACE_ARGR(c, r) \ - xfs_btree_trace_argr(__func__, c, r, __LINE__) -#define XFS_BTREE_TRACE_CURSOR(c, t) \ - xfs_btree_trace_cursor(__func__, c, t, __LINE__) -#else -#define XFS_BTREE_TRACE_ARGBI(c, b, i) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGI(c, i) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) -#define XFS_BTREE_TRACE_ARGR(c, r) -#define XFS_BTREE_TRACE_CURSOR(c, t) -#endif /* XFS_BTREE_TRACE */ - -#endif /* __XFS_BTREE_TRACE_H__ */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 7b7e005e3dcc..88492916c3dc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -90,13 +90,11 @@ xfs_buf_item_flush_log_debug( uint first, uint last) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; uint nbytes; - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); - if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) { + if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF)) return; - } ASSERT(bip->bli_logged != NULL); nbytes = last - first + 1; @@ -408,7 +406,7 @@ xfs_buf_item_unpin( int stale = bip->bli_flags & XFS_BLI_STALE; int freed; - ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); + ASSERT(bp->b_fspriv == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_buf_item_unpin(bip); @@ -420,7 +418,7 @@ xfs_buf_item_unpin( if (freed && stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); @@ -443,7 +441,7 @@ xfs_buf_item_unpin( * Since the transaction no longer refers to the buffer, * the buffer should no longer refer to the transaction. */ - XFS_BUF_SET_FSPRIVATE2(bp, NULL); + bp->b_transp = NULL; } /* @@ -454,13 +452,13 @@ xfs_buf_item_unpin( */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); + bp->b_fspriv = NULL; + bp->b_iodone = NULL; } else { spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); + ASSERT(bp->b_fspriv == NULL); } xfs_buf_relse(bp); } @@ -483,7 +481,7 @@ xfs_buf_item_trylock( if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; - if (!XFS_BUF_CPSEMA(bp)) + if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; /* take a reference to the buffer. */ @@ -525,7 +523,7 @@ xfs_buf_item_unlock( uint hold; /* Clear the buffer's association with this transaction. */ - XFS_BUF_SET_FSPRIVATE2(bp, NULL); + bp->b_transp = NULL; /* * If this is a transaction abort, don't return early. Instead, allow @@ -684,7 +682,7 @@ xfs_buf_item_init( xfs_buf_t *bp, xfs_mount_t *mp) { - xfs_log_item_t *lip; + xfs_log_item_t *lip = bp->b_fspriv; xfs_buf_log_item_t *bip; int chunks; int map_size; @@ -696,12 +694,8 @@ xfs_buf_item_init( * nothing to do here so return. */ ASSERT(bp->b_target->bt_mount == mp); - if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - if (lip->li_type == XFS_LI_BUF) { - return; - } - } + if (lip != NULL && lip->li_type == XFS_LI_BUF) + return; /* * chunks is the number of XFS_BLF_CHUNK size pieces @@ -740,11 +734,9 @@ xfs_buf_item_init( * Put the buf item into the list of items attached to the * buffer at the front. */ - if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { - bip->bli_item.li_bio_list = - XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - } - XFS_BUF_SET_FSPRIVATE(bp, bip); + if (bp->b_fspriv) + bip->bli_item.li_bio_list = bp->b_fspriv; + bp->b_fspriv = bip; } @@ -876,12 +868,11 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); - XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); - if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && - (XFS_BUF_IODONE_FUNC(bp) != NULL)) { - XFS_BUF_CLR_IODONE_FUNC(bp); - } + bip = bp->b_fspriv; + bp->b_fspriv = bip->bli_item.li_bio_list; + if (bp->b_fspriv == NULL) + bp->b_iodone = NULL; + xfs_buf_rele(bp); xfs_buf_item_free(bip); } @@ -905,20 +896,20 @@ xfs_buf_attach_iodone( xfs_log_item_t *head_lip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); lip->li_cb = cb; - if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { - head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + head_lip = bp->b_fspriv; + if (head_lip) { lip->li_bio_list = head_lip->li_bio_list; head_lip->li_bio_list = lip; } else { - XFS_BUF_SET_FSPRIVATE(bp, lip); + bp->b_fspriv = lip; } - ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || - (XFS_BUF_IODONE_FUNC(bp) == NULL)); - XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); + ASSERT(bp->b_iodone == NULL || + bp->b_iodone == xfs_buf_iodone_callbacks); + bp->b_iodone = xfs_buf_iodone_callbacks; } /* @@ -939,8 +930,8 @@ xfs_buf_do_callbacks( { struct xfs_log_item *lip; - while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { - XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); + while ((lip = bp->b_fspriv) != NULL) { + bp->b_fspriv = lip->li_bio_list; ASSERT(lip->li_cb != NULL); /* * Clear the next pointer so we don't have any @@ -1007,7 +998,7 @@ xfs_buf_iodone_callbacks( XFS_BUF_DONE(bp); XFS_BUF_SET_START(bp); } - ASSERT(XFS_BUF_IODONE_FUNC(bp)); + ASSERT(bp->b_iodone != NULL); trace_xfs_buf_item_iodone_async(bp, _RET_IP_); xfs_buf_relse(bp); return; @@ -1026,8 +1017,8 @@ xfs_buf_iodone_callbacks( do_callbacks: xfs_buf_do_callbacks(bp); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); + bp->b_fspriv = NULL; + bp->b_iodone = NULL; xfs_buf_ioend(bp, 0); } diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 6102ac6d1dff..2925726529f8 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -24,11 +24,12 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" +#include "xfs_dir2.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -36,10 +37,6 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" -#include "xfs_dir2_node.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -89,7 +86,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, */ STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); -STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); +STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps); STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk); @@ -321,11 +318,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, ASSERT(bp != NULL); node = bp->data; oldroot = blk1->bp->data; - if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - (char *)oldroot); } else { - ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); leaf = (xfs_dir2_leaf_t *)oldroot; size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - (char *)leaf); @@ -352,7 +349,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node->hdr.count = cpu_to_be16(2); #ifdef DEBUG - if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { + if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { ASSERT(blk1->blkno >= mp->m_dirleafblk && blk1->blkno < mp->m_dirfreeblk); ASSERT(blk2->blkno >= mp->m_dirleafblk && @@ -384,7 +381,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int useextra; node = oldblk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); /* * With V2 dirs the extra block is data or freespace. @@ -483,8 +480,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, node1 = node2; node2 = tmpnode; } - ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); - ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; if (count == 0) return; @@ -578,7 +575,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, int tmp; node = oldblk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) @@ -714,7 +711,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(args != NULL); ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); oldroot = root_blk->bp->data; - ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); ASSERT(!oldroot->hdr.info.forw); ASSERT(!oldroot->hdr.info.back); @@ -737,10 +734,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) ASSERT(bp != NULL); blkinfo = bp->data; if (be16_to_cpu(oldroot->hdr.level) == 1) { - ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || - be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); } else { - ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); + ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); } ASSERT(!blkinfo->forw); ASSERT(!blkinfo->back); @@ -776,7 +773,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) */ blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); + ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); node = (xfs_da_intnode_t *)info; count = be16_to_cpu(node->hdr.count); if (count > (state->node_ents >> 1)) { @@ -836,7 +833,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) count -= state->node_ents >> 2; count -= be16_to_cpu(node->hdr.count); node = bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); count -= be16_to_cpu(node->hdr.count); xfs_da_brelse(state->args->trans, bp); if (count >= 0) @@ -911,7 +908,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) } for (blk--, level--; level >= 0; blk--, level--) { node = blk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); btree = &node->btree[ blk->index ]; if (be32_to_cpu(btree->hashval) == lasthash) break; @@ -979,8 +976,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, drop_node = drop_blk->bp->data; save_node = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); - ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); tp = state->args->trans; /* @@ -1278,8 +1275,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) node1 = node1_bp->data; node2 = node2_bp->data; - ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && - (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); + ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) && + node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || @@ -1299,7 +1296,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) xfs_da_intnode_t *node; node = bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if (count) *count = be16_to_cpu(node->hdr.count); if (!node->hdr.count) @@ -1412,7 +1409,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, for (blk = &path->blk[level]; level >= 0; blk--, level--) { ASSERT(blk->bp != NULL); node = blk->bp->data; - ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); + ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { blk->index++; blkno = be32_to_cpu(node->btree[blk->index].before); @@ -1451,9 +1448,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, return(error); ASSERT(blk->bp != NULL); info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || - be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || - be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); + ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || + info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); blk->magic = be16_to_cpu(info->magic); if (blk->magic == XFS_DA_NODE_MAGIC) { node = (xfs_da_intnode_t *)info; @@ -1546,79 +1543,62 @@ const struct xfs_nameops xfs_default_nameops = { .compname = xfs_da_compname }; -/* - * Add a block to the btree ahead of the file. - * Return the new block number to the caller. - */ int -xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) +xfs_da_grow_inode_int( + struct xfs_da_args *args, + xfs_fileoff_t *bno, + int count) { - xfs_fileoff_t bno, b; - xfs_bmbt_irec_t map; - xfs_bmbt_irec_t *mapp; - xfs_inode_t *dp; - int nmap, error, w, count, c, got, i, mapi; - xfs_trans_t *tp; - xfs_mount_t *mp; - xfs_drfsbno_t nblks; + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + int w = args->whichfork; + xfs_drfsbno_t nblks = dp->i_d.di_nblocks; + struct xfs_bmbt_irec map, *mapp; + int nmap, error, got, i, mapi; - dp = args->dp; - mp = dp->i_mount; - w = args->whichfork; - tp = args->trans; - nblks = dp->i_d.di_nblocks; - - /* - * For new directories adjust the file offset and block count. - */ - if (w == XFS_DATA_FORK) { - bno = mp->m_dirleafblk; - count = mp->m_dirblkfsbs; - } else { - bno = 0; - count = 1; - } /* * Find a spot in the file space to put the new block. */ - if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) + error = xfs_bmap_first_unused(tp, dp, count, bno, w); + if (error) return error; - if (w == XFS_DATA_FORK) - ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); + /* * Try mapping it in one filesystem block. */ nmap = 1; ASSERT(args->firstblock != NULL); - if ((error = xfs_bmapi(tp, dp, bno, count, + error = xfs_bmapi(tp, dp, *bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, - args->flist))) { + args->flist); + if (error) return error; - } + ASSERT(nmap <= 1); if (nmap == 1) { mapp = ↦ mapi = 1; - } - /* - * If we didn't get it and the block might work if fragmented, - * try without the CONTIG flag. Loop until we get it all. - */ - else if (nmap == 0 && count > 1) { + } else if (nmap == 0 && count > 1) { + xfs_fileoff_t b; + int c; + + /* + * If we didn't get it and the block might work if fragmented, + * try without the CONTIG flag. Loop until we get it all. + */ mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - for (b = bno, mapi = 0; b < bno + count; ) { + for (b = *bno, mapi = 0; b < *bno + count; ) { nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(bno + count - b); - if ((error = xfs_bmapi(tp, dp, b, c, + c = (int)(*bno + count - b); + error = xfs_bmapi(tp, dp, b, c, xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| XFS_BMAPI_METADATA, args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist))) { - kmem_free(mapp); - return error; - } + &mapp[mapi], &nmap, args->flist); + if (error) + goto out_free_map; if (nmap < 1) break; mapi += nmap; @@ -1629,24 +1609,53 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) mapi = 0; mapp = NULL; } + /* * Count the blocks we got, make sure it matches the total. */ for (i = 0, got = 0; i < mapi; i++) got += mapp[i].br_blockcount; - if (got != count || mapp[0].br_startoff != bno || + if (got != count || mapp[0].br_startoff != *bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - bno + count) { - if (mapp != &map) - kmem_free(mapp); - return XFS_ERROR(ENOSPC); + *bno + count) { + error = XFS_ERROR(ENOSPC); + goto out_free_map; } - if (mapp != &map) - kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ args->total -= dp->i_d.di_nblocks - nblks; - *new_blkno = (xfs_dablk_t)bno; - return 0; + +out_free_map: + if (mapp != &map) + kmem_free(mapp); + return error; +} + +/* + * Add a block to the btree ahead of the file. + * Return the new block number to the caller. + */ +int +xfs_da_grow_inode( + struct xfs_da_args *args, + xfs_dablk_t *new_blkno) +{ + xfs_fileoff_t bno; + int count; + int error; + + if (args->whichfork == XFS_DATA_FORK) { + bno = args->dp->i_mount->m_dirleafblk; + count = args->dp->i_mount->m_dirblkfsbs; + } else { + bno = 0; + count = 1; + } + + error = xfs_da_grow_inode_int(args, &bno, count); + if (!error) + *new_blkno = (xfs_dablk_t)bno; + return error; } /* @@ -1704,12 +1713,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, /* * Get values from the moved block. */ - if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { + if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; dead_level = 0; dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); } else { - ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); + ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); dead_node = (xfs_da_intnode_t *)dead_info; dead_level = be16_to_cpu(dead_node->hdr.level); dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); @@ -1768,8 +1777,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) goto done; par_node = par_buf->data; - if (unlikely( - be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || + if (unlikely(par_node->hdr.info.magic != + cpu_to_be16(XFS_DA_NODE_MAGIC) || (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -1820,7 +1829,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, par_node = par_buf->data; if (unlikely( be16_to_cpu(par_node->hdr.level) != level || - be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { + par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); @@ -1930,8 +1939,7 @@ xfs_da_do_buf( xfs_daddr_t *mappedbnop, xfs_dabuf_t **bpp, int whichfork, - int caller, - inst_t *ra) + int caller) { xfs_buf_t *bp = NULL; xfs_buf_t **bplist; @@ -2070,25 +2078,22 @@ xfs_da_do_buf( * Build a dabuf structure. */ if (bplist) { - rbp = xfs_da_buf_make(nbplist, bplist, ra); + rbp = xfs_da_buf_make(nbplist, bplist); } else if (bp) - rbp = xfs_da_buf_make(1, &bp, ra); + rbp = xfs_da_buf_make(1, &bp); else rbp = NULL; /* * For read_buf, check the magic number. */ if (caller == 1) { - xfs_dir2_data_t *data; - xfs_dir2_free_t *free; - xfs_da_blkinfo_t *info; + xfs_dir2_data_hdr_t *hdr = rbp->data; + xfs_dir2_free_t *free = rbp->data; + xfs_da_blkinfo_t *info = rbp->data; uint magic, magic1; - info = rbp->data; - data = rbp->data; - free = rbp->data; magic = be16_to_cpu(info->magic); - magic1 = be32_to_cpu(data->hdr.magic); + magic1 = be32_to_cpu(hdr->magic); if (unlikely( XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && (magic != XFS_ATTR_LEAF_MAGIC) && @@ -2096,7 +2101,7 @@ xfs_da_do_buf( (magic != XFS_DIR2_LEAFN_MAGIC) && (magic1 != XFS_DIR2_BLOCK_MAGIC) && (magic1 != XFS_DIR2_DATA_MAGIC) && - (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), + (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), mp, XFS_ERRTAG_DA_READ_BUF, XFS_RANDOM_DA_READ_BUF))) { trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); @@ -2143,8 +2148,7 @@ xfs_da_get_buf( xfs_dabuf_t **bpp, int whichfork) { - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, - (inst_t *)__return_address); + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0); } /* @@ -2159,8 +2163,7 @@ xfs_da_read_buf( xfs_dabuf_t **bpp, int whichfork) { - return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, - (inst_t *)__return_address); + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1); } /* @@ -2176,8 +2179,7 @@ xfs_da_reada_buf( xfs_daddr_t rval; rval = -1; - if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, - (inst_t *)__return_address)) + if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3)) return -1; else return rval; @@ -2235,17 +2237,12 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } -#ifdef XFS_DABUF_DEBUG -xfs_dabuf_t *xfs_dabuf_global_list; -static DEFINE_SPINLOCK(xfs_dabuf_global_lock); -#endif - /* * Create a dabuf. */ /* ARGSUSED */ STATIC xfs_dabuf_t * -xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) +xfs_da_buf_make(int nbuf, xfs_buf_t **bps) { xfs_buf_t *bp; xfs_dabuf_t *dabuf; @@ -2257,11 +2254,6 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) else dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); dabuf->dirty = 0; -#ifdef XFS_DABUF_DEBUG - dabuf->ra = ra; - dabuf->target = XFS_BUF_TARGET(bps[0]); - dabuf->blkno = XFS_BUF_ADDR(bps[0]); -#endif if (nbuf == 1) { dabuf->nbuf = 1; bp = bps[0]; @@ -2281,23 +2273,6 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) XFS_BUF_COUNT(bp)); } } -#ifdef XFS_DABUF_DEBUG - { - xfs_dabuf_t *p; - - spin_lock(&xfs_dabuf_global_lock); - for (p = xfs_dabuf_global_list; p; p = p->next) { - ASSERT(p->blkno != dabuf->blkno || - p->target != dabuf->target); - } - dabuf->prev = NULL; - if (xfs_dabuf_global_list) - xfs_dabuf_global_list->prev = dabuf; - dabuf->next = xfs_dabuf_global_list; - xfs_dabuf_global_list = dabuf; - spin_unlock(&xfs_dabuf_global_lock); - } -#endif return dabuf; } @@ -2333,25 +2308,12 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); if (dabuf->dirty) xfs_da_buf_clean(dabuf); - if (dabuf->nbuf > 1) + if (dabuf->nbuf > 1) { kmem_free(dabuf->data); -#ifdef XFS_DABUF_DEBUG - { - spin_lock(&xfs_dabuf_global_lock); - if (dabuf->prev) - dabuf->prev->next = dabuf->next; - else - xfs_dabuf_global_list = dabuf->next; - if (dabuf->next) - dabuf->next->prev = dabuf->prev; - spin_unlock(&xfs_dabuf_global_lock); - } - memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf)); -#endif - if (dabuf->nbuf == 1) - kmem_zone_free(xfs_dabuf_zone, dabuf); - else kmem_free(dabuf); + } else { + kmem_zone_free(xfs_dabuf_zone, dabuf); + } } /* diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index fe9f5a8c1d2a..dbf7c074ae73 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -145,22 +145,11 @@ typedef struct xfs_dabuf { short dirty; /* data needs to be copied back */ short bbcount; /* how large is data in bbs */ void *data; /* pointer for buffers' data */ -#ifdef XFS_DABUF_DEBUG - inst_t *ra; /* return address of caller to make */ - struct xfs_dabuf *next; /* next in global chain */ - struct xfs_dabuf *prev; /* previous in global chain */ - struct xfs_buftarg *target; /* device for buffer */ - xfs_daddr_t blkno; /* daddr first in bps[0] */ -#endif struct xfs_buf *bps[1]; /* actually nbuf of these */ } xfs_dabuf_t; #define XFS_DA_BUF_SIZE(n) \ (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) -#ifdef XFS_DABUF_DEBUG -extern xfs_dabuf_t *xfs_dabuf_global_list; -#endif - /* * Storage for holding state during Btree searches and split/join ops. * @@ -248,6 +237,8 @@ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, * Utility routines. */ int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); +int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, + int count); int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, xfs_dabuf_t **bp, int whichfork); diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index dba7a71cedf3..4580ce00aeb4 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -24,20 +24,17 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" -#include "xfs_dir2_node.h" +#include "xfs_dir2.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_vnodeops.h" #include "xfs_trace.h" @@ -122,15 +119,15 @@ int xfs_dir_isempty( xfs_inode_t *dp) { - xfs_dir2_sf_t *sfp; + xfs_dir2_sf_hdr_t *sfp; ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); if (dp->i_d.di_size == 0) /* might happen during shutdown. */ return 1; if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) return 0; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - return !sfp->hdr.count; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + return !sfp->count; } /* @@ -500,129 +497,34 @@ xfs_dir_canenter( /* * Add a block to the directory. - * This routine is for data and free blocks, not leaf/node blocks - * which are handled by xfs_da_grow_inode. + * + * This routine is for data and free blocks, not leaf/node blocks which are + * handled by xfs_da_grow_inode. */ int xfs_dir2_grow_inode( - xfs_da_args_t *args, - int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ - xfs_dir2_db_t *dbp) /* out: block number added */ + struct xfs_da_args *args, + int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ + xfs_dir2_db_t *dbp) /* out: block number added */ { - xfs_fileoff_t bno; /* directory offset of new block */ - int count; /* count of filesystem blocks */ - xfs_inode_t *dp; /* incore directory inode */ - int error; - int got; /* blocks actually mapped */ - int i; - xfs_bmbt_irec_t map; /* single structure for bmap */ - int mapi; /* mapping index */ - xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ - xfs_mount_t *mp; - int nmap; /* number of bmap entries */ - xfs_trans_t *tp; - xfs_drfsbno_t nblks; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + xfs_fileoff_t bno; /* directory offset of new block */ + int count; /* count of filesystem blocks */ + int error; trace_xfs_dir2_grow_inode(args, space); - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); count = mp->m_dirblkfsbs; - /* - * Find the first hole for our block. - */ - if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) - return error; - nmap = 1; - ASSERT(args->firstblock != NULL); - /* - * Try mapping the new block contiguously (one extent). - */ - if ((error = xfs_bmapi(tp, dp, bno, count, - XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, - args->firstblock, args->total, &map, &nmap, - args->flist))) - return error; - ASSERT(nmap <= 1); - if (nmap == 1) { - mapp = ↦ - mapi = 1; - } - /* - * Didn't work and this is a multiple-fsb directory block. - * Try again with contiguous flag turned on. - */ - else if (nmap == 0 && count > 1) { - xfs_fileoff_t b; /* current file offset */ - /* - * Space for maximum number of mappings. - */ - mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); - /* - * Iterate until we get to the end of our block. - */ - for (b = bno, mapi = 0; b < bno + count; ) { - int c; /* current fsb count */ - - /* - * Can't map more than MAX_NMAP at once. - */ - nmap = MIN(XFS_BMAP_MAX_NMAP, count); - c = (int)(bno + count - b); - if ((error = xfs_bmapi(tp, dp, b, c, - XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, - args->firstblock, args->total, - &mapp[mapi], &nmap, args->flist))) { - kmem_free(mapp); - return error; - } - if (nmap < 1) - break; - /* - * Add this bunch into our table, go to the next offset. - */ - mapi += nmap; - b = mapp[mapi - 1].br_startoff + - mapp[mapi - 1].br_blockcount; - } - } - /* - * Didn't work. - */ - else { - mapi = 0; - mapp = NULL; - } - /* - * See how many fsb's we got. - */ - for (i = 0, got = 0; i < mapi; i++) - got += mapp[i].br_blockcount; - /* - * Didn't get enough fsb's, or the first/last block's are wrong. - */ - if (got != count || mapp[0].br_startoff != bno || - mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != - bno + count) { - if (mapp != &map) - kmem_free(mapp); - return XFS_ERROR(ENOSPC); - } - /* - * Done with the temporary mapping table. - */ - if (mapp != &map) - kmem_free(mapp); + error = xfs_da_grow_inode_int(args, &bno, count); + if (error) + return error; - /* account for newly allocated blocks in reserved blocks total */ - args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* @@ -634,7 +536,7 @@ xfs_dir2_grow_inode( size = XFS_FSB_TO_B(mp, bno + count); if (size > dp->i_d.di_size) { dp->i_d.di_size = size; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); } } return 0; diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 74a3b1057685..e937d9991c18 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -16,49 +16,14 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __XFS_DIR2_H__ -#define __XFS_DIR2_H__ +#define __XFS_DIR2_H__ -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_dir2_put_args; struct xfs_bmap_free; +struct xfs_da_args; struct xfs_inode; struct xfs_mount; struct xfs_trans; -/* - * Directory version 2. - * There are 4 possible formats: - * shortform - * single block - data with embedded leaf at the end - * multiple data blocks, single leaf+freeindex block - * data blocks, node&leaf blocks (btree), freeindex blocks - * - * The shortform format is in xfs_dir2_sf.h. - * The single block format is in xfs_dir2_block.h. - * The data block format is in xfs_dir2_data.h. - * The leaf and freeindex block formats are in xfs_dir2_leaf.h. - * Node blocks are the same as the other version, in xfs_da_btree.h. - */ - -/* - * Byte offset in data block and shortform entry. - */ -typedef __uint16_t xfs_dir2_data_off_t; -#define NULLDATAOFF 0xffffU -typedef uint xfs_dir2_data_aoff_t; /* argument form */ - -/* - * Directory block number (logical dirblk in file) - */ -typedef __uint32_t xfs_dir2_db_t; - -/* - * Byte offset in a directory. - */ -typedef xfs_off_t xfs_dir2_off_t; - extern struct xfs_name xfs_name_dotdot; /* @@ -86,21 +51,10 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_bmap_free *flist, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, uint resblks); -extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); /* - * Utility routines for v2 directories. + * Direct call from the bmap code, bypassing the generic directory layer. */ -extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, - xfs_dir2_db_t *dbp); -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, - int *vp); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, - int *vp); -extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, - struct xfs_dabuf *bp); - -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, - const unsigned char *name, int len); +extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 580d99cef9e7..9245e029b8ea 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -23,17 +23,14 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -67,7 +64,7 @@ xfs_dir2_block_addname( xfs_da_args_t *args) /* directory op arguments */ { xfs_dir2_data_free_t *bf; /* bestfree table in block */ - xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -105,13 +102,13 @@ xfs_dir2_block_addname( return error; } ASSERT(bp != NULL); - block = bp->data; + hdr = bp->data; /* * Check the magic number, corrupted if wrong. */ - if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { + if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) { XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", - XFS_ERRLEVEL_LOW, mp, block); + XFS_ERRLEVEL_LOW, mp, hdr); xfs_da_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); } @@ -119,8 +116,8 @@ xfs_dir2_block_addname( /* * Set up pointers to parts of the block. */ - bf = block->hdr.bestfree; - btp = xfs_dir2_block_tail_p(mp, block); + bf = hdr->bestfree; + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * No stale entries? Need space for entry and new leaf. @@ -133,7 +130,7 @@ xfs_dir2_block_addname( /* * Data object just before the first leaf entry. */ - enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); + enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); /* * If it's not free then can't do this add without cleaning up: * the space before the first leaf entry needs to be free so it @@ -146,7 +143,7 @@ xfs_dir2_block_addname( */ else { dup = (xfs_dir2_data_unused_t *) - ((char *)block + be16_to_cpu(bf[0].offset)); + ((char *)hdr + be16_to_cpu(bf[0].offset)); if (dup == enddup) { /* * It is the biggest freespace, is it too small @@ -159,7 +156,7 @@ xfs_dir2_block_addname( */ if (be16_to_cpu(bf[1].length) >= len) dup = (xfs_dir2_data_unused_t *) - ((char *)block + + ((char *)hdr + be16_to_cpu(bf[1].offset)); else dup = NULL; @@ -182,7 +179,7 @@ xfs_dir2_block_addname( */ else if (be16_to_cpu(bf[0].length) >= len) { dup = (xfs_dir2_data_unused_t *) - ((char *)block + be16_to_cpu(bf[0].offset)); + ((char *)hdr + be16_to_cpu(bf[0].offset)); compact = 0; } /* @@ -196,7 +193,7 @@ xfs_dir2_block_addname( /* * Data object just before the first leaf entry. */ - dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); + dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); /* * If it's not free then the data will go where the * leaf data starts now, if it works at all. @@ -255,7 +252,8 @@ xfs_dir2_block_addname( highstale = lfloghigh = -1; fromidx >= 0; fromidx--) { - if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { + if (blp[fromidx].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { if (highstale == -1) highstale = toidx; else { @@ -272,7 +270,7 @@ xfs_dir2_block_addname( lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), + (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), &needlog, &needscan); blp += be32_to_cpu(btp->stale) - 1; @@ -282,7 +280,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); needscan = 0; } } @@ -318,7 +316,7 @@ xfs_dir2_block_addname( */ xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - + ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - sizeof(*blp)), (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, &needscan); @@ -331,8 +329,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, - &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); needscan = 0; } /* @@ -353,12 +350,14 @@ xfs_dir2_block_addname( else { for (lowstale = mid; lowstale >= 0 && - be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; + blp[lowstale].address != + cpu_to_be32(XFS_DIR2_NULL_DATAPTR); lowstale--) continue; for (highstale = mid + 1; highstale < be32_to_cpu(btp->count) && - be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && + blp[highstale].address != + cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) continue; @@ -397,13 +396,13 @@ xfs_dir2_block_addname( */ blp[mid].hashval = cpu_to_be32(args->hashval); blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); + (char *)dep - (char *)hdr)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* * Create the new data entry. @@ -412,12 +411,12 @@ xfs_dir2_block_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Clean up the bestfree array and log the header, tail, and entry. */ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); @@ -437,7 +436,7 @@ xfs_dir2_block_getdents( xfs_off_t *offset, filldir_t filldir) { - xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ @@ -470,13 +469,13 @@ xfs_dir2_block_getdents( * We'll skip entries before this. */ wantoff = xfs_dir2_dataptr_to_off(mp, *offset); - block = bp->data; + hdr = bp->data; xfs_dir2_data_check(dp, bp); /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(mp, block); - ptr = (char *)block->u; + btp = xfs_dir2_block_tail_p(mp, hdr); + ptr = (char *)(hdr + 1); endptr = (char *)xfs_dir2_block_leaf_p(btp); /* @@ -502,11 +501,11 @@ xfs_dir2_block_getdents( /* * The entry is before the desired starting point, skip it. */ - if ((char *)dep - (char *)block < wantoff) + if ((char *)dep - (char *)hdr < wantoff) continue; cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - (char *)dep - (char *)block); + (char *)dep - (char *)hdr); /* * If it didn't fit, set the final offset to here & return. @@ -540,17 +539,14 @@ xfs_dir2_block_log_leaf( int first, /* index of first logged leaf */ int last) /* index of last logged leaf */ { - xfs_dir2_block_t *block; /* directory block structure */ - xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_leaf_entry_t *blp; + xfs_dir2_block_tail_t *btp; - mp = tp->t_mountp; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); blp = xfs_dir2_block_leaf_p(btp); - xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), - (uint)((char *)&blp[last + 1] - (char *)block - 1)); + xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), + (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); } /* @@ -561,15 +557,12 @@ xfs_dir2_block_log_tail( xfs_trans_t *tp, /* transaction structure */ xfs_dabuf_t *bp) /* block buffer */ { - xfs_dir2_block_t *block; /* directory block structure */ - xfs_dir2_block_tail_t *btp; /* block tail */ - xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_data_hdr_t *hdr = bp->data; + xfs_dir2_block_tail_t *btp; - mp = tp->t_mountp; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); - xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), - (uint)((char *)(btp + 1) - (char *)block - 1)); + btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), + (uint)((char *)(btp + 1) - (char *)hdr - 1)); } /* @@ -580,7 +573,7 @@ int /* error */ xfs_dir2_block_lookup( xfs_da_args_t *args) /* dir lookup arguments */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -600,14 +593,14 @@ xfs_dir2_block_lookup( return error; dp = args->dp; mp = dp->i_mount; - block = bp->data; + hdr = bp->data; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ - dep = (xfs_dir2_data_entry_t *)((char *)block + + dep = (xfs_dir2_data_entry_t *)((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Fill in inode number, CI name if appropriate, release the block. @@ -628,7 +621,7 @@ xfs_dir2_block_lookup_int( int *entno) /* returned entry number */ { xfs_dir2_dataptr_t addr; /* data entry address */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -654,9 +647,9 @@ xfs_dir2_block_lookup_int( return error; } ASSERT(bp != NULL); - block = bp->data; + hdr = bp->data; xfs_dir2_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. @@ -694,7 +687,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); /* * Compare name and if it's an exact match, return the index * and buffer. If it's the first case-insensitive match, store @@ -733,7 +726,7 @@ int /* error */ xfs_dir2_block_removename( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -760,20 +753,20 @@ xfs_dir2_block_removename( dp = args->dp; tp = args->trans; mp = dp->i_mount; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); + hdr = bp->data; + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ needlog = needscan = 0; xfs_dir2_data_make_free(tp, bp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * Fix up the block tail. @@ -789,15 +782,15 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_data_check(dp, bp); /* * See if the size as a shortform is good enough. */ - if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > - XFS_IFORK_DSIZE(dp)) { + size = xfs_dir2_block_sfsize(dp, hdr, &sfh); + if (size > XFS_IFORK_DSIZE(dp)) { xfs_da_buf_done(bp); return 0; } @@ -815,7 +808,7 @@ int /* error */ xfs_dir2_block_replace( xfs_da_args_t *args) /* directory operation args */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -836,14 +829,14 @@ xfs_dir2_block_replace( } dp = args->dp; mp = dp->i_mount; - block = bp->data; - btp = xfs_dir2_block_tail_p(mp, block); + hdr = bp->data; + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. @@ -882,7 +875,7 @@ xfs_dir2_leaf_to_block( xfs_dabuf_t *dbp) /* data buffer */ { __be16 *bestsp; /* leaf bests table */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_block_tail_t *btp; /* block tail */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data entry */ @@ -906,7 +899,7 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* * If there are data blocks other than the first one, take this @@ -917,7 +910,7 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > mp->m_dirblksize) { bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - mp->m_dirblksize - (uint)sizeof(block->hdr)) { + mp->m_dirblksize - (uint)sizeof(*hdr)) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) @@ -935,18 +928,18 @@ xfs_dir2_leaf_to_block( XFS_DATA_FORK))) { goto out; } - block = dbp->data; - ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); + hdr = dbp->data; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); /* * Size of the "leaf" area in the block. */ - size = (uint)sizeof(block->tail) + + size = (uint)sizeof(xfs_dir2_block_tail_t) + (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); /* * Look at the last data entry. */ - tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; - dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. */ @@ -958,7 +951,7 @@ xfs_dir2_leaf_to_block( /* * Start converting it to block form. */ - block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); needlog = 1; needscan = 0; /* @@ -969,7 +962,7 @@ xfs_dir2_leaf_to_block( /* * Initialize the block tail. */ - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); @@ -978,7 +971,8 @@ xfs_dir2_leaf_to_block( */ lep = xfs_dir2_block_leaf_p(btp); for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { - if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; lep[to++] = leaf->ents[from]; } @@ -988,7 +982,7 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* @@ -1002,8 +996,8 @@ xfs_dir2_leaf_to_block( /* * Now see if the resulting block can be shrunken to shortform. */ - if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > - XFS_IFORK_DSIZE(dp)) { + size = xfs_dir2_block_sfsize(dp, hdr, &sfh); + if (size > XFS_IFORK_DSIZE(dp)) { error = 0; goto out; } @@ -1024,12 +1018,10 @@ xfs_dir2_sf_to_block( xfs_da_args_t *args) /* operation arguments */ { xfs_dir2_db_t blkno; /* dir-relative block # (0) */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ - char *buf; /* sf buffer */ - int buf_len; xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ int dummy; /* trash */ @@ -1043,7 +1035,8 @@ xfs_dir2_sf_to_block( int newoffset; /* offset from current entry */ int offset; /* target block offset */ xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ + xfs_dir2_sf_hdr_t *sfp; /* shortform header */ __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; @@ -1061,32 +1054,30 @@ xfs_dir2_sf_to_block( ASSERT(XFS_FORCED_SHUTDOWN(mp)); return XFS_ERROR(EIO); } + + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); + /* - * Copy the directory into the stack buffer. + * Copy the directory into a temporary buffer. * Then pitch the incore inode data so we can make extents. */ + sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); + memcpy(sfp, oldsfp, dp->i_df.if_bytes); - buf_len = dp->i_df.if_bytes; - buf = kmem_alloc(buf_len, KM_SLEEP); - - memcpy(buf, sfp, buf_len); - xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK); + xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); dp->i_d.di_size = 0; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - /* - * Reset pointer - old sfp is gone. - */ - sfp = (xfs_dir2_sf_t *)buf; + /* * Add block 0 to the inode. */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); if (error) { - kmem_free(buf); + kmem_free(sfp); return error; } /* @@ -1094,21 +1085,21 @@ xfs_dir2_sf_to_block( */ error = xfs_dir2_data_init(args, blkno, &bp); if (error) { - kmem_free(buf); + kmem_free(sfp); return error; } - block = bp->data; - block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); + hdr = bp->data; + hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* * Compute size of block "tail" area. */ i = (uint)sizeof(*btp) + - (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); /* * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = (xfs_dir2_data_unused_t *)block->u; + dup = (xfs_dir2_data_unused_t *)(hdr + 1); needlog = needscan = 0; xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, &needscan); @@ -1116,50 +1107,51 @@ xfs_dir2_sf_to_block( /* * Fill in the tail. */ - btp = xfs_dir2_block_tail_p(mp, block); - btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ + btp = xfs_dir2_block_tail_p(mp, hdr); + btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ btp->stale = 0; blp = xfs_dir2_block_leaf_p(btp); - endoffset = (uint)((char *)blp - (char *)block); + endoffset = (uint)((char *)blp - (char *)hdr); /* * Remove the freespace, we'll manage it. */ xfs_dir2_data_use_free(tp, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); + ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); + (char *)dep - (char *)hdr)); /* * Create entry for .. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); + ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); + (char *)dep - (char *)hdr)); offset = XFS_DIR2_DATA_FIRST_OFFSET; /* * Loop over existing entries, stuff them in. */ - if ((i = 0) == sfp->hdr.count) + i = 0; + if (!sfp->count) sfep = NULL; else sfep = xfs_dir2_sf_firstentry(sfp); @@ -1179,43 +1171,40 @@ xfs_dir2_sf_to_block( * There should be a hole here, make one. */ if (offset < newoffset) { - dup = (xfs_dir2_data_unused_t *) - ((char *)block + offset); + dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( - ((char *)dup - (char *)block)); + ((char *)dup - (char *)hdr)); xfs_dir2_data_log_unused(tp, bp, dup); - (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, - dup, &dummy); + xfs_dir2_data_freeinsert(hdr, dup, &dummy); offset += be16_to_cpu(dup->length); continue; } /* * Copy a real entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep))); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); + dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep)); dep->namelen = sfep->namelen; memcpy(dep->name, sfep->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)block); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> hashname(&name)); blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, - (char *)dep - (char *)block)); - offset = (int)((char *)(tagp + 1) - (char *)block); - if (++i == sfp->hdr.count) + (char *)dep - (char *)hdr)); + offset = (int)((char *)(tagp + 1) - (char *)hdr); + if (++i == sfp->count) sfep = NULL; else sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ - kmem_free(buf); + kmem_free(sfp); /* * Sort the leaf entries by hash value. */ diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h deleted file mode 100644 index 10e689676382..000000000000 --- a/fs/xfs/xfs_dir2_block.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_BLOCK_H__ -#define __XFS_DIR2_BLOCK_H__ - -/* - * xfs_dir2_block.h - * Directory version 2, single block format structures - */ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_dir2_data_hdr; -struct xfs_dir2_leaf_entry; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * The single block format is as follows: - * xfs_dir2_data_hdr_t structure - * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures - * xfs_dir2_leaf_entry_t structures - * xfs_dir2_block_tail_t structure - */ - -#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */ - -typedef struct xfs_dir2_block_tail { - __be32 count; /* count of leaf entries */ - __be32 stale; /* count of stale lf entries */ -} xfs_dir2_block_tail_t; - -/* - * Generic single-block structure, for xfs_db. - */ -typedef struct xfs_dir2_block { - xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ - xfs_dir2_data_union_t u[1]; - xfs_dir2_leaf_entry_t leaf[1]; - xfs_dir2_block_tail_t tail; -} xfs_dir2_block_t; - -/* - * Pointer to the leaf header embedded in a data block (1-block format) - */ -static inline xfs_dir2_block_tail_t * -xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block) -{ - return (((xfs_dir2_block_tail_t *) - ((char *)(block) + (mp)->m_dirblksize)) - 1); -} - -/* - * Pointer to the leaf entries embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp) -{ - return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); -} - -/* - * Function declarations. - */ -extern int xfs_dir2_block_addname(struct xfs_da_args *args); -extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_block_lookup(struct xfs_da_args *args); -extern int xfs_dir2_block_removename(struct xfs_da_args *args); -extern int xfs_dir2_block_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, - struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); -extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); - -#endif /* __XFS_DIR2_BLOCK_H__ */ diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 921595b84f5b..5bbe2a8a023f 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -23,18 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_error.h" +STATIC xfs_dir2_data_free_t * +xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); + #ifdef DEBUG /* * Check the consistency of the data block. @@ -50,7 +50,7 @@ xfs_dir2_data_check( xfs_dir2_data_free_t *bf; /* bestfree table */ xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ @@ -66,17 +66,19 @@ xfs_dir2_data_check( struct xfs_name name; mp = dp->i_mount; - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - bf = d->hdr.bestfree; - p = (char *)d->u; - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + hdr = bp->data; + bf = hdr->bestfree; + p = (char *)(hdr + 1); + + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + btp = xfs_dir2_block_tail_p(mp, hdr); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; - } else - endp = (char *)d + mp->m_dirblksize; + } else { + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + endp = (char *)hdr + mp->m_dirblksize; + } + count = lastfree = freeseen = 0; /* * Account for zero bestfree entries. @@ -108,8 +110,8 @@ xfs_dir2_data_check( if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(lastfree == 0); ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)d); - dfp = xfs_dir2_data_freefind(d, dup); + (char *)dup - (char *)hdr); + dfp = xfs_dir2_data_freefind(hdr, dup); if (dfp) { i = (int)(dfp - bf); ASSERT((freeseen & (1 << i)) == 0); @@ -132,13 +134,13 @@ xfs_dir2_data_check( ASSERT(dep->namelen != 0); ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == - (char *)dep - (char *)d); + (char *)dep - (char *)hdr); count++; lastfree = 0; - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)d)); + ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; hash = mp->m_dirnameops->hashname(&name); @@ -155,9 +157,10 @@ xfs_dir2_data_check( * Need to have seen all the entries and all the bestfree slots. */ ASSERT(freeseen == 7); - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { - if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) + if (lep[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; if (i > 0) ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); @@ -172,9 +175,9 @@ xfs_dir2_data_check( * Given a data block and an unused entry from that block, * return the bestfree entry if any that corresponds to it. */ -xfs_dir2_data_free_t * +STATIC xfs_dir2_data_free_t * xfs_dir2_data_freefind( - xfs_dir2_data_t *d, /* data block */ + xfs_dir2_data_hdr_t *hdr, /* data block */ xfs_dir2_data_unused_t *dup) /* data unused entry */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ @@ -184,17 +187,17 @@ xfs_dir2_data_freefind( int seenzero; /* saw a 0 bestfree entry */ #endif - off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); + off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); #if defined(DEBUG) && defined(__KERNEL__) /* * Validate some consistency in the bestfree table. * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. */ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; - dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + for (dfp = &hdr->bestfree[0], seenzero = matched = 0; + dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) { ASSERT(!dfp->length); @@ -210,7 +213,7 @@ xfs_dir2_data_freefind( else ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); - if (dfp > &d->hdr.bestfree[0]) + if (dfp > &hdr->bestfree[0]) ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); } #endif @@ -219,13 +222,13 @@ xfs_dir2_data_freefind( * it can't be there since they're sorted. */ if (be16_to_cpu(dup->length) < - be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) + be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. */ - for (dfp = &d->hdr.bestfree[0]; - dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + for (dfp = &hdr->bestfree[0]; + dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) return NULL; @@ -243,7 +246,7 @@ xfs_dir2_data_freefind( */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( - xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block pointer */ xfs_dir2_data_unused_t *dup, /* unused space */ int *loghead) /* log the data header (out) */ { @@ -251,12 +254,13 @@ xfs_dir2_data_freeinsert( xfs_dir2_data_free_t new; /* new bestfree entry */ #ifdef __KERNEL__ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); #endif - dfp = d->hdr.bestfree; + dfp = hdr->bestfree; new.length = dup->length; - new.offset = cpu_to_be16((char *)dup - (char *)d); + new.offset = cpu_to_be16((char *)dup - (char *)hdr); + /* * Insert at position 0, 1, or 2; or not at all. */ @@ -286,36 +290,36 @@ xfs_dir2_data_freeinsert( */ STATIC void xfs_dir2_data_freeremove( - xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block header */ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { #ifdef __KERNEL__ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); #endif /* * It's the first entry, slide the next 2 up. */ - if (dfp == &d->hdr.bestfree[0]) { - d->hdr.bestfree[0] = d->hdr.bestfree[1]; - d->hdr.bestfree[1] = d->hdr.bestfree[2]; + if (dfp == &hdr->bestfree[0]) { + hdr->bestfree[0] = hdr->bestfree[1]; + hdr->bestfree[1] = hdr->bestfree[2]; } /* * It's the second entry, slide the 3rd entry up. */ - else if (dfp == &d->hdr.bestfree[1]) - d->hdr.bestfree[1] = d->hdr.bestfree[2]; + else if (dfp == &hdr->bestfree[1]) + hdr->bestfree[1] = hdr->bestfree[2]; /* * Must be the last entry. */ else - ASSERT(dfp == &d->hdr.bestfree[2]); + ASSERT(dfp == &hdr->bestfree[2]); /* * Clear the 3rd entry, must be zero now. */ - d->hdr.bestfree[2].length = 0; - d->hdr.bestfree[2].offset = 0; + hdr->bestfree[2].length = 0; + hdr->bestfree[2].offset = 0; *loghead = 1; } @@ -325,7 +329,7 @@ xfs_dir2_data_freeremove( void xfs_dir2_data_freescan( xfs_mount_t *mp, /* filesystem mount point */ - xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block header */ int *loghead) /* out: log data header */ { xfs_dir2_block_tail_t *btp; /* block tail */ @@ -335,23 +339,23 @@ xfs_dir2_data_freescan( char *p; /* current entry pointer */ #ifdef __KERNEL__ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); #endif /* * Start by clearing the table. */ - memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree)); + memset(hdr->bestfree, 0, sizeof(hdr->bestfree)); *loghead = 1; /* * Set up pointers. */ - p = (char *)d->u; - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + p = (char *)(hdr + 1); + if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { + btp = xfs_dir2_block_tail_p(mp, hdr); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)d + mp->m_dirblksize; + endp = (char *)hdr + mp->m_dirblksize; /* * Loop over the block's entries. */ @@ -361,9 +365,9 @@ xfs_dir2_data_freescan( * If it's a free entry, insert it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT((char *)dup - (char *)d == + ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(d, dup, loghead); + xfs_dir2_data_freeinsert(hdr, dup, loghead); p += be16_to_cpu(dup->length); } /* @@ -371,7 +375,7 @@ xfs_dir2_data_freescan( */ else { dep = (xfs_dir2_data_entry_t *)p; - ASSERT((char *)dep - (char *)d == + ASSERT((char *)dep - (char *)hdr == be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); p += xfs_dir2_data_entsize(dep->namelen); } @@ -389,7 +393,7 @@ xfs_dir2_data_init( xfs_dabuf_t **bpp) /* output block buffer */ { xfs_dabuf_t *bp; /* block buffer */ - xfs_dir2_data_t *d; /* pointer to block */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused entry pointer */ int error; /* error return value */ @@ -410,26 +414,28 @@ xfs_dir2_data_init( return error; } ASSERT(bp != NULL); + /* * Initialize the header. */ - d = bp->data; - d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); + hdr = bp->data; + hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { - d->hdr.bestfree[i].length = 0; - d->hdr.bestfree[i].offset = 0; + hdr->bestfree[i].length = 0; + hdr->bestfree[i].offset = 0; } + /* * Set up an unused entry for the block's body. */ - dup = &d->u[0].unused; + dup = (xfs_dir2_data_unused_t *)(hdr + 1); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t=mp->m_dirblksize - (uint)sizeof(d->hdr); - d->hdr.bestfree[0].length = cpu_to_be16(t); + t = mp->m_dirblksize - (uint)sizeof(*hdr); + hdr->bestfree[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ @@ -448,14 +454,14 @@ xfs_dir2_data_log_entry( xfs_dabuf_t *bp, /* block buffer */ xfs_dir2_data_entry_t *dep) /* data entry pointer */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr = bp->data; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), + xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - - (char *)d - 1)); + (char *)hdr - 1)); } /* @@ -466,13 +472,12 @@ xfs_dir2_data_log_header( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp) /* block buffer */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr = bp->data; - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), - (uint)(sizeof(d->hdr) - 1)); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + + xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1); } /* @@ -484,23 +489,23 @@ xfs_dir2_data_log_unused( xfs_dabuf_t *bp, /* block buffer */ xfs_dir2_data_unused_t *dup) /* data unused pointer */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr = bp->data; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); /* * Log the first part of the unused entry. */ - xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), + xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - - 1 - (char *)d)); + 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ xfs_da_log_buf(tp, bp, - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), - (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), + (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); } @@ -517,7 +522,7 @@ xfs_dir2_data_make_free( int *needlogp, /* out: log header */ int *needscanp) /* out: regen bestfree */ { - xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ char *endptr; /* end of data area */ xfs_mount_t *mp; /* filesystem mount point */ @@ -527,28 +532,29 @@ xfs_dir2_data_make_free( xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ mp = tp->t_mountp; - d = bp->data; + hdr = bp->data; + /* * Figure out where the end of the data area is. */ - if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) - endptr = (char *)d + mp->m_dirblksize; + if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)) + endptr = (char *)hdr + mp->m_dirblksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); - btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); + btp = xfs_dir2_block_tail_p(mp, hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > sizeof(d->hdr)) { + if (offset > sizeof(*hdr)) { __be16 *tagp; /* tag just before us */ - tagp = (__be16 *)((char *)d + offset) - 1; - prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); + tagp = (__be16 *)((char *)hdr + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else @@ -557,9 +563,9 @@ xfs_dir2_data_make_free( * If this isn't the end of the block, see if the entry after * us is free. */ - if ((char *)d + offset + len < endptr) { + if ((char *)hdr + offset + len < endptr) { postdup = - (xfs_dir2_data_unused_t *)((char *)d + offset + len); + (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else @@ -576,21 +582,21 @@ xfs_dir2_data_make_free( /* * See if prevdup and/or postdup are in bestfree table. */ - dfp = xfs_dir2_data_freefind(d, prevdup); - dfp2 = xfs_dir2_data_freefind(d, postdup); + dfp = xfs_dir2_data_freefind(hdr, prevdup); + dfp2 = xfs_dir2_data_freefind(hdr, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise * since the third bestfree is there, there might be more * entries. */ - needscan = (d->hdr.bestfree[2].length != 0); + needscan = (hdr->bestfree[2].length != 0); /* * Fix up the new big freespace. */ be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)d); + cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, prevdup); if (!needscan) { /* @@ -600,18 +606,18 @@ xfs_dir2_data_make_free( * Remove entry 1 first then entry 0. */ ASSERT(dfp && dfp2); - if (dfp == &d->hdr.bestfree[1]) { - dfp = &d->hdr.bestfree[0]; + if (dfp == &hdr->bestfree[1]) { + dfp = &hdr->bestfree[0]; ASSERT(dfp2 == dfp); - dfp2 = &d->hdr.bestfree[1]; + dfp2 = &hdr->bestfree[1]; } - xfs_dir2_data_freeremove(d, dfp2, needlogp); - xfs_dir2_data_freeremove(d, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, dfp2, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); /* * Now insert the new entry. */ - dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); - ASSERT(dfp == &d->hdr.bestfree[0]); + dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); + ASSERT(dfp == &hdr->bestfree[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); @@ -621,10 +627,10 @@ xfs_dir2_data_make_free( * The entry before us is free, merge with it. */ else if (prevdup) { - dfp = xfs_dir2_data_freefind(d, prevdup); + dfp = xfs_dir2_data_freefind(hdr, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = - cpu_to_be16((char *)prevdup - (char *)d); + cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, prevdup); /* * If the previous entry was in the table, the new entry @@ -632,27 +638,27 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(prevdup->length) > - be16_to_cpu(d->hdr.bestfree[2].length); + be16_to_cpu(hdr->bestfree[2].length); } } /* * The following entry is free, merge with it. */ else if (postdup) { - dfp = xfs_dir2_data_freefind(d, postdup); - newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); + dfp = xfs_dir2_data_freefind(hdr, postdup); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If the following entry was in the table, the new entry @@ -660,28 +666,28 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(newdup->length) > - be16_to_cpu(d->hdr.bestfree[2].length); + be16_to_cpu(hdr->bestfree[2].length); } } /* * Neither neighbor is free. Make a new entry. */ else { - newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); - (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); } *needscanp = needscan; } @@ -699,7 +705,7 @@ xfs_dir2_data_use_free( int *needlogp, /* out: need to log header */ int *needscanp) /* out: need regen bestfree */ { - xfs_dir2_data_t *d; /* data block */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ @@ -708,24 +714,24 @@ xfs_dir2_data_use_free( xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - d = bp->data; - ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || - be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); + hdr = bp->data; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)d); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); - ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + ASSERT(offset >= (char *)dup - (char *)hdr); + ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); + ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); /* * Look up the entry in the bestfree table. */ - dfp = xfs_dir2_data_freefind(d, dup); + dfp = xfs_dir2_data_freefind(hdr, dup); oldlen = be16_to_cpu(dup->length); - ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); + ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length)); /* * Check for alignment with front and back of the entry. */ - matchfront = (char *)dup - (char *)d == offset; - matchback = (char *)dup + oldlen - (char *)d == offset + len; + matchfront = (char *)dup - (char *)hdr == offset; + matchback = (char *)dup + oldlen - (char *)hdr == offset + len; ASSERT(*needscanp == 0); needscan = 0; /* @@ -734,9 +740,9 @@ xfs_dir2_data_use_free( */ if (matchfront && matchback) { if (dfp) { - needscan = (d->hdr.bestfree[2].offset != 0); + needscan = (hdr->bestfree[2].offset != 0); if (!needscan) - xfs_dir2_data_freeremove(d, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); } } /* @@ -744,27 +750,27 @@ xfs_dir2_data_use_free( * Make a new entry with the remaining freespace. */ else if (matchfront) { - newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); + newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ - needscan = dfp == &d->hdr.bestfree[2]; + needscan = dfp == &hdr->bestfree[2]; } } /* @@ -773,25 +779,25 @@ xfs_dir2_data_use_free( */ else if (matchback) { newdup = dup; - newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); + ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ - needscan = dfp == &d->hdr.bestfree[2]; + needscan = dfp == &hdr->bestfree[2]; } } /* @@ -800,15 +806,15 @@ xfs_dir2_data_use_free( */ else { newdup = dup; - newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); + newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = - cpu_to_be16((char *)newdup - (char *)d); + cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); - newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); + newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = - cpu_to_be16((char *)newdup2 - (char *)d); + cpu_to_be16((char *)newdup2 - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup2); /* * If the old entry was in the table, we need to scan @@ -819,13 +825,12 @@ xfs_dir2_data_use_free( * the 2 new will work. */ if (dfp) { - needscan = (d->hdr.bestfree[2].length != 0); + needscan = (hdr->bestfree[2].length != 0); if (!needscan) { - xfs_dir2_data_freeremove(d, dfp, needlogp); - (void)xfs_dir2_data_freeinsert(d, newdup, - needlogp); - (void)xfs_dir2_data_freeinsert(d, newdup2, - needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup2, + needlogp); } } } diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h deleted file mode 100644 index efbc290c7fec..000000000000 --- a/fs/xfs/xfs_dir2_data.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_DATA_H__ -#define __XFS_DIR2_DATA_H__ - -/* - * Directory format 2, data block structures. - */ - -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_inode; -struct xfs_trans; - -/* - * Constants. - */ -#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */ -#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ -#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) -#define XFS_DIR2_DATA_FREE_TAG 0xffff -#define XFS_DIR2_DATA_FD_COUNT 3 - -/* - * Directory address space divided into sections, - * spaces separated by 32GB. - */ -#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) -#define XFS_DIR2_DATA_SPACE 0 -#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_DATA_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) - -/* - * Offsets of . and .. in data space (always block 0) - */ -#define XFS_DIR2_DATA_DOT_OFFSET \ - ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) -#define XFS_DIR2_DATA_DOTDOT_OFFSET \ - (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) -#define XFS_DIR2_DATA_FIRST_OFFSET \ - (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) - -/* - * Structures. - */ - -/* - * Describe a free area in the data block. - * The freespace will be formatted as a xfs_dir2_data_unused_t. - */ -typedef struct xfs_dir2_data_free { - __be16 offset; /* start of freespace */ - __be16 length; /* length of freespace */ -} xfs_dir2_data_free_t; - -/* - * Header for the data blocks. - * Always at the beginning of a directory-sized block. - * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. - */ -typedef struct xfs_dir2_data_hdr { - __be32 magic; /* XFS_DIR2_DATA_MAGIC */ - /* or XFS_DIR2_BLOCK_MAGIC */ - xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; -} xfs_dir2_data_hdr_t; - -/* - * Active entry in a data block. Aligned to 8 bytes. - * Tag appears as the last 2 bytes. - */ -typedef struct xfs_dir2_data_entry { - __be64 inumber; /* inode number */ - __u8 namelen; /* name length */ - __u8 name[1]; /* name bytes, no null */ - /* variable offset */ - __be16 tag; /* starting offset of us */ -} xfs_dir2_data_entry_t; - -/* - * Unused entry in a data block. Aligned to 8 bytes. - * Tag appears as the last 2 bytes. - */ -typedef struct xfs_dir2_data_unused { - __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ - __be16 length; /* total free length */ - /* variable offset */ - __be16 tag; /* starting offset of us */ -} xfs_dir2_data_unused_t; - -typedef union { - xfs_dir2_data_entry_t entry; - xfs_dir2_data_unused_t unused; -} xfs_dir2_data_union_t; - -/* - * Generic data block structure, for xfs_db. - */ -typedef struct xfs_dir2_data { - xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ - xfs_dir2_data_union_t u[1]; -} xfs_dir2_data_t; - -/* - * Macros. - */ - -/* - * Size of a data entry. - */ -static inline int xfs_dir2_data_entsize(int n) -{ - return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ - (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); -} - -/* - * Pointer to an entry's tag word. - */ -static inline __be16 * -xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * Pointer to a freespace's tag word. - */ -static inline __be16 * -xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup) -{ - return (__be16 *)((char *)dup + - be16_to_cpu(dup->length) - sizeof(__be16)); -} - -/* - * Function declarations. - */ -#ifdef DEBUG -extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); -#else -#define xfs_dir2_data_check(dp,bp) -#endif -extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d, - xfs_dir2_data_unused_t *dup); -extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, - xfs_dir2_data_unused_t *dup, int *loghead); -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, - int *loghead); -extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, - struct xfs_dabuf **bpp); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_entry_t *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_unused_t *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, - int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, - xfs_dir2_data_unused_t *dup, - xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, - int *needscanp); - -#endif /* __XFS_DIR2_DATA_H__ */ diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h new file mode 100644 index 000000000000..07270981f48f --- /dev/null +++ b/fs/xfs/xfs_dir2_format.h @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_FORMAT_H__ +#define __XFS_DIR2_FORMAT_H__ + +/* + * Directory version 2. + * + * There are 4 possible formats: + * - shortform - embedded into the inode + * - single block - data with embedded leaf at the end + * - multiple data blocks, single leaf+freeindex block + * - data blocks, node and leaf blocks (btree), freeindex blocks + * + * Note: many node blocks structures and constants are shared with the attr + * code and defined in xfs_da_btree.h. + */ + +#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */ +#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */ +#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ + +/* + * Byte offset in data block and shortform entry. + */ +typedef __uint16_t xfs_dir2_data_off_t; +#define NULLDATAOFF 0xffffU +typedef uint xfs_dir2_data_aoff_t; /* argument form */ + +/* + * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. + * Only need 16 bits, this is the byte offset into the single block form. + */ +typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; + +/* + * Offset in data space of a data entry. + */ +typedef __uint32_t xfs_dir2_dataptr_t; +#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) +#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) + +/* + * Byte offset in a directory. + */ +typedef xfs_off_t xfs_dir2_off_t; + +/* + * Directory block number (logical dirblk in file) + */ +typedef __uint32_t xfs_dir2_db_t; + +/* + * Inode number stored as 8 8-bit values. + */ +typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; + +/* + * Inode number stored as 4 8-bit values. + * Works a lot of the time, when all the inode numbers in a directory + * fit in 32 bits. + */ +typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; + +typedef union { + xfs_dir2_ino8_t i8; + xfs_dir2_ino4_t i4; +} xfs_dir2_inou_t; +#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) + +/* + * Directory layout when stored internal to an inode. + * + * Small directories are packed as tightly as possible so as to fit into the + * literal area of the inode. These "shortform" directories consist of a + * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry + * structures. Due the different inode number storage size and the variable + * length name field in the xfs_dir2_sf_entry all these structure are + * variable length, and the accessors in this file should be used to iterate + * over them. + */ +typedef struct xfs_dir2_sf_hdr { + __uint8_t count; /* count of entries */ + __uint8_t i8count; /* count of 8-byte inode #s */ + xfs_dir2_inou_t parent; /* parent dir inode number */ +} __arch_pack xfs_dir2_sf_hdr_t; + +typedef struct xfs_dir2_sf_entry { + __u8 namelen; /* actual name length */ + xfs_dir2_sf_off_t offset; /* saved offset */ + __u8 name[]; /* name, variable size */ + /* + * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a + * variable offset after the name. + */ +} __arch_pack xfs_dir2_sf_entry_t; + +static inline int xfs_dir2_sf_hdr_size(int i8count) +{ + return sizeof(struct xfs_dir2_sf_hdr) - + (i8count == 0) * + (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t)); +} + +static inline xfs_dir2_data_aoff_t +xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) +{ + return get_unaligned_be16(&sfep->offset.i); +} + +static inline void +xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) +{ + put_unaligned_be16(off, &sfep->offset.i); +} + +static inline int +xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len) +{ + return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */ + len + /* name */ + (hdr->i8count ? /* ino */ + sizeof(xfs_dir2_ino8_t) : + sizeof(xfs_dir2_ino4_t)); +} + +static inline struct xfs_dir2_sf_entry * +xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); +} + +static inline struct xfs_dir2_sf_entry * +xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); +} + + +/* + * Data block structures. + * + * A pure data block looks like the following drawing on disk: + * + * +-------------------------------------------------+ + * | xfs_dir2_data_hdr_t | + * +-------------------------------------------------+ + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | ... | + * +-------------------------------------------------+ + * | unused space | + * +-------------------------------------------------+ + * + * As all the entries are variable size structures the accessors below should + * be used to iterate over them. + * + * In addition to the pure data blocks for the data and node formats, + * most structures are also used for the combined data/freespace "block" + * format below. + */ + +#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ +#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) +#define XFS_DIR2_DATA_FREE_TAG 0xffff +#define XFS_DIR2_DATA_FD_COUNT 3 + +/* + * Directory address space divided into sections, + * spaces separated by 32GB. + */ +#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) +#define XFS_DIR2_DATA_SPACE 0 +#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_DATA_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) + +/* + * Offsets of . and .. in data space (always block 0) + */ +#define XFS_DIR2_DATA_DOT_OFFSET \ + ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr)) +#define XFS_DIR2_DATA_DOTDOT_OFFSET \ + (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1)) +#define XFS_DIR2_DATA_FIRST_OFFSET \ + (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2)) + +/* + * Describe a free area in the data block. + * + * The freespace will be formatted as a xfs_dir2_data_unused_t. + */ +typedef struct xfs_dir2_data_free { + __be16 offset; /* start of freespace */ + __be16 length; /* length of freespace */ +} xfs_dir2_data_free_t; + +/* + * Header for the data blocks. + * + * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. + */ +typedef struct xfs_dir2_data_hdr { + __be32 magic; /* XFS_DIR2_DATA_MAGIC or */ + /* XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; +} xfs_dir2_data_hdr_t; + +/* + * Active entry in a data block. + * + * Aligned to 8 bytes. After the variable length name field there is a + * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p. + */ +typedef struct xfs_dir2_data_entry { + __be64 inumber; /* inode number */ + __u8 namelen; /* name length */ + __u8 name[]; /* name bytes, no null */ + /* __be16 tag; */ /* starting offset of us */ +} xfs_dir2_data_entry_t; + +/* + * Unused entry in a data block. + * + * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed + * using xfs_dir2_data_unused_tag_p. + */ +typedef struct xfs_dir2_data_unused { + __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ + __be16 length; /* total free length */ + /* variable offset */ + __be16 tag; /* starting offset of us */ +} xfs_dir2_data_unused_t; + +/* + * Size of a data entry. + */ +static inline int xfs_dir2_data_entsize(int n) +{ + return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n + + (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN); +} + +/* + * Pointer to an entry's tag word. + */ +static inline __be16 * +xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * Pointer to a freespace's tag word. + */ +static inline __be16 * +xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) +{ + return (__be16 *)((char *)dup + + be16_to_cpu(dup->length) - sizeof(__be16)); +} + +/* + * Leaf block structures. + * + * A pure leaf block looks like the following drawing on disk: + * + * +---------------------------+ + * | xfs_dir2_leaf_hdr_t | + * +---------------------------+ + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * | ... | + * +---------------------------+ + * | xfs_dir2_data_off_t | + * | xfs_dir2_data_off_t | + * | xfs_dir2_data_off_t | + * | ... | + * +---------------------------+ + * | xfs_dir2_leaf_tail_t | + * +---------------------------+ + * + * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block + * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present + * for directories with separate leaf nodes and free space blocks + * (magic = XFS_DIR2_LEAFN_MAGIC). + * + * As all the entries are variable size structures the accessors below should + * be used to iterate over them. + */ + +/* + * Offset of the leaf/node space. First block in this space + * is the btree root. + */ +#define XFS_DIR2_LEAF_SPACE 1 +#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_LEAF_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) + +/* + * Leaf block header. + */ +typedef struct xfs_dir2_leaf_hdr { + xfs_da_blkinfo_t info; /* header for da routines */ + __be16 count; /* count of entries */ + __be16 stale; /* count of stale entries */ +} xfs_dir2_leaf_hdr_t; + +/* + * Leaf block entry. + */ +typedef struct xfs_dir2_leaf_entry { + __be32 hashval; /* hash value of name */ + __be32 address; /* address of data entry */ +} xfs_dir2_leaf_entry_t; + +/* + * Leaf block tail. + */ +typedef struct xfs_dir2_leaf_tail { + __be32 bestcount; +} xfs_dir2_leaf_tail_t; + +/* + * Leaf block. + */ +typedef struct xfs_dir2_leaf { + xfs_dir2_leaf_hdr_t hdr; /* leaf header */ + xfs_dir2_leaf_entry_t ents[]; /* entries */ +} xfs_dir2_leaf_t; + +/* + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +/* + * Get address of the bestcount field in the single-leaf block. + */ +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) +{ + return (struct xfs_dir2_leaf_tail *) + ((char *)lp + mp->m_dirblksize - + sizeof(struct xfs_dir2_leaf_tail)); +} + +/* + * Get address of the bests array in the single-leaf block. + */ +static inline __be16 * +xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) +{ + return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); +} + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t) + (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)(by & + ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)db << + (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); +} + +/* + * Free space block defintions for the node format. + */ + +/* + * Offset of the freespace index. + */ +#define XFS_DIR2_FREE_SPACE 2 +#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_FREE_FIRSTDB(mp) \ + xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) + +typedef struct xfs_dir2_free_hdr { + __be32 magic; /* XFS_DIR2_FREE_MAGIC */ + __be32 firstdb; /* db of first entry */ + __be32 nvalid; /* count of valid entries */ + __be32 nused; /* count of used entries */ +} xfs_dir2_free_hdr_t; + +typedef struct xfs_dir2_free { + xfs_dir2_free_hdr_t hdr; /* block header */ + __be16 bests[]; /* best free counts */ + /* unused entries are -1 */ +} xfs_dir2_free_t; + +static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / + sizeof(xfs_dir2_data_off_t); +} + +/* + * Convert data space db to the corresponding free db. + */ +static inline xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static inline int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return db % xfs_dir2_free_max_bests(mp); +} + +/* + * Single block format. + * + * The single block format looks like the following drawing on disk: + * + * +-------------------------------------------------+ + * | xfs_dir2_data_hdr_t | + * +-------------------------------------------------+ + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | + * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t : + * | ... | + * +-------------------------------------------------+ + * | unused space | + * +-------------------------------------------------+ + * | ... | + * | xfs_dir2_leaf_entry_t | + * | xfs_dir2_leaf_entry_t | + * +-------------------------------------------------+ + * | xfs_dir2_block_tail_t | + * +-------------------------------------------------+ + * + * As all the entries are variable size structures the accessors below should + * be used to iterate over them. + */ + +typedef struct xfs_dir2_block_tail { + __be32 count; /* count of leaf entries */ + __be32 stale; /* count of stale lf entries */ +} xfs_dir2_block_tail_t; + +/* + * Pointer to the leaf header embedded in a data block (1-block format) + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir2_block_tail *) + ((char *)hdr + mp->m_dirblksize)) - 1; +} + +/* + * Pointer to the leaf entries embedded in a data block (1-block format) + */ +static inline struct xfs_dir2_leaf_entry * +xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) +{ + return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); +} + +#endif /* __XFS_DIR2_FORMAT_H__ */ diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index ae891223be90..ca2386d82cdf 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -24,18 +24,14 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" -#include "xfs_dir2_node.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -64,7 +60,7 @@ xfs_dir2_block_to_leaf( { __be16 *bestsp; /* leaf's bestsp entries */ xfs_dablk_t blkno; /* leaf block's bno */ - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ xfs_dir2_block_tail_t *btp; /* block's tail */ xfs_inode_t *dp; /* incore directory inode */ @@ -101,9 +97,9 @@ xfs_dir2_block_to_leaf( } ASSERT(lbp != NULL); leaf = lbp->data; - block = dbp->data; + hdr = dbp->data; xfs_dir2_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Set the counts in the leaf header. @@ -123,23 +119,23 @@ xfs_dir2_block_to_leaf( * tail be free. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), - (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize - + (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), + (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - (char *)blp), &needlog, &needscan); /* * Fix up the block header, make it a data block. */ - block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); + hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Set up leaf tail and bests table. */ ltp = xfs_dir2_leaf_tail_p(mp, leaf); ltp->bestcount = cpu_to_be32(1); bestsp = xfs_dir2_leaf_bests_p(ltp); - bestsp[0] = block->hdr.bestfree[0].length; + bestsp[0] = hdr->bestfree[0].length; /* * Log the data header and leaf bests table. */ @@ -152,6 +148,131 @@ xfs_dir2_block_to_leaf( return 0; } +STATIC void +xfs_dir2_leaf_find_stale( + struct xfs_dir2_leaf *leaf, + int index, + int *lowstale, + int *highstale) +{ + /* + * Find the first stale entry before our index, if any. + */ + for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { + if (leaf->ents[*lowstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + break; + } + + /* + * Find the first stale entry at or after our index, if any. + * Stop if the result would require moving more entries than using + * lowstale. + */ + for (*highstale = index; + *highstale < be16_to_cpu(leaf->hdr.count); + ++*highstale) { + if (leaf->ents[*highstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + break; + if (*lowstale >= 0 && index - *lowstale <= *highstale - index) + break; + } +} + +struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_find_entry( + xfs_dir2_leaf_t *leaf, /* leaf structure */ + int index, /* leaf table position */ + int compact, /* need to compact leaves */ + int lowstale, /* index of prev stale leaf */ + int highstale, /* index of next stale leaf */ + int *lfloglow, /* low leaf logging index */ + int *lfloghigh) /* high leaf logging index */ +{ + if (!leaf->hdr.stale) { + xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ + + /* + * Now we need to make room to insert the leaf entry. + * + * If there are no stale entries, just insert a hole at index. + */ + lep = &leaf->ents[index]; + if (index < be16_to_cpu(leaf->hdr.count)) + memmove(lep + 1, lep, + (be16_to_cpu(leaf->hdr.count) - index) * + sizeof(*lep)); + + /* + * Record low and high logging indices for the leaf. + */ + *lfloglow = index; + *lfloghigh = be16_to_cpu(leaf->hdr.count); + be16_add_cpu(&leaf->hdr.count, 1); + return lep; + } + + /* + * There are stale entries. + * + * We will use one of them for the new entry. It's probably not at + * the right location, so we'll have to shift some up or down first. + * + * If we didn't compact before, we need to find the nearest stale + * entries before and after our insertion point. + */ + if (compact == 0) + xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); + + /* + * If the low one is better, use it. + */ + if (lowstale >= 0 && + (highstale == be16_to_cpu(leaf->hdr.count) || + index - lowstale - 1 < highstale - index)) { + ASSERT(index - lowstale - 1 >= 0); + ASSERT(leaf->ents[lowstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); + + /* + * Copy entries up to cover the stale entry and make room + * for the new entry. + */ + if (index - lowstale - 1 > 0) { + memmove(&leaf->ents[lowstale], + &leaf->ents[lowstale + 1], + (index - lowstale - 1) * + sizeof(xfs_dir2_leaf_entry_t)); + } + *lfloglow = MIN(lowstale, *lfloglow); + *lfloghigh = MAX(index - 1, *lfloghigh); + be16_add_cpu(&leaf->hdr.stale, -1); + return &leaf->ents[index - 1]; + } + + /* + * The high one is better, so use that one. + */ + ASSERT(highstale - index >= 0); + ASSERT(leaf->ents[highstale].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); + + /* + * Copy entries down to cover the stale entry and make room for the + * new entry. + */ + if (highstale - index > 0) { + memmove(&leaf->ents[index + 1], + &leaf->ents[index], + (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); + } + *lfloglow = MIN(index, *lfloglow); + *lfloghigh = MAX(highstale, *lfloghigh); + be16_add_cpu(&leaf->hdr.stale, -1); + return &leaf->ents[index]; +} + /* * Add an entry to a leaf form directory. */ @@ -161,7 +282,7 @@ xfs_dir2_leaf_addname( { __be16 *bestsp; /* freespace table in leaf */ int compact; /* need to compact leaves */ - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ xfs_inode_t *dp; /* incore directory inode */ @@ -225,7 +346,7 @@ xfs_dir2_leaf_addname( continue; i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); - ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); + ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; break; @@ -239,7 +360,8 @@ xfs_dir2_leaf_addname( /* * Remember a block we see that's missing. */ - if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) + if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && + use_block == -1) use_block = i; else if (be16_to_cpu(bestsp[i]) >= length) { use_block = i; @@ -250,14 +372,17 @@ xfs_dir2_leaf_addname( /* * How many bytes do we need in the leaf block? */ - needbytes = - (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) + - (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); + needbytes = 0; + if (!leaf->hdr.stale) + needbytes += sizeof(xfs_dir2_leaf_entry_t); + if (use_block == -1) + needbytes += sizeof(xfs_dir2_data_off_t); + /* * Now kill use_block if it refers to a missing block, so we * can use it as an indication of allocation needed. */ - if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) + if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) use_block = -1; /* * If we don't have enough free bytes but we can make enough @@ -369,8 +494,8 @@ xfs_dir2_leaf_addname( */ else xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); - data = dbp->data; - bestsp[use_block] = data->hdr.bestfree[0].length; + hdr = dbp->data; + bestsp[use_block] = hdr->bestfree[0].length; grown = 1; } /* @@ -384,7 +509,7 @@ xfs_dir2_leaf_addname( xfs_da_brelse(tp, lbp); return error; } - data = dbp->data; + hdr = dbp->data; grown = 0; } xfs_dir2_data_check(dp, dbp); @@ -392,14 +517,14 @@ xfs_dir2_leaf_addname( * Point to the biggest freespace in our data block. */ dup = (xfs_dir2_data_unused_t *) - ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); + ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); ASSERT(be16_to_cpu(dup->length) >= length); needscan = needlog = 0; /* * Mark the initial part of our freespace in use for the new entry. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* * Initialize our new entry (at last). @@ -409,12 +534,12 @@ xfs_dir2_leaf_addname( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)data); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Need to log the data block's header. */ @@ -425,107 +550,15 @@ xfs_dir2_leaf_addname( * If the bests table needs to be changed, do it. * Log the change unless we've already done that. */ - if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { - bestsp[use_block] = data->hdr.bestfree[0].length; + if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) { + bestsp[use_block] = hdr->bestfree[0].length; if (!grown) xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); } - /* - * Now we need to make room to insert the leaf entry. - * If there are no stale entries, we just insert a hole at index. - */ - if (!leaf->hdr.stale) { - /* - * lep is still good as the index leaf entry. - */ - if (index < be16_to_cpu(leaf->hdr.count)) - memmove(lep + 1, lep, - (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); - /* - * Record low and high logging indices for the leaf. - */ - lfloglow = index; - lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add_cpu(&leaf->hdr.count, 1); - } - /* - * There are stale entries. - * We will use one of them for the new entry. - * It's probably not at the right location, so we'll have to - * shift some up or down first. - */ - else { - /* - * If we didn't compact before, we need to find the nearest - * stale entries before and after our insertion point. - */ - if (compact == 0) { - /* - * Find the first stale entry before the insertion - * point, if any. - */ - for (lowstale = index - 1; - lowstale >= 0 && - be32_to_cpu(leaf->ents[lowstale].address) != - XFS_DIR2_NULL_DATAPTR; - lowstale--) - continue; - /* - * Find the next stale entry at or after the insertion - * point, if any. Stop if we go so far that the - * lowstale entry would be better. - */ - for (highstale = index; - highstale < be16_to_cpu(leaf->hdr.count) && - be32_to_cpu(leaf->ents[highstale].address) != - XFS_DIR2_NULL_DATAPTR && - (lowstale < 0 || - index - lowstale - 1 >= highstale - index); - highstale++) - continue; - } - /* - * If the low one is better, use it. - */ - if (lowstale >= 0 && - (highstale == be16_to_cpu(leaf->hdr.count) || - index - lowstale - 1 < highstale - index)) { - ASSERT(index - lowstale - 1 >= 0); - ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == - XFS_DIR2_NULL_DATAPTR); - /* - * Copy entries up to cover the stale entry - * and make room for the new entry. - */ - if (index - lowstale - 1 > 0) - memmove(&leaf->ents[lowstale], - &leaf->ents[lowstale + 1], - (index - lowstale - 1) * sizeof(*lep)); - lep = &leaf->ents[index - 1]; - lfloglow = MIN(lowstale, lfloglow); - lfloghigh = MAX(index - 1, lfloghigh); - } - /* - * The high one is better, so use that one. - */ - else { - ASSERT(highstale - index >= 0); - ASSERT(be32_to_cpu(leaf->ents[highstale].address) == - XFS_DIR2_NULL_DATAPTR); - /* - * Copy entries down to cover the stale entry - * and make room for the new entry. - */ - if (highstale - index > 0) - memmove(&leaf->ents[index + 1], - &leaf->ents[index], - (highstale - index) * sizeof(*lep)); - lep = &leaf->ents[index]; - lfloglow = MIN(index, lfloglow); - lfloghigh = MAX(highstale, lfloghigh); - } - be16_add_cpu(&leaf->hdr.stale, -1); - } + + lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, + highstale, &lfloglow, &lfloghigh); + /* * Fill in the new leaf entry. */ @@ -562,7 +595,7 @@ xfs_dir2_leaf_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); /* * This value is not restrictive enough. * Should factor in the size of the bests table as well. @@ -582,7 +615,7 @@ xfs_dir2_leaf_check( if (i + 1 < be16_to_cpu(leaf->hdr.count)) ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); - if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -611,7 +644,8 @@ xfs_dir2_leaf_compact( * Compress out the stale entries in place. */ for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { - if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; /* * Only actually copy the entries that are different. @@ -663,24 +697,9 @@ xfs_dir2_leaf_compact_x1( leaf = bp->data; ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); index = *indexp; - /* - * Find the first stale entry before our index, if any. - */ - for (lowstale = index - 1; - lowstale >= 0 && - be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR; - lowstale--) - continue; - /* - * Find the first stale entry at or after our index, if any. - * Stop if the answer would be worse than lowstale. - */ - for (highstale = index; - highstale < be16_to_cpu(leaf->hdr.count) && - be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR && - (lowstale < 0 || index - lowstale > highstale - index); - highstale++) - continue; + + xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); + /* * Pick the better of lowstale and highstale. */ @@ -701,7 +720,8 @@ xfs_dir2_leaf_compact_x1( if (index == from) newindex = to; if (from != keepstale && - be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { + leaf->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { if (from == to) *lowlogp = to; continue; @@ -760,7 +780,7 @@ xfs_dir2_leaf_getdents( int byteoff; /* offset in current block */ xfs_dir2_db_t curdb; /* db for current block */ xfs_dir2_off_t curoff; /* current overall offset */ - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int error = 0; /* error return value */ @@ -1018,23 +1038,23 @@ xfs_dir2_leaf_getdents( else if (curoff > newoff) ASSERT(xfs_dir2_byte_to_db(mp, curoff) == curdb); - data = bp->data; + hdr = bp->data; xfs_dir2_data_check(dp, bp); /* * Find our position in the block. */ - ptr = (char *)&data->u; + ptr = (char *)(hdr + 1); byteoff = xfs_dir2_byte_to_off(mp, curoff); /* * Skip past the header. */ if (byteoff == 0) - curoff += (uint)sizeof(data->hdr); + curoff += (uint)sizeof(*hdr); /* * Skip past entries until we reach our offset. */ else { - while ((char *)ptr - (char *)data < byteoff) { + while ((char *)ptr - (char *)hdr < byteoff) { dup = (xfs_dir2_data_unused_t *)ptr; if (be16_to_cpu(dup->freetag) @@ -1055,8 +1075,8 @@ xfs_dir2_leaf_getdents( curoff = xfs_dir2_db_off_to_byte(mp, xfs_dir2_byte_to_db(mp, curoff), - (char *)ptr - (char *)data); - if (ptr >= (char *)data + mp->m_dirblksize) { + (char *)ptr - (char *)hdr); + if (ptr >= (char *)hdr + mp->m_dirblksize) { continue; } } @@ -1179,7 +1199,7 @@ xfs_dir2_leaf_log_bests( xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; @@ -1202,8 +1222,8 @@ xfs_dir2_leaf_log_ents( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || - be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); firstlep = &leaf->ents[first]; lastlep = &leaf->ents[last]; xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), @@ -1221,8 +1241,8 @@ xfs_dir2_leaf_log_header( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || - be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), (uint)(sizeof(leaf->hdr) - 1)); } @@ -1241,7 +1261,7 @@ xfs_dir2_leaf_log_tail( mp = tp->t_mountp; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); ltp = xfs_dir2_leaf_tail_p(mp, leaf); xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), (uint)(mp->m_dirblksize - 1)); @@ -1437,7 +1457,7 @@ xfs_dir2_leaf_removename( xfs_da_args_t *args) /* operation arguments */ { __be16 *bestsp; /* leaf block best freespace */ - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry structure */ @@ -1467,7 +1487,7 @@ xfs_dir2_leaf_removename( tp = args->trans; mp = dp->i_mount; leaf = lbp->data; - data = dbp->data; + hdr = dbp->data; xfs_dir2_data_check(dp, dbp); /* * Point to the leaf entry, use that to point to the data entry. @@ -1475,9 +1495,9 @@ xfs_dir2_leaf_removename( lep = &leaf->ents[index]; db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *) - ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); needscan = needlog = 0; - oldbest = be16_to_cpu(data->hdr.bestfree[0].length); + oldbest = be16_to_cpu(hdr->bestfree[0].length); ltp = xfs_dir2_leaf_tail_p(mp, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); @@ -1485,7 +1505,7 @@ xfs_dir2_leaf_removename( * Mark the former data entry unused. */ xfs_dir2_data_make_free(tp, dbp, - (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), + (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. @@ -1499,23 +1519,23 @@ xfs_dir2_leaf_removename( * log the data block header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ - if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { - bestsp[db] = data->hdr.bestfree[0].length; + if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) { + bestsp[db] = hdr->bestfree[0].length; xfs_dir2_leaf_log_bests(tp, lbp, db, db); } xfs_dir2_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. */ - if (be16_to_cpu(data->hdr.bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(data->hdr)) { + if (be16_to_cpu(hdr->bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(*hdr)) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* @@ -1542,7 +1562,7 @@ xfs_dir2_leaf_removename( * Look for the last active entry (i). */ for (i = db - 1; i > 0; i--) { - if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) + if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) break; } /* @@ -1686,9 +1706,6 @@ xfs_dir2_leaf_trim_data( xfs_dir2_db_t db) /* data block number */ { __be16 *bestsp; /* leaf bests table */ -#ifdef DEBUG - xfs_dir2_data_t *data; /* data block structure */ -#endif xfs_dabuf_t *dbp; /* data block buffer */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ @@ -1707,20 +1724,21 @@ xfs_dir2_leaf_trim_data( XFS_DATA_FORK))) { return error; } -#ifdef DEBUG - data = dbp->data; - ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); -#endif - /* this seems to be an error - * data is only valid if DEBUG is defined? - * RMC 09/08/1999 - */ leaf = lbp->data; ltp = xfs_dir2_leaf_tail_p(mp, leaf); - ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == - mp->m_dirblksize - (uint)sizeof(data->hdr)); + +#ifdef DEBUG +{ + struct xfs_dir2_data_hdr *hdr = dbp->data; + + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); + ASSERT(be16_to_cpu(hdr->bestfree[0].length) == + mp->m_dirblksize - (uint)sizeof(*hdr)); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); +} +#endif + /* * Get rid of the data block. */ @@ -1740,6 +1758,20 @@ xfs_dir2_leaf_trim_data( return 0; } +static inline size_t +xfs_dir2_leaf_size( + struct xfs_dir2_leaf_hdr *hdr, + int counts) +{ + int entries; + + entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale); + return sizeof(xfs_dir2_leaf_hdr_t) + + entries * sizeof(xfs_dir2_leaf_entry_t) + + counts * sizeof(xfs_dir2_data_off_t) + + sizeof(xfs_dir2_leaf_tail_t); +} + /* * Convert node form directory to leaf form directory. * The root of the node form dir needs to already be a LEAFN block. @@ -1810,7 +1842,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * Read the freespace block. */ @@ -1819,20 +1851,19 @@ xfs_dir2_node_to_leaf( return error; } free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); ASSERT(!free->hdr.firstdb); + /* * Now see if the leafn and free data will fit in a leaf1. * If not, release the buffer and give up. */ - if ((uint)sizeof(leaf->hdr) + - (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + - be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) + - (uint)sizeof(leaf->tail) > - mp->m_dirblksize) { + if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > + mp->m_dirblksize) { xfs_da_brelse(tp, fbp); return 0; } + /* * If the leaf has any stale entries in it, compress them out. * The compact routine will log the header. @@ -1851,7 +1882,7 @@ xfs_dir2_node_to_leaf( * Set up the leaf bests table. */ memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, - be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); + be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t)); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_check(dp, lbp); diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h deleted file mode 100644 index 6c9539f06987..000000000000 --- a/fs/xfs/xfs_dir2_leaf.h +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_LEAF_H__ -#define __XFS_DIR2_LEAF_H__ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * Offset of the leaf/node space. First block in this space - * is the btree root. - */ -#define XFS_DIR2_LEAF_SPACE 1 -#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_LEAF_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) - -/* - * Offset in data space of a data entry. - */ -typedef __uint32_t xfs_dir2_dataptr_t; -#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) -#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) - -/* - * Leaf block header. - */ -typedef struct xfs_dir2_leaf_hdr { - xfs_da_blkinfo_t info; /* header for da routines */ - __be16 count; /* count of entries */ - __be16 stale; /* count of stale entries */ -} xfs_dir2_leaf_hdr_t; - -/* - * Leaf block entry. - */ -typedef struct xfs_dir2_leaf_entry { - __be32 hashval; /* hash value of name */ - __be32 address; /* address of data entry */ -} xfs_dir2_leaf_entry_t; - -/* - * Leaf block tail. - */ -typedef struct xfs_dir2_leaf_tail { - __be32 bestcount; -} xfs_dir2_leaf_tail_t; - -/* - * Leaf block. - * bests and tail are at the end of the block for single-leaf only - * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC). - */ -typedef struct xfs_dir2_leaf { - xfs_dir2_leaf_hdr_t hdr; /* leaf header */ - xfs_dir2_leaf_entry_t ents[1]; /* entries */ - /* ... */ - xfs_dir2_data_off_t bests[1]; /* best free counts */ - xfs_dir2_leaf_tail_t tail; /* leaf tail */ -} xfs_dir2_leaf_t; - -/* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) -{ - return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / - (uint)sizeof(xfs_dir2_leaf_entry_t)); -} - -/* - * Get address of the bestcount field in the single-leaf block. - */ -static inline xfs_dir2_leaf_tail_t * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp) -{ - return (xfs_dir2_leaf_tail_t *) - ((char *)(lp) + (mp)->m_dirblksize - - (uint)sizeof(xfs_dir2_leaf_tail_t)); -} - -/* - * Get address of the bests array in the single-leaf block. - */ -static inline __be16 * -xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp) -{ - return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); -} - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t)((by) >> \ - ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)((by) & \ - ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)(db) << \ - ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o); -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/* - * Function declarations. - */ -extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, - struct xfs_dabuf *dbp); -extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); -extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, - struct xfs_dabuf *bp); -extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, - int *lowstalep, int *highstalep, - int *lowlogp, int *highlogp); -extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, - size_t bufsize, xfs_off_t *offset, - filldir_t filldir); -extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, - struct xfs_dabuf **bpp, int magic); -extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); -extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, - struct xfs_dabuf *bp); -extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); -extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); -extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, - struct xfs_dabuf *lbp, xfs_dir2_db_t db); -extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); - -#endif /* __XFS_DIR2_LEAF_H__ */ diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index a0aab7d3294f..084b3247d636 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -23,18 +23,14 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" -#include "xfs_dir2_node.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -73,7 +69,7 @@ xfs_dir2_free_log_bests( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); xfs_da_log_buf(tp, bp, (uint)((char *)&free->bests[first] - (char *)free), (uint)((char *)&free->bests[last] - (char *)free + @@ -91,7 +87,7 @@ xfs_dir2_free_log_header( xfs_dir2_free_t *free; /* freespace structure */ free = bp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); } @@ -244,89 +240,13 @@ xfs_dir2_leafn_add( lfloglow = be16_to_cpu(leaf->hdr.count); lfloghigh = -1; } - /* - * No stale entries, just insert a space for the new entry. - */ - if (!leaf->hdr.stale) { - lep = &leaf->ents[index]; - if (index < be16_to_cpu(leaf->hdr.count)) - memmove(lep + 1, lep, - (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); - lfloglow = index; - lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add_cpu(&leaf->hdr.count, 1); - } - /* - * There are stale entries. We'll use one for the new entry. - */ - else { - /* - * If we didn't do a compact then we need to figure out - * which stale entry will be used. - */ - if (compact == 0) { - /* - * Find first stale entry before our insertion point. - */ - for (lowstale = index - 1; - lowstale >= 0 && - be32_to_cpu(leaf->ents[lowstale].address) != - XFS_DIR2_NULL_DATAPTR; - lowstale--) - continue; - /* - * Find next stale entry after insertion point. - * Stop looking if the answer would be worse than - * lowstale already found. - */ - for (highstale = index; - highstale < be16_to_cpu(leaf->hdr.count) && - be32_to_cpu(leaf->ents[highstale].address) != - XFS_DIR2_NULL_DATAPTR && - (lowstale < 0 || - index - lowstale - 1 >= highstale - index); - highstale++) - continue; - } - /* - * Using the low stale entry. - * Shift entries up toward the stale slot. - */ - if (lowstale >= 0 && - (highstale == be16_to_cpu(leaf->hdr.count) || - index - lowstale - 1 < highstale - index)) { - ASSERT(be32_to_cpu(leaf->ents[lowstale].address) == - XFS_DIR2_NULL_DATAPTR); - ASSERT(index - lowstale - 1 >= 0); - if (index - lowstale - 1 > 0) - memmove(&leaf->ents[lowstale], - &leaf->ents[lowstale + 1], - (index - lowstale - 1) * sizeof(*lep)); - lep = &leaf->ents[index - 1]; - lfloglow = MIN(lowstale, lfloglow); - lfloghigh = MAX(index - 1, lfloghigh); - } - /* - * Using the high stale entry. - * Shift entries down toward the stale slot. - */ - else { - ASSERT(be32_to_cpu(leaf->ents[highstale].address) == - XFS_DIR2_NULL_DATAPTR); - ASSERT(highstale - index >= 0); - if (highstale - index > 0) - memmove(&leaf->ents[index + 1], - &leaf->ents[index], - (highstale - index) * sizeof(*lep)); - lep = &leaf->ents[index]; - lfloglow = MIN(index, lfloglow); - lfloghigh = MAX(highstale, lfloghigh); - } - be16_add_cpu(&leaf->hdr.stale, -1); - } + /* * Insert the new entry, log everything. */ + lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, + highstale, &lfloglow, &lfloghigh); + lep->hashval = cpu_to_be32(args->hashval); lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); @@ -352,14 +272,14 @@ xfs_dir2_leafn_check( leaf = bp->data; mp = dp->i_mount; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { if (i + 1 < be16_to_cpu(leaf->hdr.count)) { ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= be32_to_cpu(leaf->ents[i + 1].hashval)); } - if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); @@ -378,7 +298,7 @@ xfs_dir2_leafn_lasthash( xfs_dir2_leaf_t *leaf; /* leaf structure */ leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); if (count) *count = be16_to_cpu(leaf->hdr.count); if (!leaf->hdr.count) @@ -417,7 +337,7 @@ xfs_dir2_leafn_lookup_for_addname( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -434,7 +354,7 @@ xfs_dir2_leafn_lookup_for_addname( curbp = state->extrablk.bp; curfdb = state->extrablk.blkno; free = curbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); } length = xfs_dir2_data_entsize(args->namelen); /* @@ -488,7 +408,7 @@ xfs_dir2_leafn_lookup_for_addname( ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); ASSERT((be32_to_cpu(free->hdr.firstdb) % - XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); + xfs_dir2_free_max_bests(mp)) == 0); ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + be32_to_cpu(free->hdr.nvalid)); @@ -500,7 +420,8 @@ xfs_dir2_leafn_lookup_for_addname( /* * If it has room, return it. */ - if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { + if (unlikely(free->bests[fi] == + cpu_to_be16(NULLDATAOFF))) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) @@ -561,7 +482,7 @@ xfs_dir2_leafn_lookup_for_entry( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); #ifdef __KERNEL__ ASSERT(be16_to_cpu(leaf->hdr.count) > 0); #endif @@ -742,7 +663,8 @@ xfs_dir2_leafn_moveents( int i; /* temp leaf index */ for (i = start_s, stale = 0; i < start_s + count; i++) { - if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) + if (leaf_s->ents[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } } else @@ -789,8 +711,8 @@ xfs_dir2_leafn_order( leaf1 = leaf1_bp->data; leaf2 = leaf2_bp->data; - ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); if (be16_to_cpu(leaf1->hdr.count) > 0 && be16_to_cpu(leaf2->hdr.count) > 0 && (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || @@ -918,7 +840,7 @@ xfs_dir2_leafn_remove( xfs_da_state_blk_t *dblk, /* data block */ int *rval) /* resulting block needs join */ { - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data block entry */ @@ -938,7 +860,7 @@ xfs_dir2_leafn_remove( tp = args->trans; mp = dp->i_mount; leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * Point to the entry we're removing. */ @@ -963,9 +885,9 @@ xfs_dir2_leafn_remove( * in the data block in case it changes. */ dbp = dblk->bp; - data = dbp->data; - dep = (xfs_dir2_data_entry_t *)((char *)data + off); - longest = be16_to_cpu(data->hdr.bestfree[0].length); + hdr = dbp->data; + dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); + longest = be16_to_cpu(hdr->bestfree[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(tp, dbp, off, xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); @@ -974,7 +896,7 @@ xfs_dir2_leafn_remove( * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(tp, dbp); xfs_dir2_data_check(dp, dbp); @@ -982,7 +904,7 @@ xfs_dir2_leafn_remove( * If the longest data block freespace changes, need to update * the corresponding freeblock entry. */ - if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { + if (longest < be16_to_cpu(hdr->bestfree[0].length)) { int error; /* error return value */ xfs_dabuf_t *fbp; /* freeblock buffer */ xfs_dir2_db_t fdb; /* freeblock block number */ @@ -1000,27 +922,27 @@ xfs_dir2_leafn_remove( return error; } free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); ASSERT(be32_to_cpu(free->hdr.firstdb) == - XFS_DIR2_MAX_FREE_BESTS(mp) * + xfs_dir2_free_max_bests(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); /* * Calculate which entry we need to fix. */ findex = xfs_dir2_db_to_fdindex(mp, db); - longest = be16_to_cpu(data->hdr.bestfree[0].length); + longest = be16_to_cpu(hdr->bestfree[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { + if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) { /* * Try to punch out the data block. */ error = xfs_dir2_shrink_inode(args, db, dbp); if (error == 0) { dblk->bp = NULL; - data = NULL; + hdr = NULL; } /* * We can get ENOSPC if there's no space reservation. @@ -1036,7 +958,7 @@ xfs_dir2_leafn_remove( * If we got rid of the data block, we can eliminate that entry * in the free block. */ - if (data == NULL) { + if (hdr == NULL) { /* * One less used entry in the free table. */ @@ -1052,7 +974,8 @@ xfs_dir2_leafn_remove( int i; /* free entry index */ for (i = findex - 1; - i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; + i >= 0 && + free->bests[i] == cpu_to_be16(NULLDATAOFF); i--) continue; free->hdr.nvalid = cpu_to_be32(i + 1); @@ -1209,7 +1132,7 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; info = blk->bp->data; - ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); leaf = (xfs_dir2_leaf_t *)info; count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); @@ -1268,7 +1191,7 @@ xfs_dir2_leafn_toosmall( count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->data; - ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); bytes -= count * (uint)sizeof(leaf->ents[0]); /* @@ -1327,8 +1250,8 @@ xfs_dir2_leafn_unbalance( ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); drop_leaf = drop_blk->bp->data; save_leaf = save_blk->bp->data; - ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); - ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); + ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); /* * If there are any stale leaf entries, take this opportunity * to purge them. @@ -1432,7 +1355,7 @@ xfs_dir2_node_addname_int( xfs_da_args_t *args, /* operation arguments */ xfs_da_state_blk_t *fblk) /* optional freespace block */ { - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t dbno; /* data block number */ xfs_dabuf_t *dbp; /* data block buffer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ @@ -1469,7 +1392,7 @@ xfs_dir2_node_addname_int( */ ifbno = fblk->blkno; free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); findex = fblk->index; /* * This means the free entry showed that the data block had @@ -1553,7 +1476,7 @@ xfs_dir2_node_addname_int( continue; } free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); findex = 0; } /* @@ -1680,12 +1603,12 @@ xfs_dir2_node_addname_int( free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); free->hdr.firstdb = cpu_to_be32( (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - XFS_DIR2_MAX_FREE_BESTS(mp)); + xfs_dir2_free_max_bests(mp)); free->hdr.nvalid = 0; free->hdr.nused = 0; } else { free = fbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); } /* @@ -1697,7 +1620,7 @@ xfs_dir2_node_addname_int( * freespace block, extend that table. */ if (findex >= be32_to_cpu(free->hdr.nvalid)) { - ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); + ASSERT(findex < xfs_dir2_free_max_bests(mp)); free->hdr.nvalid = cpu_to_be32(findex + 1); /* * Tag new entry so nused will go up. @@ -1708,7 +1631,7 @@ xfs_dir2_node_addname_int( * If this entry was for an empty data block * (this should always be true) then update the header. */ - if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { + if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) { be32_add_cpu(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } @@ -1717,8 +1640,8 @@ xfs_dir2_node_addname_int( * We haven't allocated the data entry yet so this will * change again. */ - data = dbp->data; - free->bests[findex] = data->hdr.bestfree[0].length; + hdr = dbp->data; + free->bests[findex] = hdr->bestfree[0].length; logfree = 1; } /* @@ -1743,21 +1666,21 @@ xfs_dir2_node_addname_int( xfs_da_buf_done(fbp); return error; } - data = dbp->data; + hdr = dbp->data; logfree = 0; } - ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); + ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); /* * Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) - ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); + ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); needscan = needlog = 0; /* * Mark the first part of the unused space, inuse for us. */ xfs_dir2_data_use_free(tp, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* * Fill in the new entry and log it. @@ -1767,13 +1690,13 @@ xfs_dir2_node_addname_int( dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)data); + *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(tp, dbp, dep); /* * Rescan the block for bestfree if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, data, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Log the data block header if needed. */ @@ -1782,8 +1705,8 @@ xfs_dir2_node_addname_int( /* * If the freespace entry is now wrong, update it. */ - if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { - free->bests[findex] = data->hdr.bestfree[0].length; + if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) { + free->bests[findex] = hdr->bestfree[0].length; logfree = 1; } /* @@ -1933,7 +1856,7 @@ xfs_dir2_node_replace( xfs_da_args_t *args) /* operation arguments */ { xfs_da_state_blk_t *blk; /* leaf block */ - xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry changed */ int error; /* error return value */ int i; /* btree level */ @@ -1977,10 +1900,10 @@ xfs_dir2_node_replace( /* * Point to the data entry. */ - data = state->extrablk.bp->data; - ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); + hdr = state->extrablk.bp->data; + ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); dep = (xfs_dir2_data_entry_t *) - ((char *)data + + ((char *)hdr + xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* @@ -2044,7 +1967,7 @@ xfs_dir2_node_trim_free( return 0; } free = bp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); + ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); /* * If there are used entries, there's nothing to do. */ diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h deleted file mode 100644 index 82dfe7147195..000000000000 --- a/fs/xfs/xfs_dir2_node.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_NODE_H__ -#define __XFS_DIR2_NODE_H__ - -/* - * Directory version 2, btree node format structures - */ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_da_state; -struct xfs_da_state_blk; -struct xfs_inode; -struct xfs_trans; - -/* - * Offset of the freespace index. - */ -#define XFS_DIR2_FREE_SPACE 2 -#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_FREE_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) - -#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ - -typedef struct xfs_dir2_free_hdr { - __be32 magic; /* XFS_DIR2_FREE_MAGIC */ - __be32 firstdb; /* db of first entry */ - __be32 nvalid; /* count of valid entries */ - __be32 nused; /* count of used entries */ -} xfs_dir2_free_hdr_t; - -typedef struct xfs_dir2_free { - xfs_dir2_free_hdr_t hdr; /* block header */ - __be16 bests[1]; /* best free counts */ - /* unused entries are -1 */ -} xfs_dir2_free_t; - -#define XFS_DIR2_MAX_FREE_BESTS(mp) \ - (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \ - (uint)sizeof(xfs_dir2_data_off_t)) - -/* - * Convert data space db to the corresponding free db. - */ -static inline xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp)); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static inline int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); -} - -extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, - struct xfs_dabuf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); -extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, - struct xfs_da_args *args, int *indexp, - struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, - struct xfs_dabuf *leaf2_bp); -extern int xfs_dir2_leafn_split(struct xfs_da_state *state, - struct xfs_da_state_blk *oldblk, - struct xfs_da_state_blk *newblk); -extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); -extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk, - struct xfs_da_state_blk *save_blk); -extern int xfs_dir2_node_addname(struct xfs_da_args *args); -extern int xfs_dir2_node_lookup(struct xfs_da_args *args); -extern int xfs_dir2_node_removename(struct xfs_da_args *args); -extern int xfs_dir2_node_replace(struct xfs_da_args *args); -extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, - int *rvalp); - -#endif /* __XFS_DIR2_NODE_H__ */ diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h new file mode 100644 index 000000000000..067f403ecf8a --- /dev/null +++ b/fs/xfs/xfs_dir2_priv.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DIR2_PRIV_H__ +#define __XFS_DIR2_PRIV_H__ + +/* xfs_dir2.c */ +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); +extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); +extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, + struct xfs_dabuf *bp); +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); + +/* xfs_dir2_block.c */ +extern int xfs_dir2_block_addname(struct xfs_da_args *args); +extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_block_lookup(struct xfs_da_args *args); +extern int xfs_dir2_block_removename(struct xfs_da_args *args); +extern int xfs_dir2_block_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, + struct xfs_dabuf *lbp, struct xfs_dabuf *dbp); + +/* xfs_dir2_data.c */ +#ifdef DEBUG +extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); +#else +#define xfs_dir2_data_check(dp,bp) +#endif +extern struct xfs_dir2_data_free * +xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_unused *dup, int *loghead); +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr, int *loghead); +extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, + struct xfs_dabuf **bpp); +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, + struct xfs_dir2_data_entry *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, + struct xfs_dabuf *bp); +extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, + struct xfs_dir2_data_unused *dup); +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, + int *needlogp, int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); + +/* xfs_dir2_leaf.c */ +extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, + struct xfs_dabuf *dbp); +extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); +extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, + struct xfs_dabuf *bp); +extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, + int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); +extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, + size_t bufsize, xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, + struct xfs_dabuf **bpp, int magic); +extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); +extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, + struct xfs_dabuf *bp); +extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); +extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); +extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, + struct xfs_dabuf *lbp, xfs_dir2_db_t db); +extern struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, + int lowstale, int highstale, + int *lfloglow, int *lfloghigh); +extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); + +/* xfs_dir2_node.c */ +extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); +extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, + struct xfs_da_args *args, int *indexp, + struct xfs_da_state *state); +extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, + struct xfs_dabuf *leaf2_bp); +extern int xfs_dir2_leafn_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); +extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); +extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); +extern int xfs_dir2_node_addname(struct xfs_da_args *args); +extern int xfs_dir2_node_lookup(struct xfs_da_args *args); +extern int xfs_dir2_node_removename(struct xfs_da_args *args); +extern int xfs_dir2_node_replace(struct xfs_da_args *args); +extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, + int *rvalp); + +/* xfs_dir2_sf.c */ +extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); +extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp, + struct xfs_dir2_sf_entry *sfep); +extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, + struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); +extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, + int size, xfs_dir2_sf_hdr_t *sfhp); +extern int xfs_dir2_sf_addname(struct xfs_da_args *args); +extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); +extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, + xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); +extern int xfs_dir2_sf_removename(struct xfs_da_args *args); +extern int xfs_dir2_sf_replace(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index b1bae6b1eed9..79d05e84e296 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -23,18 +23,16 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_error.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" +#include "xfs_dir2.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2_priv.h" #include "xfs_trace.h" /* @@ -60,6 +58,82 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args); #endif /* XFS_BIG_INUMS */ /* + * Inode numbers in short-form directories can come in two versions, + * either 4 bytes or 8 bytes wide. These helpers deal with the + * two forms transparently by looking at the headers i8count field. + * + * For 64-bit inode number the most significant byte must be zero. + */ +static xfs_ino_t +xfs_dir2_sf_get_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *from) +{ + if (hdr->i8count) + return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; + else + return get_unaligned_be32(&from->i4.i); +} + +static void +xfs_dir2_sf_put_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *to, + xfs_ino_t ino) +{ + ASSERT((ino & 0xff00000000000000ULL) == 0); + + if (hdr->i8count) + put_unaligned_be64(ino, &to->i8.i); + else + put_unaligned_be32(ino, &to->i4.i); +} + +xfs_ino_t +xfs_dir2_sf_get_parent_ino( + struct xfs_dir2_sf_hdr *hdr) +{ + return xfs_dir2_sf_get_ino(hdr, &hdr->parent); +} + +static void +xfs_dir2_sf_put_parent_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. The inode numbers may only be accessed + * through the helpers below. + */ +static xfs_dir2_inou_t * +xfs_dir2_sfe_inop( + struct xfs_dir2_sf_entry *sfep) +{ + return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]; +} + +xfs_ino_t +xfs_dir2_sfe_get_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep)); +} + +static void +xfs_dir2_sfe_put_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino); +} + +/* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the * space currently present in the inode. If it won't fit, the output @@ -68,7 +142,7 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args); int /* size for sf form */ xfs_dir2_block_sfsize( xfs_inode_t *dp, /* incore inode pointer */ - xfs_dir2_block_t *block, /* block directory data */ + xfs_dir2_data_hdr_t *hdr, /* block directory data */ xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ { xfs_dir2_dataptr_t addr; /* data entry address */ @@ -88,7 +162,7 @@ xfs_dir2_block_sfsize( mp = dp->i_mount; count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(mp, block); + btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -101,7 +175,7 @@ xfs_dir2_block_sfsize( * Calculate the pointer to the entry at hand. */ dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -138,7 +212,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); + xfs_dir2_sf_put_parent_ino(sfhp, parent); return size; } @@ -153,7 +227,7 @@ xfs_dir2_block_to_sf( int size, /* shortform directory size */ xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { - xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ @@ -164,8 +238,7 @@ xfs_dir2_block_to_sf( xfs_mount_t *mp; /* filesystem mount point */ char *ptr; /* current data pointer */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ - xfs_ino_t temp; + xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ trace_xfs_dir2_block_to_sf(args); @@ -176,13 +249,14 @@ xfs_dir2_block_to_sf( * Make a copy of the block data, so we can shrink the inode * and add local data. */ - block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(block, bp->data, mp->m_dirblksize); + hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); + memcpy(hdr, bp->data, mp->m_dirblksize); logflags = XFS_ILOG_CORE; if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { ASSERT(error != ENOSPC); goto out; } + /* * The buffer is now unconditionally gone, whether * xfs_dir2_shrink_inode worked or not. @@ -198,14 +272,14 @@ xfs_dir2_block_to_sf( /* * Copy the header into the newly allocate local space. */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); dp->i_d.di_size = size; /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(mp, block); - ptr = (char *)block->u; + btp = xfs_dir2_block_tail_p(mp, hdr); + ptr = (char *)(hdr + 1); endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); /* @@ -233,7 +307,7 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); + xfs_dir2_sf_get_parent_ino(sfp)); /* * Normal entry, copy it into shortform. */ @@ -241,11 +315,11 @@ xfs_dir2_block_to_sf( sfep->namelen = dep->namelen; xfs_dir2_sf_put_offset(sfep, (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)block)); + ((char *)dep - (char *)hdr)); memcpy(sfep->name, dep->name, dep->namelen); - temp = be64_to_cpu(dep->inumber); - xfs_dir2_sf_put_inumber(sfp, &temp, - xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, + be64_to_cpu(dep->inumber)); + sfep = xfs_dir2_sf_nextentry(sfp, sfep); } ptr += xfs_dir2_data_entsize(dep->namelen); @@ -254,7 +328,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(block); + kmem_free(hdr); return error; } @@ -277,7 +351,7 @@ xfs_dir2_sf_addname( xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ int old_isize; /* di_size before adding name */ int pick; /* which algorithm to use */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ trace_xfs_dir2_sf_addname(args); @@ -294,19 +368,19 @@ xfs_dir2_sf_addname( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Compute entry (and change in) size. */ - add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); + add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS /* * Do we have to change to 8 byte inodes? */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { /* * Yes, adjust the entry size and the total size. */ @@ -314,7 +388,7 @@ xfs_dir2_sf_addname( (uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t); incr_isize += - (sfp->hdr.count + 2) * + (sfp->count + 2) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); objchange = 1; @@ -384,21 +458,21 @@ xfs_dir2_sf_addname_easy( { int byteoff; /* byte offset in sf dir */ xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; byteoff = (int)((char *)sfep - (char *)sfp); /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), + xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); /* * Fill in the new entry. @@ -406,15 +480,14 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sf_put_inumber(sfp, &args->inumber, - xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); /* * Update the header and inode. */ - sfp->hdr.count++; + sfp->count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) - sfp->hdr.i8count++; + sfp->i8count++; #endif dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); @@ -444,19 +517,19 @@ xfs_dir2_sf_addname_hard( xfs_dir2_data_aoff_t offset; /* current offset value */ int old_isize; /* previous di_size */ xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ - xfs_dir2_sf_t *oldsfp; /* original shortform dir */ + xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ - xfs_dir2_sf_t *sfp; /* new shortform dir */ + xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ /* * Copy the old directory to the stack buffer. */ dp = args->dp; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; buf = kmem_alloc(old_isize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_t *)buf; + oldsfp = (xfs_dir2_sf_hdr_t *)buf; memcpy(oldsfp, sfp, old_isize); /* * Loop over the old directory finding the place we're going @@ -485,7 +558,7 @@ xfs_dir2_sf_addname_hard( /* * Reset the pointer since the buffer was reallocated. */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /* * Copy the first part of the directory, including the header. */ @@ -498,12 +571,11 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir2_sf_put_inumber(sfp, &args->inumber, - xfs_dir2_sf_inumberp(sfep)); - sfp->hdr.count++; + xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); + sfp->count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) - sfp->hdr.i8count++; + sfp->i8count++; #endif /* * If there's more left to copy, do that. @@ -537,14 +609,14 @@ xfs_dir2_sf_addname_pick( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_data_aoff_t offset; /* data block offset */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ int size; /* entry's data size */ int used; /* data bytes used */ dp = args->dp; mp = dp->i_mount; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; size = xfs_dir2_data_entsize(args->namelen); offset = XFS_DIR2_DATA_FIRST_OFFSET; sfep = xfs_dir2_sf_firstentry(sfp); @@ -554,7 +626,7 @@ xfs_dir2_sf_addname_pick( * Keep track of data offset and whether we've seen a place * to insert the new entry. */ - for (i = 0; i < sfp->hdr.count; i++) { + for (i = 0; i < sfp->count; i++) { if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + @@ -566,7 +638,7 @@ xfs_dir2_sf_addname_pick( * was a data block (block form directory). */ used = offset + - (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t); /* * If it won't fit in a block form then we can't insert it, @@ -612,30 +684,30 @@ xfs_dir2_sf_check( xfs_ino_t ino; /* entry inode number */ int offset; /* data offset */ xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ dp = args->dp; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; offset = XFS_DIR2_DATA_FIRST_OFFSET; - ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); + ino = xfs_dir2_sfe_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + xfs_dir2_data_entsize(sfep->namelen); } - ASSERT(i8count == sfp->hdr.i8count); + ASSERT(i8count == sfp->i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + - (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) <= dp->i_mount->m_dirblksize); } @@ -651,7 +723,7 @@ xfs_dir2_sf_create( { xfs_inode_t *dp; /* incore directory inode */ int i8count; /* parent inode is an 8-byte number */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ int size; /* directory size */ trace_xfs_dir2_sf_create(args); @@ -681,13 +753,13 @@ xfs_dir2_sf_create( /* * Fill in the header, */ - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - sfp->hdr.i8count = i8count; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp->i8count = i8count; /* * Now can put in the inode number, since i8count is set. */ - xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); - sfp->hdr.count = 0; + xfs_dir2_sf_put_parent_ino(sfp, pino); + sfp->count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -705,7 +777,7 @@ xfs_dir2_sf_getdents( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_dataptr_t off; /* current entry's offset */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_dataptr_t dot_offset; xfs_dir2_dataptr_t dotdot_offset; xfs_ino_t ino; @@ -724,9 +796,9 @@ xfs_dir2_sf_getdents( ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * If the block number in the offset is out of range, we're done. @@ -759,7 +831,7 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. */ if (*offset <= dotdot_offset) { - ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_parent_ino(sfp); if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { *offset = dotdot_offset & 0x7fffffff; return 0; @@ -770,7 +842,7 @@ xfs_dir2_sf_getdents( * Loop while there are more entries and put'ing works. */ sfep = xfs_dir2_sf_firstentry(sfp); - for (i = 0; i < sfp->hdr.count; i++) { + for (i = 0; i < sfp->count; i++) { off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, xfs_dir2_sf_get_offset(sfep)); @@ -779,7 +851,7 @@ xfs_dir2_sf_getdents( continue; } - ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); + ino = xfs_dir2_sfe_get_ino(sfp, sfep); if (filldir(dirent, (char *)sfep->name, sfep->namelen, off & 0x7fffffff, ino, DT_UNKNOWN)) { *offset = off & 0x7fffffff; @@ -805,7 +877,7 @@ xfs_dir2_sf_lookup( int i; /* entry index */ int error; xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ enum xfs_dacmp cmp; /* comparison result */ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ @@ -824,8 +896,8 @@ xfs_dir2_sf_lookup( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Special case for . */ @@ -839,7 +911,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + args->inumber = xfs_dir2_sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; return XFS_ERROR(EEXIST); } @@ -847,7 +919,7 @@ xfs_dir2_sf_lookup( * Loop over all the entries trying to match ours. */ ci_sfep = NULL; - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode @@ -858,8 +930,7 @@ xfs_dir2_sf_lookup( sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)); + args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep); if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); ci_sfep = sfep; @@ -891,7 +962,7 @@ xfs_dir2_sf_removename( int newsize; /* new inode size */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_removename(args); @@ -908,32 +979,31 @@ xfs_dir2_sf_removename( } ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Loop over the old directory entries. * Find the one we're deleting. */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)) == - args->inumber); + ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) == + args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->hdr.count) + if (i == sfp->count) return XFS_ERROR(ENOENT); /* * Calculate sizes. */ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); + entsize = xfs_dir2_sf_entsize(sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -944,22 +1014,22 @@ xfs_dir2_sf_removename( /* * Fix up the header and file size. */ - sfp->hdr.count--; + sfp->count--; dp->i_d.di_size = newsize; /* * Reallocate, making it smaller. */ xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; #if XFS_BIG_INUMS /* * Are we changing inode number size? */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { - if (sfp->hdr.i8count == 1) + if (sfp->i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->hdr.i8count--; + sfp->i8count--; } #endif xfs_dir2_sf_check(args); @@ -983,7 +1053,7 @@ xfs_dir2_sf_replace( int i8elevated; /* sf_toino8 set i8count=1 */ #endif xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_replace(args); @@ -999,19 +1069,19 @@ xfs_dir2_sf_replace( } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); #if XFS_BIG_INUMS /* * New inode number is large, and need to convert to 8-byte inodes. */ - if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { int error; /* error return value */ int newsize; /* new inode size */ newsize = dp->i_df.if_bytes + - (sfp->hdr.count + 1) * + (sfp->count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); /* @@ -1029,7 +1099,7 @@ xfs_dir2_sf_replace( */ xfs_dir2_sf_toino8(args); i8elevated = 1; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; } else i8elevated = 0; #endif @@ -1040,34 +1110,32 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + ino = xfs_dir2_sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); + xfs_dir2_sf_put_parent_ino(sfp, args->inumber); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)); + ino = xfs_dir2_sfe_get_ino(sfp, sfep); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_inumber(sfp, &args->inumber, - xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->hdr.count) { + if (i == sfp->count) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); #if XFS_BIG_INUMS if (i8elevated) @@ -1085,10 +1153,10 @@ xfs_dir2_sf_replace( /* * And the old count was one, so need to convert to small. */ - if (sfp->hdr.i8count == 1) + if (sfp->i8count == 1) xfs_dir2_sf_toino4(args); else - sfp->hdr.i8count--; + sfp->i8count--; } /* * See if the old number was small, the new number is large. @@ -1099,9 +1167,9 @@ xfs_dir2_sf_replace( * add to the i8count unless we just converted to 8-byte * inodes (which does an implied i8count = 1) */ - ASSERT(sfp->hdr.i8count != 0); + ASSERT(sfp->i8count != 0); if (!i8elevated) - sfp->hdr.i8count++; + sfp->i8count++; } #endif xfs_dir2_sf_check(args); @@ -1121,13 +1189,12 @@ xfs_dir2_sf_toino4( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ - xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_t *sfp; /* new sf directory */ + xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino4(args); @@ -1140,44 +1207,42 @@ xfs_dir2_sf_toino4( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->hdr.i8count == 1); + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->i8count == 1); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. */ newsize = oldsize - - (oldsfp->hdr.count + 1) * + (oldsfp->count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_t *)buf; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. */ - sfp->hdr.count = oldsfp->hdr.count; - sfp->hdr.i8count = 0; - ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); - xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); + sfp->count = oldsfp->count; + sfp->i8count = 0; + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - ino = xfs_dir2_sf_get_inumber(oldsfp, - xfs_dir2_sf_inumberp(oldsfep)); - xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, + xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); } /* * Clean up the inode. @@ -1199,13 +1264,12 @@ xfs_dir2_sf_toino8( char *buf; /* old dir's buffer */ xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ - xfs_ino_t ino; /* entry inode number */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_t *oldsfp; /* old sf directory */ + xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ - xfs_dir2_sf_t *sfp; /* new sf directory */ + xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ trace_xfs_dir2_sf_toino8(args); @@ -1218,44 +1282,42 @@ xfs_dir2_sf_toino8( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, KM_SLEEP); - oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; - ASSERT(oldsfp->hdr.i8count == 0); + oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); /* * Compute the new inode size. */ newsize = oldsize + - (oldsfp->hdr.count + 1) * + (oldsfp->count + 1) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); /* * Reset our pointers, the data has moved. */ - oldsfp = (xfs_dir2_sf_t *)buf; - sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_hdr_t *)buf; + sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /* * Fill in the new header. */ - sfp->hdr.count = oldsfp->hdr.count; - sfp->hdr.i8count = 1; - ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); - xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent); + sfp->count = oldsfp->count; + sfp->i8count = 1; + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); - i < sfp->hdr.count; + i < sfp->count; i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - ino = xfs_dir2_sf_get_inumber(oldsfp, - xfs_dir2_sf_inumberp(oldsfep)); - xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep)); + xfs_dir2_sfe_put_ino(sfp, sfep, + xfs_dir2_sfe_get_ino(oldsfp, oldsfep)); } /* * Clean up the inode. diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h deleted file mode 100644 index 6ac44b550d39..000000000000 --- a/fs/xfs/xfs_dir2_sf.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DIR2_SF_H__ -#define __XFS_DIR2_SF_H__ - -/* - * Directory layout when stored internal to an inode. - * - * Small directories are packed as tightly as possible so as to - * fit into the literal area of the inode. - */ - -struct uio; -struct xfs_dabuf; -struct xfs_da_args; -struct xfs_dir2_block; -struct xfs_inode; -struct xfs_mount; -struct xfs_trans; - -/* - * Inode number stored as 8 8-bit values. - */ -typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; - -/* - * Inode number stored as 4 8-bit values. - * Works a lot of the time, when all the inode numbers in a directory - * fit in 32 bits. - */ -typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; - -typedef union { - xfs_dir2_ino8_t i8; - xfs_dir2_ino4_t i4; -} xfs_dir2_inou_t; -#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) - -/* - * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. - * Only need 16 bits, this is the byte offset into the single block form. - */ -typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; - -/* - * The parent directory has a dedicated field, and the self-pointer must - * be calculated on the fly. - * - * Entries are packed toward the top as tightly as possible. The header - * and the elements must be memcpy'd out into a work area to get correct - * alignment for the inode number fields. - */ -typedef struct xfs_dir2_sf_hdr { - __uint8_t count; /* count of entries */ - __uint8_t i8count; /* count of 8-byte inode #s */ - xfs_dir2_inou_t parent; /* parent dir inode number */ -} __arch_pack xfs_dir2_sf_hdr_t; - -typedef struct xfs_dir2_sf_entry { - __uint8_t namelen; /* actual name length */ - xfs_dir2_sf_off_t offset; /* saved offset */ - __uint8_t name[1]; /* name, variable size */ - xfs_dir2_inou_t inumber; /* inode number, var. offset */ -} __arch_pack xfs_dir2_sf_entry_t; - -typedef struct xfs_dir2_sf { - xfs_dir2_sf_hdr_t hdr; /* shortform header */ - xfs_dir2_sf_entry_t list[1]; /* shortform entries */ -} xfs_dir2_sf_t; - -static inline int xfs_dir2_sf_hdr_size(int i8count) -{ - return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ - ((i8count) == 0) * \ - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); -} - -static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep) -{ - return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]; -} - -static inline xfs_intino_t -xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from) -{ - return ((sfp)->hdr.i8count == 0 ? \ - (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \ - (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8)); -} - -static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from, - xfs_dir2_inou_t *to) -{ - if ((sfp)->hdr.i8count == 0) - XFS_PUT_DIR_INO4(*(from), (to)->i4); - else - XFS_PUT_DIR_INO8(*(from), (to)->i8); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) -{ - return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i); -} - -static inline void -xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) -{ - INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off); -} - -static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len) -{ - return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ - ((sfp)->hdr.i8count == 0) * \ - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); -} - -static inline int -xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) -{ - return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \ - ((sfp)->hdr.i8count == 0) * \ - ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))); -} - -static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp) -{ - return ((xfs_dir2_sf_entry_t *) \ - ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count))); -} - -static inline xfs_dir2_sf_entry_t * -xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep) -{ - return ((xfs_dir2_sf_entry_t *) \ - ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep))); -} - -/* - * Functions. - */ -extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, - struct xfs_dir2_block *block, - xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, - int size, xfs_dir2_sf_hdr_t *sfhp); -extern int xfs_dir2_sf_addname(struct xfs_da_args *args); -extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); -extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, - xfs_off_t *offset, filldir_t filldir); -extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); -extern int xfs_dir2_sf_removename(struct xfs_da_args *args); -extern int xfs_dir2_sf_replace(struct xfs_da_args *args); - -#endif /* __XFS_DIR2_SF_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 8f6fc1a96386..c13fed8c394a 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -249,6 +249,11 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) +/* Used for sanity checks on superblock */ +#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) +#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ + (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) + /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 84ebeec16642..dd5628bd8d0b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -683,7 +683,7 @@ xfs_dialloc( return 0; } agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); } else { /* * Continue where we left off before. In this case, we @@ -691,7 +691,7 @@ xfs_dialloc( */ agbp = *IO_agbp; agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); ASSERT(be32_to_cpu(agi->agi_freecount) > 0); } mp = tp->t_mountp; @@ -775,7 +775,7 @@ nextag: if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); } /* * Here with an allocation group that has a free inode. @@ -944,7 +944,7 @@ nextag: * See if the most recently allocated block has any free. */ newino: - if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), XFS_LOOKUP_EQ, &i); if (error) @@ -1085,7 +1085,7 @@ xfs_difree( return error; } agi = XFS_BUF_TO_AGI(agbp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); ASSERT(agbno < be32_to_cpu(agi->agi_length)); /* * Initialize the cursor. @@ -1438,7 +1438,7 @@ xfs_ialloc_log_agi( xfs_agi_t *agi; /* allocation group header */ agi = XFS_BUF_TO_AGI(bp); - ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); + ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif /* * Compute byte offsets for the first and last fields. @@ -1492,7 +1492,7 @@ xfs_read_agi( /* * Validate the magic number of the agi block. */ - agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && + agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && be32_to_cpu(agi->agi_seqno) == agno; if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 16921f55c542..c6a75815aea0 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -31,7 +31,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -205,72 +204,6 @@ xfs_inobt_recs_inorder( } #endif /* DEBUG */ -#ifdef XFS_BTREE_TRACE -ktrace_t *xfs_inobt_trace_buf; - -STATIC void -xfs_inobt_trace_enter( - struct xfs_btree_cur *cur, - const char *func, - char *s, - int type, - int line, - __psunsigned_t a0, - __psunsigned_t a1, - __psunsigned_t a2, - __psunsigned_t a3, - __psunsigned_t a4, - __psunsigned_t a5, - __psunsigned_t a6, - __psunsigned_t a7, - __psunsigned_t a8, - __psunsigned_t a9, - __psunsigned_t a10) -{ - ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type, - (void *)func, (void *)s, NULL, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); -} - -STATIC void -xfs_inobt_trace_cursor( - struct xfs_btree_cur *cur, - __uint32_t *s0, - __uint64_t *l0, - __uint64_t *l1) -{ - *s0 = cur->bc_private.a.agno; - *l0 = cur->bc_rec.i.ir_startino; - *l1 = cur->bc_rec.i.ir_free; -} - -STATIC void -xfs_inobt_trace_key( - struct xfs_btree_cur *cur, - union xfs_btree_key *key, - __uint64_t *l0, - __uint64_t *l1) -{ - *l0 = be32_to_cpu(key->inobt.ir_startino); - *l1 = 0; -} - -STATIC void -xfs_inobt_trace_record( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, - __uint64_t *l0, - __uint64_t *l1, - __uint64_t *l2) -{ - *l0 = be32_to_cpu(rec->inobt.ir_startino); - *l1 = be32_to_cpu(rec->inobt.ir_freecount); - *l2 = be64_to_cpu(rec->inobt.ir_free); -} -#endif /* XFS_BTREE_TRACE */ - static const struct xfs_btree_ops xfs_inobt_ops = { .rec_len = sizeof(xfs_inobt_rec_t), .key_len = sizeof(xfs_inobt_key_t), @@ -286,18 +219,10 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, - #ifdef DEBUG .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, #endif - -#ifdef XFS_BTREE_TRACE - .trace_enter = xfs_inobt_trace_enter, - .trace_cursor = xfs_inobt_trace_cursor, - .trace_key = xfs_inobt_trace_key, - .trace_record = xfs_inobt_trace_record, -#endif }; /* diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 3631783b2b53..7759812c1bbe 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -38,7 +38,6 @@ #include "xfs_trans_priv.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_btree_trace.h" #include "xfs_trace.h" diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a098a20ca63e..3cc21ddf9f7e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -37,7 +37,6 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" @@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; /* - * Used in xfs_itruncate(). This is the maximum number of extents + * Used in xfs_itruncate_extents(). This is the maximum number of extents * freed from a file in a single transaction. */ #define XFS_ITRUNC_MAX_EXTENTS 2 @@ -167,7 +166,7 @@ xfs_imap_to_bp( dip = (xfs_dinode_t *)xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); - di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && + di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, @@ -802,7 +801,7 @@ xfs_iread( * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ - if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { + if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { #ifdef DEBUG xfs_alert(mp, "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", @@ -1179,15 +1178,15 @@ xfs_ialloc( * at least do it for regular files. */ #ifdef DEBUG -void +STATIC void xfs_isize_check( - xfs_mount_t *mp, - xfs_inode_t *ip, - xfs_fsize_t isize) + struct xfs_inode *ip, + xfs_fsize_t isize) { - xfs_fileoff_t map_first; - int nimaps; - xfs_bmbt_irec_t imaps[2]; + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t map_first; + int nimaps; + xfs_bmbt_irec_t imaps[2]; if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) return; @@ -1214,168 +1213,14 @@ xfs_isize_check( ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); } +#else /* DEBUG */ +#define xfs_isize_check(ip, isize) #endif /* DEBUG */ /* - * Calculate the last possible buffered byte in a file. This must - * include data that was buffered beyond the EOF by the write code. - * This also needs to deal with overflowing the xfs_fsize_t type - * which can happen for sizes near the limit. - * - * We also need to take into account any blocks beyond the EOF. It - * may be the case that they were buffered by a write which failed. - * In that case the pages will still be in memory, but the inode size - * will never have been updated. - */ -STATIC xfs_fsize_t -xfs_file_last_byte( - xfs_inode_t *ip) -{ - xfs_mount_t *mp; - xfs_fsize_t last_byte; - xfs_fileoff_t last_block; - xfs_fileoff_t size_last_block; - int error; - - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); - - mp = ip->i_mount; - /* - * Only check for blocks beyond the EOF if the extents have - * been read in. This eliminates the need for the inode lock, - * and it also saves us from looking when it really isn't - * necessary. - */ - if (ip->i_df.if_flags & XFS_IFEXTENTS) { - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmap_last_offset(NULL, ip, &last_block, - XFS_DATA_FORK); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (error) { - last_block = 0; - } - } else { - last_block = 0; - } - size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); - last_block = XFS_FILEOFF_MAX(last_block, size_last_block); - - last_byte = XFS_FSB_TO_B(mp, last_block); - if (last_byte < 0) { - return XFS_MAXIOFFSET(mp); - } - last_byte += (1 << mp->m_writeio_log); - if (last_byte < 0) { - return XFS_MAXIOFFSET(mp); - } - return last_byte; -} - -/* - * Start the truncation of the file to new_size. The new size - * must be smaller than the current size. This routine will - * clear the buffer and page caches of file data in the removed - * range, and xfs_itruncate_finish() will remove the underlying - * disk blocks. - * - * The inode must have its I/O lock locked EXCLUSIVELY, and it - * must NOT have the inode lock held at all. This is because we're - * calling into the buffer/page cache code and we can't hold the - * inode lock when we do so. - * - * We need to wait for any direct I/Os in flight to complete before we - * proceed with the truncate. This is needed to prevent the extents - * being read or written by the direct I/Os from being removed while the - * I/O is in flight as there is no other method of synchronising - * direct I/O with the truncate operation. Also, because we hold - * the IOLOCK in exclusive mode, we prevent new direct I/Os from being - * started until the truncate completes and drops the lock. Essentially, - * the xfs_ioend_wait() call forms an I/O barrier that provides strict - * ordering between direct I/Os and the truncate operation. - * - * The flags parameter can have either the value XFS_ITRUNC_DEFINITE - * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used - * in the case that the caller is locking things out of order and - * may not be able to call xfs_itruncate_finish() with the inode lock - * held without dropping the I/O lock. If the caller must drop the - * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() - * must be called again with all the same restrictions as the initial - * call. - */ -int -xfs_itruncate_start( - xfs_inode_t *ip, - uint flags, - xfs_fsize_t new_size) -{ - xfs_fsize_t last_byte; - xfs_off_t toss_start; - xfs_mount_t *mp; - int error = 0; - - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT((new_size == 0) || (new_size <= ip->i_size)); - ASSERT((flags == XFS_ITRUNC_DEFINITE) || - (flags == XFS_ITRUNC_MAYBE)); - - mp = ip->i_mount; - - /* wait for the completion of any pending DIOs */ - if (new_size == 0 || new_size < ip->i_size) - xfs_ioend_wait(ip); - - /* - * Call toss_pages or flushinval_pages to get rid of pages - * overlapping the region being removed. We have to use - * the less efficient flushinval_pages in the case that the - * caller may not be able to finish the truncate without - * dropping the inode's I/O lock. Make sure - * to catch any pages brought in by buffers overlapping - * the EOF by searching out beyond the isize by our - * block size. We round new_size up to a block boundary - * so that we don't toss things on the same block as - * new_size but before it. - * - * Before calling toss_page or flushinval_pages, make sure to - * call remapf() over the same region if the file is mapped. - * This frees up mapped file references to the pages in the - * given range and for the flushinval_pages case it ensures - * that we get the latest mapped changes flushed out. - */ - toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - toss_start = XFS_FSB_TO_B(mp, toss_start); - if (toss_start < 0) { - /* - * The place to start tossing is beyond our maximum - * file size, so there is no way that the data extended - * out there. - */ - return 0; - } - last_byte = xfs_file_last_byte(ip); - trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); - if (last_byte > toss_start) { - if (flags & XFS_ITRUNC_DEFINITE) { - xfs_tosspages(ip, toss_start, - -1, FI_REMAPF_LOCKED); - } else { - error = xfs_flushinval_pages(ip, toss_start, - -1, FI_REMAPF_LOCKED); - } - } - -#ifdef DEBUG - if (new_size == 0) { - ASSERT(VN_CACHED(VFS_I(ip)) == 0); - } -#endif - return error; -} - -/* - * Shrink the file to the given new_size. The new size must be smaller than - * the current size. This will free up the underlying blocks in the removed - * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). + * Free up the underlying blocks past new_size. The new size must be smaller + * than the current size. This routine can be used both for the attribute and + * data fork, and does not modify the inode size, which is left to the caller. * * The transaction passed to this routine must have made a permanent log * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the @@ -1387,31 +1232,6 @@ xfs_itruncate_start( * will be "held" within the returned transaction. This routine does NOT * require any disk space to be reserved for it within the transaction. * - * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it - * indicates the fork which is to be truncated. For the attribute fork we only - * support truncation to size 0. - * - * We use the sync parameter to indicate whether or not the first transaction - * we perform might have to be synchronous. For the attr fork, it needs to be - * so if the unlink of the inode is not yet known to be permanent in the log. - * This keeps us from freeing and reusing the blocks of the attribute fork - * before the unlink of the inode becomes permanent. - * - * For the data fork, we normally have to run synchronously if we're being - * called out of the inactive path or we're being called out of the create path - * where we're truncating an existing file. Either way, the truncate needs to - * be sync so blocks don't reappear in the file with altered data in case of a - * crash. wsync filesystems can run the first case async because anything that - * shrinks the inode has to run sync so by the time we're called here from - * inactive, the inode size is permanently set to 0. - * - * Calls from the truncate path always need to be sync unless we're in a wsync - * filesystem and the file has already been unlinked. - * - * The caller is responsible for correctly setting the sync parameter. It gets - * too hard for us to guess here which path we're being called out of just - * based on inode state. - * * If we get an error, we must return with the inode locked and linked into the * current transaction. This keeps things simple for the higher level code, * because it always knows that the inode is locked and held in the transaction @@ -1419,124 +1239,30 @@ xfs_itruncate_start( * dirty on error so that transactions can be easily aborted if possible. */ int -xfs_itruncate_finish( - xfs_trans_t **tp, - xfs_inode_t *ip, - xfs_fsize_t new_size, - int fork, - int sync) +xfs_itruncate_extents( + struct xfs_trans **tpp, + struct xfs_inode *ip, + int whichfork, + xfs_fsize_t new_size) { - xfs_fsblock_t first_block; - xfs_fileoff_t first_unmap_block; - xfs_fileoff_t last_block; - xfs_filblks_t unmap_len=0; - xfs_mount_t *mp; - xfs_trans_t *ntp; - int done; - int committed; - xfs_bmap_free_t free_list; - int error; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp = *tpp; + struct xfs_trans *ntp; + xfs_bmap_free_t free_list; + xfs_fsblock_t first_block; + xfs_fileoff_t first_unmap_block; + xfs_fileoff_t last_block; + xfs_filblks_t unmap_len; + int committed; + int error = 0; + int done = 0; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT((new_size == 0) || (new_size <= ip->i_size)); - ASSERT(*tp != NULL); - ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); - ASSERT(ip->i_transp == *tp); + ASSERT(new_size <= ip->i_size); + ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0); - - - ntp = *tp; - mp = (ntp)->t_mountp; - ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); - - /* - * We only support truncating the entire attribute fork. - */ - if (fork == XFS_ATTR_FORK) { - new_size = 0LL; - } - first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - trace_xfs_itruncate_finish_start(ip, new_size); - - /* - * The first thing we do is set the size to new_size permanently - * on disk. This way we don't have to worry about anyone ever - * being able to look at the data being freed even in the face - * of a crash. What we're getting around here is the case where - * we free a block, it is allocated to another file, it is written - * to, and then we crash. If the new data gets written to the - * file but the log buffers containing the free and reallocation - * don't, then we'd end up with garbage in the blocks being freed. - * As long as we make the new_size permanent before actually - * freeing any blocks it doesn't matter if they get written to. - * - * The callers must signal into us whether or not the size - * setting here must be synchronous. There are a few cases - * where it doesn't have to be synchronous. Those cases - * occur if the file is unlinked and we know the unlink is - * permanent or if the blocks being truncated are guaranteed - * to be beyond the inode eof (regardless of the link count) - * and the eof value is permanent. Both of these cases occur - * only on wsync-mounted filesystems. In those cases, we're - * guaranteed that no user will ever see the data in the blocks - * that are being truncated so the truncate can run async. - * In the free beyond eof case, the file may wind up with - * more blocks allocated to it than it needs if we crash - * and that won't get fixed until the next time the file - * is re-opened and closed but that's ok as that shouldn't - * be too many blocks. - * - * However, we can't just make all wsync xactions run async - * because there's one call out of the create path that needs - * to run sync where it's truncating an existing file to size - * 0 whose size is > 0. - * - * It's probably possible to come up with a test in this - * routine that would correctly distinguish all the above - * cases from the values of the function parameters and the - * inode state but for sanity's sake, I've decided to let the - * layers above just tell us. It's simpler to correctly figure - * out in the layer above exactly under what conditions we - * can run async and I think it's easier for others read and - * follow the logic in case something has to be changed. - * cscope is your friend -- rcc. - * - * The attribute fork is much simpler. - * - * For the attribute fork we allow the caller to tell us whether - * the unlink of the inode that led to this call is yet permanent - * in the on disk log. If it is not and we will be freeing extents - * in this inode then we make the first transaction synchronous - * to make sure that the unlink is permanent by the time we free - * the blocks. - */ - if (fork == XFS_DATA_FORK) { - if (ip->i_d.di_nextents > 0) { - /* - * If we are not changing the file size then do - * not update the on-disk file size - we may be - * called from xfs_inactive_free_eofblocks(). If we - * update the on-disk file size and then the system - * crashes before the contents of the file are - * flushed to disk then the files may be full of - * holes (ie NULL files bug). - */ - if (ip->i_size != new_size) { - ip->i_d.di_size = new_size; - ip->i_size = new_size; - xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); - } - } - } else if (sync) { - ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); - if (ip->i_d.di_anextents > 0) - xfs_trans_set_sync(ntp); - } - ASSERT(fork == XFS_DATA_FORK || - (fork == XFS_ATTR_FORK && - ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || - (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); /* * Since it is possible for space to become allocated beyond @@ -1547,128 +1273,142 @@ xfs_itruncate_finish( * beyond the maximum file size (ie it is the same as last_block), * then there is nothing to do. */ + first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); - ASSERT(first_unmap_block <= last_block); - done = 0; - if (last_block == first_unmap_block) { - done = 1; - } else { - unmap_len = last_block - first_unmap_block + 1; - } + if (first_unmap_block == last_block) + return 0; + + ASSERT(first_unmap_block < last_block); + unmap_len = last_block - first_unmap_block + 1; while (!done) { - /* - * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() - * will tell us whether it freed the entire range or - * not. If this is a synchronous mount (wsync), - * then we can tell bunmapi to keep all the - * transactions asynchronous since the unlink - * transaction that made this inode inactive has - * already hit the disk. There's no danger of - * the freed blocks being reused, there being a - * crash, and the reused blocks suddenly reappearing - * in this file with garbage in them once recovery - * runs. - */ xfs_bmap_init(&free_list, &first_block); - error = xfs_bunmapi(ntp, ip, + error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, - xfs_bmapi_aflag(fork), + xfs_bmapi_aflag(whichfork), XFS_ITRUNC_MAX_EXTENTS, &first_block, &free_list, &done); - if (error) { - /* - * If the bunmapi call encounters an error, - * return to the caller where the transaction - * can be properly aborted. We just need to - * make sure we're not holding any resources - * that we were not when we came in. - */ - xfs_bmap_cancel(&free_list); - return error; - } + if (error) + goto out_bmap_cancel; /* * Duplicate the transaction that has the permanent * reservation and commit the old transaction. */ - error = xfs_bmap_finish(tp, &free_list, &committed); - ntp = *tp; + error = xfs_bmap_finish(&tp, &free_list, &committed); if (committed) - xfs_trans_ijoin(ntp, ip); - - if (error) { - /* - * If the bmap finish call encounters an error, return - * to the caller where the transaction can be properly - * aborted. We just need to make sure we're not - * holding any resources that we were not when we came - * in. - * - * Aborting from this point might lose some blocks in - * the file system, but oh well. - */ - xfs_bmap_cancel(&free_list); - return error; - } + xfs_trans_ijoin(tp, ip); + if (error) + goto out_bmap_cancel; if (committed) { /* * Mark the inode dirty so it will be logged and * moved forward in the log as part of every commit. */ - xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } - ntp = xfs_trans_dup(ntp); - error = xfs_trans_commit(*tp, 0); - *tp = ntp; + ntp = xfs_trans_dup(tp); + error = xfs_trans_commit(tp, 0); + tp = ntp; - xfs_trans_ijoin(ntp, ip); + xfs_trans_ijoin(tp, ip); if (error) - return error; + goto out; + /* - * transaction commit worked ok so we can drop the extra ticket + * Transaction commit worked ok so we can drop the extra ticket * reference that we gained in xfs_trans_dup() */ - xfs_log_ticket_put(ntp->t_ticket); - error = xfs_trans_reserve(ntp, 0, + xfs_log_ticket_put(tp->t_ticket); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); if (error) - return error; + goto out; } + +out: + *tpp = tp; + return error; +out_bmap_cancel: /* - * Only update the size in the case of the data fork, but - * always re-log the inode so that our permanent transaction - * can keep on rolling it forward in the log. + * If the bunmapi call encounters an error, return to the caller where + * the transaction can be properly aborted. We just need to make sure + * we're not holding any resources that we were not when we came in. */ - if (fork == XFS_DATA_FORK) { - xfs_isize_check(mp, ip, new_size); + xfs_bmap_cancel(&free_list); + goto out; +} + +int +xfs_itruncate_data( + struct xfs_trans **tpp, + struct xfs_inode *ip, + xfs_fsize_t new_size) +{ + int error; + + trace_xfs_itruncate_data_start(ip, new_size); + + /* + * The first thing we do is set the size to new_size permanently on + * disk. This way we don't have to worry about anyone ever being able + * to look at the data being freed even in the face of a crash. + * What we're getting around here is the case where we free a block, it + * is allocated to another file, it is written to, and then we crash. + * If the new data gets written to the file but the log buffers + * containing the free and reallocation don't, then we'd end up with + * garbage in the blocks being freed. As long as we make the new_size + * permanent before actually freeing any blocks it doesn't matter if + * they get written to. + */ + if (ip->i_d.di_nextents > 0) { /* - * If we are not changing the file size then do - * not update the on-disk file size - we may be - * called from xfs_inactive_free_eofblocks(). If we - * update the on-disk file size and then the system - * crashes before the contents of the file are - * flushed to disk then the files may be full of - * holes (ie NULL files bug). + * If we are not changing the file size then do not update + * the on-disk file size - we may be called from + * xfs_inactive_free_eofblocks(). If we update the on-disk + * file size and then the system crashes before the contents + * of the file are flushed to disk then the files may be + * full of holes (ie NULL files bug). */ if (ip->i_size != new_size) { ip->i_d.di_size = new_size; ip->i_size = new_size; + xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); } } - xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); - ASSERT((new_size != 0) || - (fork == XFS_ATTR_FORK) || - (ip->i_delayed_blks == 0)); - ASSERT((new_size != 0) || - (fork == XFS_ATTR_FORK) || - (ip->i_d.di_nextents == 0)); - trace_xfs_itruncate_finish_end(ip, new_size); + + error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); + if (error) + return error; + + /* + * If we are not changing the file size then do not update the on-disk + * file size - we may be called from xfs_inactive_free_eofblocks(). + * If we update the on-disk file size and then the system crashes + * before the contents of the file are flushed to disk then the files + * may be full of holes (ie NULL files bug). + */ + xfs_isize_check(ip, new_size); + if (ip->i_size != new_size) { + ip->i_d.di_size = new_size; + ip->i_size = new_size; + } + + ASSERT(new_size != 0 || ip->i_delayed_blks == 0); + ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); + + /* + * Always re-log the inode so that our permanent transaction can keep + * on rolling it forward in the log. + */ + xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); + + trace_xfs_itruncate_data_end(ip, new_size); return 0; } @@ -1694,7 +1434,6 @@ xfs_iunlink( ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_mode != 0); - ASSERT(ip->i_transp == tp); mp = tp->t_mountp; @@ -1717,7 +1456,7 @@ xfs_iunlink( ASSERT(agi->agi_unlinked[bucket_index]); ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); - if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { + if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { /* * There is already another inode in the bucket we need * to add ourselves to. Add us at the front of the list. @@ -1728,8 +1467,7 @@ xfs_iunlink( if (error) return error; - ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); - /* both on-disk, don't endian flip twice */ + ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); @@ -1794,7 +1532,7 @@ xfs_iunlink_remove( agino = XFS_INO_TO_AGINO(mp, ip->i_ino); ASSERT(agino != 0); bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); + ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); ASSERT(agi->agi_unlinked[bucket_index]); if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { @@ -1959,7 +1697,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. */ - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + lip = bp->b_fspriv; while (lip) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; @@ -2086,7 +1824,6 @@ xfs_ifree( xfs_buf_t *ibp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_transp == tp); ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_anextents == 0); @@ -2733,7 +2470,7 @@ cluster_corrupt_out: * mark the buffer as an error and call them. Otherwise * mark it as stale and brelse. */ - if (XFS_BUF_IODONE_FUNC(bp)) { + if (bp->b_iodone) { XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); XFS_BUF_ERROR(bp,EIO); @@ -2920,7 +2657,7 @@ xfs_iflush_int( */ xfs_synchronize_times(ip); - if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, + if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", @@ -3073,8 +2810,8 @@ xfs_iflush_int( */ xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); + ASSERT(bp->b_fspriv != NULL); + ASSERT(bp->b_iodone != NULL); } else { /* * We're flushing an inode which is not in the AIL and has diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 964cfea77686..a97644ab945a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -241,7 +241,6 @@ typedef struct xfs_inode { xfs_ifork_t i_df; /* data fork */ /* Transaction and locking information. */ - struct xfs_trans *i_transp; /* ptr to owning transaction*/ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ @@ -458,16 +457,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) extern struct lock_class_key xfs_iolock_reclaimable; /* - * Flags for xfs_itruncate_start(). - */ -#define XFS_ITRUNC_DEFINITE 0x1 -#define XFS_ITRUNC_MAYBE 0x2 - -#define XFS_ITRUNC_FLAGS \ - { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \ - { XFS_ITRUNC_MAYBE, "MAYBE" } - -/* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and * new subdirectory gets S_ISGID bit from parent. @@ -501,9 +490,10 @@ uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *, struct xfs_bmap_free *); -int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); -int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, - xfs_fsize_t, int, int); +int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, + int, xfs_fsize_t); +int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, + xfs_fsize_t); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); @@ -579,13 +569,6 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) -#ifdef DEBUG -void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, - xfs_fsize_t); -#else /* DEBUG */ -#define xfs_isize_check(mp, ip, isize) -#endif /* DEBUG */ - #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index b1e88d56069c..588406dc6a35 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -632,13 +632,8 @@ xfs_inode_item_unlock( struct xfs_inode *ip = iip->ili_inode; unsigned short lock_flags; - ASSERT(iip->ili_inode->i_itemp != NULL); - ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); - - /* - * Clear the transaction pointer in the inode. - */ - ip->i_transp = NULL; + ASSERT(ip->i_itemp != NULL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* * If the inode needed a separate buffer with which to log @@ -664,8 +659,8 @@ xfs_inode_item_unlock( lock_flags = iip->ili_lock_flags; iip->ili_lock_flags = 0; if (lock_flags) { - xfs_iunlock(iip->ili_inode, lock_flags); - IRELE(iip->ili_inode); + xfs_iunlock(ip, lock_flags); + IRELE(ip); } } @@ -879,7 +874,7 @@ xfs_iflush_done( * Scan the buffer IO completions for other inodes being completed and * attach them to the current inode log item. */ - blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + blip = bp->b_fspriv; prev = NULL; while (blip != NULL) { if (lip->li_cb != xfs_iflush_done) { @@ -891,7 +886,7 @@ xfs_iflush_done( /* remove from list */ next = blip->li_bio_list; if (!prev) { - XFS_BUF_SET_FSPRIVATE(bp, next); + bp->b_fspriv = next; } else { prev->li_bio_list = next; } diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index b8e4ee4e89a4..b253c0ea5bec 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h @@ -28,17 +28,6 @@ typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ -/* - * Useful inode bits for this kernel. - * Used in some places where having 64-bits in the 32-bit kernels - * costs too much. - */ -#if XFS_BIG_INUMS -typedef xfs_ino_t xfs_intino_t; -#else -typedef __uint32_t xfs_intino_t; -#endif - #define NULLFSINO ((xfs_ino_t)-1) #define NULLAGINO ((xfs_agino_t)-1) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 41d5b8f2bf92..06ff8437ed8e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -871,15 +871,9 @@ xlog_space_left( void xlog_iodone(xfs_buf_t *bp) { - xlog_in_core_t *iclog; - xlog_t *l; - int aborted; - - iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); - ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); - aborted = 0; - l = iclog->ic_log; + xlog_in_core_t *iclog = bp->b_fspriv; + xlog_t *l = iclog->ic_log; + int aborted = 0; /* * Race to shutdown the filesystem if we see an error. @@ -1056,10 +1050,9 @@ xlog_alloc_log(xfs_mount_t *mp, bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); + bp->b_iodone = xlog_iodone; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); @@ -1090,10 +1083,8 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog_size, 0); if (!bp) goto out_free_iclog; - if (!XFS_BUF_CPSEMA(bp)) - ASSERT(0); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); + + bp->b_iodone = xlog_iodone; iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; #ifdef DEBUG @@ -1118,7 +1109,7 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); - ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); + ASSERT(xfs_buf_islocked(iclog->ic_bp)); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1254,9 +1245,8 @@ STATIC int xlog_bdstrat( struct xfs_buf *bp) { - struct xlog_in_core *iclog; + struct xlog_in_core *iclog = bp->b_fspriv; - iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); if (iclog->ic_state & XLOG_STATE_IOERROR) { XFS_BUF_ERROR(bp, EIO); XFS_BUF_STALE(bp); @@ -1269,7 +1259,6 @@ xlog_bdstrat( return 0; } - bp->b_flags |= _XBF_RUN_QUEUES; xfs_buf_iorequest(bp); return 0; } @@ -1351,8 +1340,6 @@ xlog_sync(xlog_t *log, } bp = iclog->ic_bp; - ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); @@ -1366,22 +1353,28 @@ xlog_sync(xlog_t *log, iclog->ic_bwritecnt = 1; } XFS_BUF_SET_COUNT(bp, count); - XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ + bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_LOG_BUFFER; + bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { + bp->b_flags |= XBF_FUA; + /* - * If we have an external log device, flush the data device - * before flushing the log to make sure all meta data - * written back from the AIL actually made it to disk - * before writing out the new log tail LSN in the log buffer. + * Flush the data device before flushing the log to make + * sure all meta data written back from the AIL actually made + * it to disk before stamping the new log tail LSN into the + * log buffer. For an external log we need to issue the + * flush explicitly, and unfortunately synchronously here; + * for an internal log we can simply use the block layer + * state machine for preflushes. */ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); - XFS_BUF_ORDERED(bp); + else + bp->b_flags |= XBF_FLUSH; } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); @@ -1404,19 +1397,16 @@ xlog_sync(xlog_t *log, } if (split) { bp = iclog->ic_log->l_xbuf; - ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == - (unsigned long)1); - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ (__psint_t)count), split); - XFS_BUF_SET_FSPRIVATE(bp, iclog); + bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); - bp->b_flags |= XBF_LOG_BUFFER; + bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) - XFS_BUF_ORDERED(bp); + bp->b_flags |= XBF_FUA; dptr = XFS_BUF_PTR(bp); /* * Bump the cycle numbers at the start of each block @@ -3521,13 +3511,13 @@ xlog_verify_iclog(xlog_t *log, spin_unlock(&log->l_icloglock); /* check log magic numbers */ - if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) + if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); ptr = (xfs_caddr_t) &iclog->ic_header; for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { - if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) + if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: unexpected magic num", __func__); } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 04142caedb2b..8fe4206de057 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -91,6 +91,8 @@ xlog_get_bp( xlog_t *log, int nbblks) { + struct xfs_buf *bp; + if (!xlog_buf_bbcount_valid(log, nbblks)) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); @@ -118,8 +120,10 @@ xlog_get_bp( nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, - BBTOB(nbblks), 0); + bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0); + if (bp) + xfs_buf_unlock(bp); + return bp; } STATIC void @@ -264,7 +268,7 @@ xlog_bwrite( XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_HOLD(bp); - XFS_BUF_PSEMA(bp, PRIBIO); + xfs_buf_lock(bp); XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); @@ -300,14 +304,14 @@ xlog_header_check_recover( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); + ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); /* * IRIX doesn't write the h_fmt field and leaves it zeroed * (XLOG_FMT_UNKNOWN). This stops us from trying to recover * a dirty log created in IRIX. */ - if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { + if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { xfs_warn(mp, "dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); @@ -333,7 +337,7 @@ xlog_header_check_mount( xfs_mount_t *mp, xlog_rec_header_t *head) { - ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); + ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); if (uuid_is_nil(&head->h_fs_uuid)) { /* @@ -367,7 +371,7 @@ xlog_recover_iodone( xfs_force_shutdown(bp->b_target->bt_mount, SHUTDOWN_META_IO_ERROR); } - XFS_BUF_CLR_IODONE_FUNC(bp); + bp->b_iodone = NULL; xfs_buf_ioend(bp, 0); } @@ -534,7 +538,7 @@ xlog_find_verify_log_record( head = (xlog_rec_header_t *)offset; - if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) + if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) break; if (!smallmem) @@ -916,7 +920,7 @@ xlog_find_tail( if (error) goto done; - if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { + if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { found = 1; break; } @@ -933,8 +937,8 @@ xlog_find_tail( if (error) goto done; - if (XLOG_HEADER_MAGIC_NUM == - be32_to_cpu(*(__be32 *)offset)) { + if (*(__be32 *)offset == + cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { found = 2; break; } @@ -1947,7 +1951,7 @@ xfs_qm_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. */ - if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { if (flags & XFS_QMOPT_DOWARN) xfs_alert(mp, "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", @@ -2174,7 +2178,7 @@ xlog_recover_buffer_pass2( error = xfs_bwrite(mp, bp); } else { ASSERT(bp->b_target->bt_mount == mp); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); + bp->b_iodone = xlog_recover_iodone; xfs_bdwrite(mp, bp); } @@ -2238,7 +2242,7 @@ xlog_recover_inode_pass2( * Make sure the place we're flushing out to really looks * like an inode! */ - if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { + if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", @@ -2434,7 +2438,7 @@ xlog_recover_inode_pass2( write_inode_buffer: ASSERT(bp->b_target->bt_mount == mp); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); + bp->b_iodone = xlog_recover_iodone; xfs_bdwrite(mp, bp); error: if (need_free) @@ -2556,7 +2560,7 @@ xlog_recover_dquot_pass2( ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_target->bt_mount == mp); - XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); + bp->b_iodone = xlog_recover_iodone; xfs_bdwrite(mp, bp); return (0); @@ -3295,7 +3299,7 @@ xlog_valid_rec_header( { int hlen; - if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { + if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); return XFS_ERROR(EFSCORRUPTED); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b49b82363d20..7f25245da289 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -348,7 +348,7 @@ xfs_mount_validate_sb( } /* - * More sanity checking. These were stolen directly from + * More sanity checking. Most of these were stolen directly from * xfs_repair. */ if (unlikely( @@ -371,23 +371,13 @@ xfs_mount_validate_sb( (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || - (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { + (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || + sbp->sb_dblocks == 0 || + sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || + sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { if (loud) - xfs_warn(mp, "SB sanity check 1 failed"); - return XFS_ERROR(EFSCORRUPTED); - } - - /* - * Sanity check AG count, size fields against data size field - */ - if (unlikely( - sbp->sb_dblocks == 0 || - sbp->sb_dblocks > - (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || - sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * - sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { - if (loud) - xfs_warn(mp, "SB sanity check 2 failed"); + XFS_CORRUPTION_ERROR("SB sanity check failed", + XFS_ERRLEVEL_LOW, mp, sbp); return XFS_ERROR(EFSCORRUPTED); } @@ -864,7 +854,8 @@ xfs_update_alignment(xfs_mount_t *mp) if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || (BBTOB(mp->m_swidth) & mp->m_blockmask)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check 1 failed"); + xfs_warn(mp, "alignment check failed: " + "(sunit/swidth vs. blocksize)"); return XFS_ERROR(EINVAL); } mp->m_dalign = mp->m_swidth = 0; @@ -875,6 +866,8 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { if (mp->m_flags & XFS_MOUNT_RETERR) { + xfs_warn(mp, "alignment check failed: " + "(sunit/swidth vs. ag size)"); return XFS_ERROR(EINVAL); } xfs_warn(mp, @@ -889,8 +882,8 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, - "stripe alignment turned off: sunit(%d) less than bsize(%d)", + xfs_warn(mp, "alignment check failed: " + "sunit(%d) less than bsize(%d)", mp->m_dalign, mp->m_blockmask +1); return XFS_ERROR(EINVAL); @@ -1096,10 +1089,6 @@ xfs_mount_reset_sbqflags( if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; -#ifdef QUOTADEBUG - xfs_notice(mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT); @@ -1532,7 +1521,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); - error = xfs_log_sbcount(mp, 1); + error = xfs_log_sbcount(mp); if (error) xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); @@ -1568,18 +1557,14 @@ xfs_fs_writable(xfs_mount_t *mp) /* * xfs_log_sbcount * - * Called either periodically to keep the on disk superblock values - * roughly up to date or from unmount to make sure the values are - * correct on a clean unmount. + * Sync the superblock counters to disk. * * Note this code can be called during the process of freezing, so - * we may need to use the transaction allocator which does not not + * we may need to use the transaction allocator which does not * block when the transaction subsystem is in its frozen state. */ int -xfs_log_sbcount( - xfs_mount_t *mp, - uint sync) +xfs_log_sbcount(xfs_mount_t *mp) { xfs_trans_t *tp; int error; @@ -1605,8 +1590,7 @@ xfs_log_sbcount( } xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); - if (sync) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); return error; } @@ -1941,22 +1925,19 @@ unwind: * the superblock buffer if it can be locked without sleeping. * If it can't then we'll return NULL. */ -xfs_buf_t * +struct xfs_buf * xfs_getsb( - xfs_mount_t *mp, - int flags) + struct xfs_mount *mp, + int flags) { - xfs_buf_t *bp; + struct xfs_buf *bp = mp->m_sb_bp; - ASSERT(mp->m_sb_bp != NULL); - bp = mp->m_sb_bp; - if (flags & XBF_TRYLOCK) { - if (!XFS_BUF_CPSEMA(bp)) { + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) return NULL; - } - } else { - XFS_BUF_PSEMA(bp, PRIBIO); + xfs_buf_lock(bp); } + XFS_BUF_HOLD(bp); ASSERT(XFS_BUF_ISDONE(bp)); return bp; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 3d68bb267c5f..bb24dac42a25 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -371,7 +371,7 @@ typedef struct xfs_mod_sb { int64_t msb_delta; /* Change to make to specified field */ } xfs_mod_sb_t; -extern int xfs_log_sbcount(xfs_mount_t *, uint); +extern int xfs_log_sbcount(xfs_mount_t *); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c83f63b33aae..efc147f0e9b6 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1426,6 +1426,7 @@ xfs_trans_committed( static inline void xfs_log_item_batch_insert( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) @@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert( spin_lock(&ailp->xa_lock); /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); + xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) IOP_UNPIN(log_items[i], 0); @@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert( * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is IOP_COMMITTED processing, followed by an * IOP_UNPIN(aborted) call. + * + * The AIL cursor is used to optimise the insert process. If commit_lsn is not + * at the end of the AIL, the insert cursor avoids the need to walk + * the AIL to find the insertion point on every xfs_log_item_batch_insert() + * call. This saves a lot of needless list walking and is a net win, even + * though it slightly increases that amount of AIL lock traffic to set it up + * and tear it down. */ void xfs_trans_committed_bulk( @@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk( #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; + struct xfs_ail_cursor cur; int i = 0; + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); + spin_unlock(&ailp->xa_lock); + /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { struct xfs_log_item *lip = lv->lv_item; @@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk( /* * Not a bulk update option due to unusual item_lsn. * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. + * we have the ail lock. Then unpin the item. This does + * not affect the AIL cursor the bulk insert path is + * using. */ spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) @@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk( /* Item is a candidate for bulk AIL insert. */ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, log_items, + xfs_log_item_batch_insert(ailp, &cur, log_items, LOG_ITEM_BATCH_SIZE, commit_lsn); i = 0; } @@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk( /* make sure we insert the remainder! */ if (i) - xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); + xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); + + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 5fc2380092c8..43233e92f0f6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -163,17 +163,11 @@ xfs_ail_max_lsn( } /* - * AIL traversal cursor initialisation. - * - * The cursor keeps track of where our current traversal is up - * to by tracking the next Æ£tem in the list for us. However, for - * this to be safe, removing an object from the AIL needs to invalidate - * any cursor that points to it. hence the traversal cursor needs to - * be linked to the struct xfs_ail so that deletion can search all the - * active cursors for invalidation. - * - * We don't link the push cursor because it is embedded in the struct - * xfs_ail and hence easily findable. + * The cursor keeps track of where our current traversal is up to by tracking + * the next item in the list for us. However, for this to be safe, removing an + * object from the AIL needs to invalidate any cursor that points to it. hence + * the traversal cursor needs to be linked to the struct xfs_ail so that + * deletion can search all the active cursors for invalidation. */ STATIC void xfs_trans_ail_cursor_init( @@ -181,31 +175,12 @@ xfs_trans_ail_cursor_init( struct xfs_ail_cursor *cur) { cur->item = NULL; - if (cur == &ailp->xa_cursors) - return; - - cur->next = ailp->xa_cursors.next; - ailp->xa_cursors.next = cur; -} - -/* - * Set the cursor to the next item, because when we look - * up the cursor the current item may have been freed. - */ -STATIC void -xfs_trans_ail_cursor_set( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - struct xfs_log_item *lip) -{ - if (lip) - cur->item = xfs_ail_next(ailp, lip); + list_add_tail(&cur->list, &ailp->xa_cursors); } /* - * Get the next item in the traversal and advance the cursor. - * If the cursor was invalidated (inidicated by a lip of 1), - * restart the traversal. + * Get the next item in the traversal and advance the cursor. If the cursor + * was invalidated (indicated by a lip of 1), restart the traversal. */ struct xfs_log_item * xfs_trans_ail_cursor_next( @@ -216,45 +191,31 @@ xfs_trans_ail_cursor_next( if ((__psint_t)lip & 1) lip = xfs_ail_min(ailp); - xfs_trans_ail_cursor_set(ailp, cur, lip); + if (lip) + cur->item = xfs_ail_next(ailp, lip); return lip; } /* - * Now that the traversal is complete, we need to remove the cursor - * from the list of traversing cursors. Avoid removing the embedded - * push cursor, but use the fact it is always present to make the - * list deletion simple. + * When the traversal is complete, we need to remove the cursor from the list + * of traversing cursors. */ void xfs_trans_ail_cursor_done( struct xfs_ail *ailp, - struct xfs_ail_cursor *done) + struct xfs_ail_cursor *cur) { - struct xfs_ail_cursor *prev = NULL; - struct xfs_ail_cursor *cur; - - done->item = NULL; - if (done == &ailp->xa_cursors) - return; - prev = &ailp->xa_cursors; - for (cur = prev->next; cur; prev = cur, cur = prev->next) { - if (cur == done) { - prev->next = cur->next; - break; - } - } - ASSERT(cur); + cur->item = NULL; + list_del_init(&cur->list); } /* - * Invalidate any cursor that is pointing to this item. This is - * called when an item is removed from the AIL. Any cursor pointing - * to this object is now invalid and the traversal needs to be - * terminated so it doesn't reference a freed object. We set the - * cursor item to a value of 1 so we can distinguish between an - * invalidation and the end of the list when getting the next item - * from the cursor. + * Invalidate any cursor that is pointing to this item. This is called when an + * item is removed from the AIL. Any cursor pointing to this object is now + * invalid and the traversal needs to be terminated so it doesn't reference a + * freed object. We set the low bit of the cursor item pointer so we can + * distinguish between an invalidation and the end of the list when getting the + * next item from the cursor. */ STATIC void xfs_trans_ail_cursor_clear( @@ -263,8 +224,7 @@ xfs_trans_ail_cursor_clear( { struct xfs_ail_cursor *cur; - /* need to search all cursors */ - for (cur = &ailp->xa_cursors; cur; cur = cur->next) { + list_for_each_entry(cur, &ailp->xa_cursors, list) { if (cur->item == lip) cur->item = (struct xfs_log_item *) ((__psint_t)cur->item | 1); @@ -272,9 +232,10 @@ xfs_trans_ail_cursor_clear( } /* - * Return the item in the AIL with the current lsn. - * Return the current tree generation number for use - * in calls to xfs_trans_next_ail(). + * Find the first item in the AIL with the given @lsn by searching in ascending + * LSN order and initialise the cursor to point to the next item for a + * ascending traversal. Pass a @lsn of zero to initialise the cursor to the + * first item in the AIL. Returns NULL if the list is empty. */ xfs_log_item_t * xfs_trans_ail_cursor_first( @@ -285,46 +246,112 @@ xfs_trans_ail_cursor_first( xfs_log_item_t *lip; xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_ail_min(ailp); - if (lsn == 0) + + if (lsn == 0) { + lip = xfs_ail_min(ailp); goto out; + } list_for_each_entry(lip, &ailp->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) goto out; } - lip = NULL; + return NULL; + out: - xfs_trans_ail_cursor_set(ailp, cur, lip); + if (lip) + cur->item = xfs_ail_next(ailp, lip); return lip; } +static struct xfs_log_item * +__xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + xfs_lsn_t lsn) +{ + xfs_log_item_t *lip; + + list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) + return lip; + } + return NULL; +} + +/* + * Find the last item in the AIL with the given @lsn by searching in descending + * LSN order and initialise the cursor to point to that item. If there is no + * item with the value of @lsn, then it sets the cursor to the last item with an + * LSN lower than @lsn. Returns NULL if the list is empty. + */ +struct xfs_log_item * +xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn) +{ + xfs_trans_ail_cursor_init(ailp, cur); + cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); + return cur->item; +} + /* - * splice the log item list into the AIL at the given LSN. + * Splice the log item list into the AIL at the given LSN. We splice to the + * tail of the given LSN to maintain insert order for push traversals. The + * cursor is optional, allowing repeated updates to the same LSN to avoid + * repeated traversals. */ static void xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct list_head *list, + xfs_lsn_t lsn) { - xfs_log_item_t *next_lip; + struct xfs_log_item *lip = cur ? cur->item : NULL; + struct xfs_log_item *next_lip; - /* If the list is empty, just insert the item. */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; + /* + * Get a new cursor if we don't have a placeholder or the existing one + * has been invalidated. + */ + if (!lip || (__psint_t)lip & 1) { + lip = __xfs_trans_ail_cursor_last(ailp, lsn); + + if (!lip) { + /* The list is empty, so just splice and return. */ + if (cur) + cur->item = NULL; + list_splice(list, &ailp->xa_ail); + return; + } } - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; + /* + * Our cursor points to the item we want to insert _after_, so we have + * to update the cursor to point to the end of the list we are splicing + * in so that it points to the correct location for the next splice. + * i.e. before the splice + * + * lsn -> lsn -> lsn + x -> lsn + x ... + * ^ + * | cursor points here + * + * After the splice we have: + * + * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... + * ^ ^ + * | cursor points here | needs to move here + * + * So we set the cursor to the last item in the list to be spliced + * before we execute the splice, resulting in the cursor pointing to + * the correct item after the splice occurs. + */ + if (cur) { + next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); + cur->item = next_lip; } - - ASSERT(&next_lip->li_ail == &ailp->xa_ail || - XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); - - list_splice_init(list, &next_lip->li_ail); + list_splice(list, &lip->li_ail); } /* @@ -351,7 +378,7 @@ xfs_ail_worker( struct xfs_ail *ailp = container_of(to_delayed_work(work), struct xfs_ail, xa_work); xfs_mount_t *mp = ailp->xa_mount; - struct xfs_ail_cursor *cur = &ailp->xa_cursors; + struct xfs_ail_cursor cur; xfs_log_item_t *lip; xfs_lsn_t lsn; xfs_lsn_t target; @@ -363,13 +390,12 @@ xfs_ail_worker( spin_lock(&ailp->xa_lock); target = ailp->xa_target; - xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); + lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. */ - xfs_trans_ail_cursor_done(ailp, cur); + xfs_trans_ail_cursor_done(ailp, &cur); spin_unlock(&ailp->xa_lock); goto out_done; } @@ -457,12 +483,12 @@ xfs_ail_worker( if (stuck > 100) break; - lip = xfs_trans_ail_cursor_next(ailp, cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); if (lip == NULL) break; lsn = lip->li_lsn; } - xfs_trans_ail_cursor_done(ailp, cur); + xfs_trans_ail_cursor_done(ailp, &cur); spin_unlock(&ailp->xa_lock); if (flush_log) { @@ -645,6 +671,7 @@ xfs_trans_unlocked_item( void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock) @@ -674,7 +701,7 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, &tmp, lsn); + xfs_ail_splice(ailp, cur, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); @@ -793,6 +820,7 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); + INIT_LIST_HEAD(&ailp->xa_cursors); spin_lock_init(&ailp->xa_lock); INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); mp->m_ail = ailp; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 03b3b7f85a3b..15584fc3ed7d 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -81,7 +81,7 @@ _xfs_trans_bjoin( struct xfs_buf_log_item *bip; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); + ASSERT(bp->b_transp == NULL); /* * The xfs_buf_log_item pointer is stored in b_fsprivate. If @@ -89,7 +89,7 @@ _xfs_trans_bjoin( * The checks to see if one is there are in xfs_buf_item_init(). */ xfs_buf_item_init(bp, tp->t_mountp); - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + bip = bp->b_fspriv; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); @@ -110,7 +110,7 @@ _xfs_trans_bjoin( * Initialize b_fsprivate2 so we can find it with incore_match() * in xfs_trans_get_buf() and friends above. */ - XFS_BUF_SET_FSPRIVATE2(bp, tp); + bp->b_transp = tp; } @@ -160,7 +160,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, */ bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); if (bp != NULL) { - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + ASSERT(xfs_buf_islocked(bp)); if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) XFS_BUF_SUPER_STALE(bp); @@ -172,8 +172,8 @@ xfs_trans_get_buf(xfs_trans_t *tp, else if (XFS_BUF_ISSTALE(bp)) ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + bip = bp->b_fspriv; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -232,8 +232,8 @@ xfs_trans_getsb(xfs_trans_t *tp, * recursion count and return the buffer to the caller. */ bp = mp->m_sb_bp; - if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) { - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); + if (bp->b_transp == tp) { + bip = bp->b_fspriv; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -327,9 +327,9 @@ xfs_trans_read_buf( */ bp = xfs_trans_buf_item_match(tp, target, blkno, len); if (bp != NULL) { - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_transp == tp); + ASSERT(bp->b_fspriv != NULL); ASSERT((XFS_BUF_ISERROR(bp)) == 0); if (!(XFS_BUF_ISDONE(bp))) { trace_xfs_trans_read_buf_io(bp, _RET_IP_); @@ -363,7 +363,7 @@ xfs_trans_read_buf( } - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); + bip = bp->b_fspriv; bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -460,32 +460,30 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp) { xfs_buf_log_item_t *bip; - xfs_log_item_t *lip; /* * Default to a normal brelse() call if the tp is NULL. */ if (tp == NULL) { - ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); + struct xfs_log_item *lip = bp->b_fspriv; + + ASSERT(bp->b_transp == NULL); + /* * If there's a buf log item attached to the buffer, * then let the AIL know that the buffer is being * unlocked. */ - if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - if (lip->li_type == XFS_LI_BUF) { - bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - xfs_trans_unlocked_item(bip->bli_item.li_ailp, - lip); - } + if (lip != NULL && lip->li_type == XFS_LI_BUF) { + bip = bp->b_fspriv; + xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip); } xfs_buf_relse(bp); return; } - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + bip = bp->b_fspriv; ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); @@ -556,7 +554,7 @@ xfs_trans_brelse(xfs_trans_t *tp, xfs_buf_item_relse(bp); bip = NULL; } - XFS_BUF_SET_FSPRIVATE2(bp, NULL); + bp->b_transp = NULL; /* * If we've still got a buf log item on the buffer, then @@ -581,16 +579,15 @@ void xfs_trans_bhold(xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); + bip->bli_flags |= XFS_BLI_HOLD; trace_xfs_trans_bhold(bip); } @@ -603,19 +600,17 @@ void xfs_trans_bhold_release(xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(bip->bli_flags & XFS_BLI_HOLD); - bip->bli_flags &= ~XFS_BLI_HOLD; + bip->bli_flags &= ~XFS_BLI_HOLD; trace_xfs_trans_bhold_release(bip); } @@ -634,14 +629,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, uint first, uint last) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); - ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) || - (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks)); + ASSERT(bp->b_iodone == NULL || + bp->b_iodone == xfs_buf_iodone_callbacks); /* * Mark the buffer as needing to be written out eventually, @@ -656,9 +651,8 @@ xfs_trans_log_buf(xfs_trans_t *tp, XFS_BUF_DELAYWRITE(bp); XFS_BUF_DONE(bp); - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); - XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); + bp->b_iodone = xfs_buf_iodone_callbacks; bip->bli_item.li_cb = xfs_buf_iodone; trace_xfs_trans_log_buf(bip); @@ -706,13 +700,11 @@ xfs_trans_binval( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_binval(bip); @@ -780,13 +772,11 @@ xfs_trans_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_BUF; @@ -806,13 +796,11 @@ xfs_trans_stale_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_STALE_INODE; @@ -833,13 +821,11 @@ xfs_trans_inode_alloc_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; @@ -863,16 +849,14 @@ xfs_trans_dquot_buf( xfs_buf_t *bp, uint type) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); ASSERT(type == XFS_BLF_UDQUOT_BUF || type == XFS_BLF_PDQUOT_BUF || type == XFS_BLF_GDQUOT_BUF); - - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_format.blf_flags |= type; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 048b0c689d3e..c8dea2fd7e68 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -55,7 +55,6 @@ xfs_trans_ijoin( { xfs_inode_log_item_t *iip; - ASSERT(ip->i_transp == NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (ip->i_itemp == NULL) xfs_inode_item_init(ip, ip->i_mount); @@ -68,12 +67,6 @@ xfs_trans_ijoin( xfs_trans_add_item(tp, &iip->ili_item); xfs_trans_inode_broot_debug(ip); - - /* - * Initialize i_transp so we can find it with xfs_inode_incore() - * in xfs_trans_iget() above. - */ - ip->i_transp = tp; } /* @@ -111,7 +104,6 @@ xfs_trans_ichgtime( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_transp == tp); tv = current_fs_time(inode->i_sb); @@ -140,7 +132,6 @@ xfs_trans_log_inode( xfs_inode_t *ip, uint flags) { - ASSERT(ip->i_transp == tp); ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6b164e9e9a1f..212946b97239 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -53,7 +53,7 @@ void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, * of the list to trigger traversal restarts. */ struct xfs_ail_cursor { - struct xfs_ail_cursor *next; + struct list_head list; struct xfs_log_item *item; }; @@ -66,7 +66,7 @@ struct xfs_ail { struct xfs_mount *xa_mount; struct list_head xa_ail; xfs_lsn_t xa_target; - struct xfs_ail_cursor xa_cursors; + struct list_head xa_cursors; spinlock_t xa_lock; struct delayed_work xa_work; xfs_lsn_t xa_last_pushed_lsn; @@ -82,6 +82,7 @@ struct xfs_ail { extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); static inline void @@ -90,7 +91,7 @@ xfs_trans_ail_update( struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); + xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, @@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn); +struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 619720705bc6..88d121486c52 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -50,430 +50,6 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" -int -xfs_setattr( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - xfs_mount_t *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - xfs_trans_t *tp; - int code; - uint lock_flags; - uint commit_flags=0; - uid_t uid=0, iuid=0; - gid_t gid=0, igid=0; - struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; - int need_iolock = 1; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - code = -inode_change_ok(inode, iattr); - if (code) - return code; - - olddquot1 = olddquot2 = NULL; - udqp = gdqp = NULL; - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. - * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { - uint qflags = 0; - - if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = iattr->ia_uid; - qflags |= XFS_QMOPT_UQUOTA; - } else { - uid = ip->i_d.di_uid; - } - if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = iattr->ia_gid; - qflags |= XFS_QMOPT_GQUOTA; - } else { - gid = ip->i_d.di_gid; - } - - /* - * We take a reference when we initialize udqp and gdqp, - * so it is important that we never blindly double trip on - * the same variable. See xfs_create() for an example. - */ - ASSERT(udqp == NULL); - ASSERT(gdqp == NULL); - code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); - if (code) - return code; - } - - /* - * For the other attributes, we acquire the inode lock and - * first do an error checking pass. - */ - tp = NULL; - lock_flags = XFS_ILOCK_EXCL; - if (flags & XFS_ATTR_NOLOCK) - need_iolock = 0; - if (!(mask & ATTR_SIZE)) { - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - commit_flags = 0; - code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), - 0, 0, 0); - if (code) { - lock_flags = 0; - goto error_return; - } - } else { - if (need_iolock) - lock_flags |= XFS_IOLOCK_EXCL; - } - - xfs_ilock(ip, lock_flags); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = ip->i_d.di_uid; - igid = ip->i_d.di_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * Do a quota reservation only if uid/gid is actually - * going to change. - */ - if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { - ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (code) /* out of quota */ - goto error_return; - } - } - - /* - * Truncate file. Must have write permission and not be a directory. - */ - if (mask & ATTR_SIZE) { - /* Short circuit the truncate case for zero length files */ - if (iattr->ia_size == 0 && - ip->i_size == 0 && ip->i_d.di_nextents == 0) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - lock_flags &= ~XFS_ILOCK_EXCL; - if (mask & ATTR_CTIME) { - inode->i_mtime = inode->i_ctime = - current_fs_time(inode->i_sb); - xfs_mark_inode_dirty_sync(ip); - } - code = 0; - goto error_return; - } - - if (S_ISDIR(ip->i_d.di_mode)) { - code = XFS_ERROR(EISDIR); - goto error_return; - } else if (!S_ISREG(ip->i_d.di_mode)) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - - /* - * Make sure that the dquots are attached to the inode. - */ - code = xfs_qm_dqattach_locked(ip, 0); - if (code) - goto error_return; - - /* - * Now we can make the changes. Before we join the inode - * to the transaction, if ATTR_SIZE is set then take care of - * the part of the truncation that must be done without the - * inode lock. This needs to be done before joining the inode - * to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (iattr->ia_size > ip->i_size) { - /* - * Do the first part of growing a file: zero any data - * in the last block that is beyond the old EOF. We - * need to do this before the inode is joined to the - * transaction to modify the i_size. - */ - code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); - if (code) - goto error_return; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - lock_flags &= ~XFS_ILOCK_EXCL; - - /* - * We are going to log the inode size change in this - * transaction so any previous writes that are beyond the on - * disk EOF and the new EOF that have not been written out need - * to be written here. If we do not write the data out, we - * expose ourselves to the null files problem. - * - * Only flush from the on disk size to the smaller of the in - * memory file size or the new size as that's the range we - * really care about here and prevents waiting for other data - * not within the range we care about here. - */ - if (ip->i_size != ip->i_d.di_size && - iattr->ia_size > ip->i_d.di_size) { - code = xfs_flush_pages(ip, - ip->i_d.di_size, iattr->ia_size, - XBF_ASYNC, FI_NONE); - if (code) - goto error_return; - } - - /* wait for all I/O to complete */ - xfs_ioend_wait(ip); - - code = -block_truncate_page(inode->i_mapping, iattr->ia_size, - xfs_get_blocks); - if (code) - goto error_return; - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (code) - goto error_return; - - truncate_setsize(inode, iattr->ia_size); - - commit_flags = XFS_TRANS_RELEASE_LOG_RES; - lock_flags |= XFS_ILOCK_EXCL; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - - /* - * Only change the c/mtime if we are changing the size - * or we are explicitly asked to change it. This handles - * the semantic difference between truncate() and ftruncate() - * as implemented in the VFS. - * - * The regular truncate() case without ATTR_CTIME and ATTR_MTIME - * is a special case where we need to update the times despite - * not having these flags set. For all other operations the - * VFS set these flags explicitly if it wants a timestamp - * update. - */ - if (iattr->ia_size != ip->i_size && - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { - iattr->ia_ctime = iattr->ia_mtime = - current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; - } - - if (iattr->ia_size > ip->i_size) { - ip->i_d.di_size = iattr->ia_size; - ip->i_size = iattr->ia_size; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - } else if (iattr->ia_size <= ip->i_size || - (iattr->ia_size == 0 && ip->i_d.di_nextents)) { - /* - * signal a sync transaction unless - * we're truncating an already unlinked - * file on a wsync filesystem - */ - code = xfs_itruncate_finish(&tp, ip, iattr->ia_size, - XFS_DATA_FORK, - ((ip->i_d.di_nlink != 0 || - !(mp->m_flags & XFS_MOUNT_WSYNC)) - ? 1 : 0)); - if (code) - goto abort_return; - /* - * Truncated "down", so we're removing references - * to old data here - if we now delay flushing for - * a long time, we expose ourselves unduly to the - * notorious NULL files problem. So, we mark this - * vnode and flush it when the file is closed, and - * do not wait the usual (long) time for writeout. - */ - xfs_iflags_set(ip, XFS_ITRUNCATED); - } - } else if (tp) { - xfs_trans_ijoin(tp, ip); - } - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) { - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - } - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (iuid != uid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - ip->i_d.di_uid = uid; - inode->i_uid = uid; - } - if (igid != gid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_gid = gid; - inode->i_gid = gid; - } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; - } - - /* - * Change file access or modified times. - */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - /* - * And finally, log the inode core if any attribute in it - * has been changed. - */ - if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE| - ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesystems. - * One for the truncate and one for the timestamps since we - * don't want to change the timestamps unless we're sure the - * truncate worked. Truncates are less than 1% of the laddis - * mix so this probably isn't worth the trouble to optimize. - */ - code = 0; - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - - code = xfs_trans_commit(tp, commit_flags); - - xfs_iunlock(ip, lock_flags); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - if (code) - return code; - - /* - * XXX(hch): Updating the ACL entries is not atomic vs the i_mode - * update. We could avoid this with linked transactions - * and passing down the transaction pointer all the way - * to attr_set. No previous user of the generic - * Posix ACL code seems to care about this issue either. - */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - code = -xfs_acl_chmod(inode); - if (code) - return XFS_ERROR(code); - } - - return 0; - - abort_return: - commit_flags |= XFS_TRANS_ABORT; - error_return: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - if (tp) { - xfs_trans_cancel(tp, commit_flags); - } - if (lock_flags != 0) { - xfs_iunlock(ip, lock_flags); - } - return code; -} - /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 2 extents back from @@ -621,13 +197,6 @@ xfs_free_eofblocks( */ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - /* - * Do the xfs_itruncate_start() call before - * reserving any log space because - * itruncate_start will call into the buffer - * cache and we can't - * do that within a transaction. - */ if (flags & XFS_FREE_EOF_TRYLOCK) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { xfs_trans_cancel(tp, 0); @@ -636,13 +205,6 @@ xfs_free_eofblocks( } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); } - error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, - ip->i_size); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; - } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -658,15 +220,12 @@ xfs_free_eofblocks( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - error = xfs_itruncate_finish(&tp, ip, - ip->i_size, - XFS_DATA_FORK, - 0); - /* - * If we get an error at this point we - * simply don't bother truncating the file. - */ + error = xfs_itruncate_data(&tp, ip, ip->i_size); if (error) { + /* + * If we get an error at this point we simply don't + * bother truncating the file. + */ xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); @@ -1084,20 +643,9 @@ xfs_inactive( tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); if (truncate) { - /* - * Do the xfs_itruncate_start() call before - * reserving any log space because itruncate_start - * will call into the buffer cache and we can't - * do that within a transaction. - */ xfs_ilock(ip, XFS_IOLOCK_EXCL); - error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return VN_INACTIVE_CACHE; - } + xfs_ioend_wait(ip); error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1114,16 +662,7 @@ xfs_inactive( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); - /* - * normally, we have to run xfs_itruncate_finish sync. - * But if filesystem is wsync and we're in the inactive - * path, then we know that nlink == 0, and that the - * xaction that made nlink == 0 is permanently committed - * since xfs_remove runs as a synchronous transaction. - */ - error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, - (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); - + error = xfs_itruncate_data(&tp, ip, 0); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -2430,6 +1969,8 @@ xfs_zero_remaining_bytes( if (!bp) return XFS_ERROR(ENOMEM); + xfs_buf_unlock(bp); + for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; @@ -2784,7 +2325,7 @@ xfs_change_file_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr(ip, &iattr, attr_flags); + error = xfs_setattr_size(ip, &iattr, attr_flags); if (error) return error; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 3bcd23353d6c..35d3d513e1e9 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -13,7 +13,8 @@ struct xfs_inode; struct xfs_iomap; -int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); +int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); +int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ |