Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 160
-rw-r--r-- | kernel/audit.h | 10
-rw-r--r-- | kernel/auditfilter.c | 289
-rw-r--r-- | kernel/auditsc.c | 269
-rw-r--r-- | kernel/cpuset.c | 25
-rw-r--r-- | kernel/exit.c | 5
-rw-r--r-- | kernel/extable.c | 2
-rw-r--r-- | kernel/fork.c | 11
-rw-r--r-- | kernel/hrtimer.c | 10
-rw-r--r-- | kernel/irq/manage.c | 6
-rw-r--r-- | kernel/kprobes.c | 3
-rw-r--r-- | kernel/module.c | 12
-rw-r--r-- | kernel/power/main.c | 2
-rw-r--r-- | kernel/power/pm.c | 20
-rw-r--r-- | kernel/power/snapshot.c | 9
-rw-r--r-- | kernel/profile.c | 2
-rw-r--r-- | kernel/ptrace.c | 64
-rw-r--r-- | kernel/rcupdate.c | 23
-rw-r--r-- | kernel/sched.c | 64
-rw-r--r-- | kernel/signal.c | 4
-rw-r--r-- | kernel/softirq.c | 4
-rw-r--r-- | kernel/softlockup.c | 4
-rw-r--r-- | kernel/timer.c | 20
-rw-r--r-- | kernel/uid16.c | 59
-rw-r--r-- | kernel/workqueue.c | 2
25 files changed, 736 insertions(+), 343 deletions(-)
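
A note on the audit changes that follow: nearly every configuration-change path in kernel/audit.c and kernel/auditfilter.c now resolves the sender's SELinux SID to a context string via selinux_ctxid_to_string() and logs it as subj=, falling back to the raw ssid= value (or returning the error) when resolution fails. The sketch below condenses that recurring pattern into one illustrative helper; log_config_change() and the audit_param= key are invented for illustration, while selinux_ctxid_to_string() and audit_log() are the calls visible in the diff.

#include <linux/audit.h>
#include <linux/selinux.h>
#include <linux/slab.h>

/* Illustrative distillation of the pattern repeated in
 * audit_set_enabled(), audit_set_failure(), audit_set_rate_limit()
 * and audit_set_backlog_limit() below; not the exact kernel code. */
static int log_config_change(int val, int old, uid_t loginuid, u32 sid)
{
	if (sid) {
		char *ctx = NULL;
		u32 len;
		int rc;

		/* Resolve the SID to a printable context; refuse the
		 * change if the conversion fails. */
		rc = selinux_ctxid_to_string(sid, &ctx, &len);
		if (rc)
			return rc;
		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
			  "audit_param=%d old=%d by auid=%u subj=%s",
			  val, old, loginuid, ctx);
		kfree(ctx);
	} else
		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
			  "audit_param=%d old=%d by auid=%u",
			  val, old, loginuid);
	return 0;
}

Note the ordering the diff enforces: the record is written (and any conversion error returned) before the new value is stored, so a failed SID lookup leaves the old setting intact.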
diff --git a/kernel/audit.c b/kernel/audit.c index c8ccbd09048f..df57b493e1cb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -55,6 +55,9 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/selinux.h> + +#include "audit.h" /* No auditing will take place until audit_initialized != 0. * (Initialization happens after skb_init is called.) */ @@ -227,49 +230,103 @@ void audit_log_lost(const char *message) } } -static int audit_set_rate_limit(int limit, uid_t loginuid) +static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) { - int old = audit_rate_limit; - audit_rate_limit = limit; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + int old = audit_rate_limit; + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_rate_limit=%d old=%d by auid=%u subj=%s", + limit, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_rate_limit=%d old=%d by auid=%u", - audit_rate_limit, old, loginuid); + limit, old, loginuid); + audit_rate_limit = limit; return old; } -static int audit_set_backlog_limit(int limit, uid_t loginuid) +static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) { - int old = audit_backlog_limit; - audit_backlog_limit = limit; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + int old = audit_backlog_limit; + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_backlog_limit=%d old=%d by auid=%u subj=%s", + limit, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_backlog_limit=%d old=%d by auid=%u", - audit_backlog_limit, old, loginuid); + limit, old, loginuid); + audit_backlog_limit = limit; return old; } -static int audit_set_enabled(int state, uid_t loginuid) +static int audit_set_enabled(int state, uid_t loginuid, u32 sid) { - int old = audit_enabled; + int old = audit_enabled; + if (state != 0 && state != 1) return -EINVAL; - audit_enabled = state; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_enabled=%d old=%d by auid=%u subj=%s", + state, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_enabled=%d old=%d by auid=%u", - audit_enabled, old, loginuid); + state, old, loginuid); + audit_enabled = state; return old; } -static int audit_set_failure(int state, uid_t loginuid) +static int audit_set_failure(int state, uid_t loginuid, u32 sid) { - int old = audit_failure; + int old = audit_failure; + if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK && state != AUDIT_FAIL_PANIC) return -EINVAL; - audit_failure = state; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_failure=%d old=%d by auid=%u subj=%s", + state, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_failure=%d old=%d by auid=%u", - audit_failure, old, loginuid); + state, old, loginuid); + 
audit_failure = state; return old; } @@ -387,7 +444,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type) static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - u32 uid, pid, seq; + u32 uid, pid, seq, sid; void *data; struct audit_status *status_get, status_set; int err; @@ -413,6 +470,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) pid = NETLINK_CREDS(skb)->pid; uid = NETLINK_CREDS(skb)->uid; loginuid = NETLINK_CB(skb).loginuid; + sid = NETLINK_CB(skb).sid; seq = nlh->nlmsg_seq; data = NLMSG_DATA(nlh); @@ -433,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { - err = audit_set_enabled(status_get->enabled, loginuid); + err = audit_set_enabled(status_get->enabled, + loginuid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_FAILURE) { - err = audit_set_failure(status_get->failure, loginuid); + err = audit_set_failure(status_get->failure, + loginuid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string( + sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, + AUDIT_CONFIG_CHANGE, + "audit_pid=%d old=%d by auid=%u subj=%s", + status_get->pid, old, + loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_pid=%d old=%d by auid=%u", + status_get->pid, old, loginuid); audit_pid = status_get->pid; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "audit_pid=%d old=%d by auid=%u", - audit_pid, old, loginuid); } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) - audit_set_rate_limit(status_get->rate_limit, loginuid); + audit_set_rate_limit(status_get->rate_limit, + loginuid, sid); if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) audit_set_backlog_limit(status_get->backlog_limit, - loginuid); + loginuid, sid); break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: @@ -465,8 +541,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) ab = audit_log_start(NULL, GFP_KERNEL, msg_type); if (ab) { audit_log_format(ab, - "user pid=%d uid=%u auid=%u msg='%.1024s'", - pid, uid, loginuid, (char *)data); + "user pid=%d uid=%u auid=%u", + pid, uid, loginuid); + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + sid, &ctx, &len)) { + audit_log_format(ab, + " ssid=%u", sid); + /* Maybe call audit_panic? 
*/ + } else + audit_log_format(ab, + " subj=%s", ctx); + kfree(ctx); + } + audit_log_format(ab, " msg='%.1024s'", + (char *)data); audit_set_pid(ab, pid); audit_log_end(ab); } @@ -480,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid); + loginuid, sid); break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: @@ -490,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST_RULES: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid); + loginuid, sid); break; case AUDIT_SIGNAL_INFO: sig_data.uid = audit_sig_uid; @@ -564,6 +655,11 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); audit_initialized = 1; audit_enabled = audit_default; + + /* Register the callback with selinux. This callback will be invoked + * when a new policy is loaded. */ + selinux_audit_set_callback(&selinux_audit_rule_update); + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); return 0; } diff --git a/kernel/audit.h b/kernel/audit.h index bc5392076e2b..6f733920fd32 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -54,9 +54,11 @@ enum audit_state { /* Rule lists */ struct audit_field { - u32 type; - u32 val; - u32 op; + u32 type; + u32 val; + u32 op; + char *se_str; + struct selinux_audit_rule *se_rule; }; struct audit_krule { @@ -86,3 +88,5 @@ extern void audit_send_reply(int pid, int seq, int type, extern void audit_log_lost(const char *message); extern void audit_panic(const char *message); extern struct mutex audit_netlink_mutex; + +extern int selinux_audit_rule_update(void); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index d3a8539f3a83..7c134906d689 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -23,6 +23,7 @@ #include <linux/audit.h> #include <linux/kthread.h> #include <linux/netlink.h> +#include <linux/selinux.h> #include "audit.h" /* There are three lists of rules -- one to search at task creation @@ -42,6 +43,13 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { static inline void audit_free_rule(struct audit_entry *e) { + int i; + if (e->rule.fields) + for (i = 0; i < e->rule.field_count; i++) { + struct audit_field *f = &e->rule.fields[i]; + kfree(f->se_str); + selinux_audit_rule_free(f->se_rule); + } kfree(e->rule.fields); kfree(e); } @@ -52,9 +60,29 @@ static inline void audit_free_rule_rcu(struct rcu_head *head) audit_free_rule(e); } +/* Initialize an audit filterlist entry. */ +static inline struct audit_entry *audit_init_entry(u32 field_count) +{ + struct audit_entry *entry; + struct audit_field *fields; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (unlikely(!entry)) + return NULL; + + fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL); + if (unlikely(!fields)) { + kfree(entry); + return NULL; + } + entry->rule.fields = fields; + + return entry; +} + /* Unpack a filter field's string representation from user-space * buffer. 
*/ -static __attribute__((unused)) char *audit_unpack_string(void **bufp, size_t *remain, size_t len) +static char *audit_unpack_string(void **bufp, size_t *remain, size_t len) { char *str; @@ -84,7 +112,6 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) { unsigned listnr; struct audit_entry *entry; - struct audit_field *fields; int i, err; err = -EINVAL; @@ -108,23 +135,14 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) goto exit_err; err = -ENOMEM; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (unlikely(!entry)) - goto exit_err; - fields = kmalloc(sizeof(*fields) * rule->field_count, GFP_KERNEL); - if (unlikely(!fields)) { - kfree(entry); + entry = audit_init_entry(rule->field_count); + if (!entry) goto exit_err; - } - - memset(&entry->rule, 0, sizeof(struct audit_krule)); - memset(fields, 0, sizeof(struct audit_field)); entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND; entry->rule.listnr = listnr; entry->rule.action = rule->action; entry->rule.field_count = rule->field_count; - entry->rule.fields = fields; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) entry->rule.mask[i] = rule->mask[i]; @@ -150,15 +168,20 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; - if (rule->fields[i] & AUDIT_UNUSED_BITS) { - err = -EINVAL; - goto exit_free; - } - f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS); f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); f->val = rule->values[i]; + if (f->type & AUDIT_UNUSED_BITS || + f->type == AUDIT_SE_USER || + f->type == AUDIT_SE_ROLE || + f->type == AUDIT_SE_TYPE || + f->type == AUDIT_SE_SEN || + f->type == AUDIT_SE_CLR) { + err = -EINVAL; + goto exit_free; + } + entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; /* Support for legacy operators where @@ -188,8 +211,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, int err = 0; struct audit_entry *entry; void *bufp; - /* size_t remain = datasz - sizeof(struct audit_rule_data); */ + size_t remain = datasz - sizeof(struct audit_rule_data); int i; + char *str; entry = audit_to_entry_common((struct audit_rule *)data); if (IS_ERR(entry)) @@ -207,10 +231,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->op = data->fieldflags[i] & AUDIT_OPERATORS; f->type = data->fields[i]; + f->val = data->values[i]; + f->se_str = NULL; + f->se_rule = NULL; switch(f->type) { - /* call type-specific conversion routines here */ - default: - f->val = data->values[i]; + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + str = audit_unpack_string(&bufp, &remain, f->val); + if (IS_ERR(str)) + goto exit_free; + entry->rule.buflen += f->val; + + err = selinux_audit_rule_init(f->type, f->op, str, + &f->se_rule); + /* Keep currently invalid fields around in case they + * become valid after a policy reload. 
*/ + if (err == -EINVAL) { + printk(KERN_WARNING "audit rule for selinux " + "\'%s\' is invalid\n", str); + err = 0; + } + if (err) { + kfree(str); + goto exit_free; + } else + f->se_str = str; + break; } } @@ -286,7 +335,14 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->fields[i] = f->type; data->fieldflags[i] = f->op; switch(f->type) { - /* call type-specific conversion routines here */ + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + data->buflen += data->values[i] = + audit_pack_string(&bufp, f->se_str); + break; default: data->values[i] = f->val; } @@ -314,7 +370,14 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) return 1; switch(a->fields[i].type) { - /* call type-specific comparison routines here */ + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) + return 1; + break; default: if (a->fields[i].val != b->fields[i].val) return 1; @@ -328,6 +391,81 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) return 0; } +/* Duplicate selinux field information. The se_rule is opaque, so must be + * re-initialized. */ +static inline int audit_dupe_selinux_field(struct audit_field *df, + struct audit_field *sf) +{ + int ret = 0; + char *se_str; + + /* our own copy of se_str */ + se_str = kstrdup(sf->se_str, GFP_KERNEL); + if (unlikely(IS_ERR(se_str))) + return -ENOMEM; + df->se_str = se_str; + + /* our own (refreshed) copy of se_rule */ + ret = selinux_audit_rule_init(df->type, df->op, df->se_str, + &df->se_rule); + /* Keep currently invalid fields around in case they + * become valid after a policy reload. */ + if (ret == -EINVAL) { + printk(KERN_WARNING "audit rule for selinux \'%s\' is " + "invalid\n", df->se_str); + ret = 0; + } + + return ret; +} + +/* Duplicate an audit rule. This will be a deep copy with the exception + * of the watch - that pointer is carried over. The selinux specific fields + * will be updated in the copy. The point is to be able to replace the old + * rule with the new rule in the filterlist, then free the old rule. */ +static struct audit_entry *audit_dupe_rule(struct audit_krule *old) +{ + u32 fcount = old->field_count; + struct audit_entry *entry; + struct audit_krule *new; + int i, err = 0; + + entry = audit_init_entry(fcount); + if (unlikely(!entry)) + return ERR_PTR(-ENOMEM); + + new = &entry->rule; + new->vers_ops = old->vers_ops; + new->flags = old->flags; + new->listnr = old->listnr; + new->action = old->action; + for (i = 0; i < AUDIT_BITMASK_SIZE; i++) + new->mask[i] = old->mask[i]; + new->buflen = old->buflen; + new->field_count = old->field_count; + memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); + + /* deep copy this information, updating the se_rule fields, because + * the originals will all be freed when the old rule is freed. */ + for (i = 0; i < fcount; i++) { + switch (new->fields[i].type) { + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + err = audit_dupe_selinux_field(&new->fields[i], + &old->fields[i]); + } + if (err) { + audit_free_rule(entry); + return ERR_PTR(err); + } + } + + return entry; +} + /* Add rule to given filterlist if not a duplicate. Protected by * audit_netlink_mutex. 
*/ static inline int audit_add_rule(struct audit_entry *entry, @@ -448,9 +586,10 @@ static int audit_list_rules(void *_dest) * @data: payload data * @datasz: size of payload data * @loginuid: loginuid of sender + * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, - size_t datasz, uid_t loginuid) + size_t datasz, uid_t loginuid, u32 sid) { struct task_struct *tsk; int *dest; @@ -493,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_add_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "auid=%u add rule to list=%d res=%d\n", - loginuid, entry->rule.listnr, !err); + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string(sid, &ctx, &len)) { + /* Maybe call audit_panic? */ + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u ssid=%u add rule to list=%d res=%d", + loginuid, sid, entry->rule.listnr, !err); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u subj=%s add rule to list=%d res=%d", + loginuid, ctx, entry->rule.listnr, !err); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u add rule to list=%d res=%d", + loginuid, entry->rule.listnr, !err); if (err) audit_free_rule(entry); @@ -511,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_del_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "auid=%u remove rule from list=%d res=%d\n", - loginuid, entry->rule.listnr, !err); + + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string(sid, &ctx, &len)) { + /* Maybe call audit_panic? */ + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u ssid=%u remove rule from list=%d res=%d", + loginuid, sid, entry->rule.listnr, !err); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u subj=%s remove rule from list=%d res=%d", + loginuid, ctx, entry->rule.listnr, !err); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u remove rule from list=%d res=%d", + loginuid, entry->rule.listnr, !err); audit_free_rule(entry); break; @@ -628,3 +796,62 @@ unlock_and_return: rcu_read_unlock(); return result; } + +/* Check to see if the rule contains any selinux fields. Returns 1 if there + are selinux fields specified in the rule, 0 otherwise. */ +static inline int audit_rule_has_selinux(struct audit_krule *rule) +{ + int i; + + for (i = 0; i < rule->field_count; i++) { + struct audit_field *f = &rule->fields[i]; + switch (f->type) { + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + return 1; + } + } + + return 0; +} + +/* This function will re-initialize the se_rule field of all applicable rules. + * It will traverse the filter lists serarching for rules that contain selinux + * specific filter fields. When such a rule is found, it is copied, the + * selinux field is re-initialized, and the old rule is replaced with the + * updated rule. 
*/ +int selinux_audit_rule_update(void) +{ + struct audit_entry *entry, *n, *nentry; + int i, err = 0; + + /* audit_netlink_mutex synchronizes the writers */ + mutex_lock(&audit_netlink_mutex); + + for (i = 0; i < AUDIT_NR_FILTERS; i++) { + list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { + if (!audit_rule_has_selinux(&entry->rule)) + continue; + + nentry = audit_dupe_rule(&entry->rule); + if (unlikely(IS_ERR(nentry))) { + /* save the first error encountered for the + * return value */ + if (!err) + err = PTR_ERR(nentry); + audit_panic("error updating selinux filters"); + list_del_rcu(&entry->list); + } else { + list_replace_rcu(&entry->list, &nentry->list); + } + call_rcu(&entry->rcu, audit_free_rule_rcu); + } + } + + mutex_unlock(&audit_netlink_mutex); + + return err; +} diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7f160df21a23..1c03a4ed1b27 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -58,6 +58,7 @@ #include <linux/security.h> #include <linux/list.h> #include <linux/tty.h> +#include <linux/selinux.h> #include "audit.h" @@ -89,7 +90,7 @@ struct audit_names { uid_t uid; gid_t gid; dev_t rdev; - char *ctx; + u32 osid; }; struct audit_aux_data { @@ -106,7 +107,7 @@ struct audit_aux_data_ipcctl { uid_t uid; gid_t gid; mode_t mode; - char *ctx; + u32 osid; }; struct audit_aux_data_socketcall { @@ -167,7 +168,8 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_context *ctx, enum audit_state *state) { - int i, j; + int i, j, need_sid = 1; + u32 sid; for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; @@ -257,6 +259,27 @@ static int audit_filter_rules(struct task_struct *tsk, if (ctx) result = audit_comparator(ctx->loginuid, f->op, f->val); break; + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + /* NOTE: this may return negative values indicating + a temporary error. We simply treat this as a + match for now to avoid losing information that + may be wanted. An error message will also be + logged upon error */ + if (f->se_rule) { + if (need_sid) { + selinux_task_ctxid(tsk, &sid); + need_sid = 0; + } + result = selinux_audit_rule_match(sid, f->type, + f->op, + f->se_rule, + ctx); + } + break; case AUDIT_ARG0: case AUDIT_ARG1: case AUDIT_ARG2: @@ -329,7 +352,6 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, return AUDIT_BUILD_CONTEXT; } -/* This should be called with task_lock() held. 
*/ static inline struct audit_context *audit_get_context(struct task_struct *tsk, int return_valid, int return_code) @@ -391,9 +413,6 @@ static inline void audit_free_names(struct audit_context *context) #endif for (i = 0; i < context->name_count; i++) { - char *p = context->names[i].ctx; - context->names[i].ctx = NULL; - kfree(p); if (context->names[i].name) __putname(context->names[i].name); } @@ -416,11 +435,6 @@ static inline void audit_free_aux(struct audit_context *context) dput(axi->dentry); mntput(axi->mnt); } - if ( aux->type == AUDIT_IPC ) { - struct audit_aux_data_ipcctl *axi = (void *)aux; - if (axi->ctx) - kfree(axi->ctx); - } context->aux = aux->next; kfree(aux); @@ -506,7 +520,7 @@ static inline void audit_free_context(struct audit_context *context) printk(KERN_ERR "audit: freed %d contexts\n", count); } -static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask) +static void audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; ssize_t len = 0; @@ -518,7 +532,7 @@ static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask) return; } - ctx = kmalloc(len, gfp_mask); + ctx = kmalloc(len, GFP_KERNEL); if (!ctx) goto error_path; @@ -536,47 +550,46 @@ error_path: return; } -static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask) +static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { - char name[sizeof(current->comm)]; - struct mm_struct *mm = current->mm; + char name[sizeof(tsk->comm)]; + struct mm_struct *mm = tsk->mm; struct vm_area_struct *vma; - get_task_comm(name, current); + /* tsk == current */ + + get_task_comm(name, tsk); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, name); - if (!mm) - return; - - /* - * this is brittle; all callers that pass GFP_ATOMIC will have - * NULL current->mm and we won't get here. 
- */ - down_read(&mm->mmap_sem); - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { - audit_log_d_path(ab, "exe=", - vma->vm_file->f_dentry, - vma->vm_file->f_vfsmnt); - break; + if (mm) { + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && + vma->vm_file) { + audit_log_d_path(ab, "exe=", + vma->vm_file->f_dentry, + vma->vm_file->f_vfsmnt); + break; + } + vma = vma->vm_next; } - vma = vma->vm_next; + up_read(&mm->mmap_sem); } - up_read(&mm->mmap_sem); - audit_log_task_context(ab, gfp_mask); + audit_log_task_context(ab); } -static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) +static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - int i; + int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; const char *tty; - ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL); + /* tsk == current */ + + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); if (!ab) return; /* audit_panic has been called */ audit_log_format(ab, "arch=%x syscall=%d", @@ -587,8 +600,8 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) audit_log_format(ab, " success=%s exit=%ld", (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", context->return_code); - if (current->signal->tty && current->signal->tty->name) - tty = current->signal->tty->name; + if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) + tty = tsk->signal->tty->name; else tty = "(none)"; audit_log_format(ab, @@ -607,12 +620,12 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) context->gid, context->euid, context->suid, context->fsuid, context->egid, context->sgid, context->fsgid, tty); - audit_log_task_info(ab, gfp_mask); + audit_log_task_info(ab, tsk); audit_log_end(ab); for (aux = context->aux; aux; aux = aux->next) { - ab = audit_log_start(context, gfp_mask, aux->type); + ab = audit_log_start(context, GFP_KERNEL, aux->type); if (!ab) continue; /* audit_panic has been called */ @@ -620,8 +633,39 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) case AUDIT_IPC: { struct audit_aux_data_ipcctl *axi = (void *)aux; audit_log_format(ab, - " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s", - axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx); + " qbytes=%lx iuid=%u igid=%u mode=%x", + axi->qbytes, axi->uid, axi->gid, axi->mode); + if (axi->osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + axi->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + axi->osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); + } + break; } + + case AUDIT_IPC_SET_PERM: { + struct audit_aux_data_ipcctl *axi = (void *)aux; + audit_log_format(ab, + " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", + axi->qbytes, axi->uid, axi->gid, axi->mode); + if (axi->osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + axi->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + axi->osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); + } break; } case AUDIT_SOCKETCALL: { @@ -649,7 +693,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) } if (context->pwd && context->pwdmnt) { - ab = audit_log_start(context, gfp_mask, AUDIT_CWD); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); audit_log_end(ab); @@ -659,7 
+703,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) unsigned long ino = context->names[i].ino; unsigned long pino = context->names[i].pino; - ab = audit_log_start(context, gfp_mask, AUDIT_PATH); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); if (!ab) continue; /* audit_panic has been called */ @@ -685,32 +729,35 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) context->names[i].gid, MAJOR(context->names[i].rdev), MINOR(context->names[i].rdev)); - if (context->names[i].ctx) { - audit_log_format(ab, " obj=%s", - context->names[i].ctx); + if (context->names[i].osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + context->names[i].osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + context->names[i].osid); + call_panic = 2; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); } audit_log_end(ab); } + if (call_panic) + audit_panic("error converting sid to string"); } /** * audit_free - free a per-task audit context * @tsk: task whose audit context block to free * - * Called from copy_process and __put_task_struct. + * Called from copy_process and do_exit */ void audit_free(struct task_struct *tsk) { struct audit_context *context; - /* - * No need to lock the task - when we execute audit_free() - * then the task has no external references anymore, and - * we are tearing it down. (The locking also confuses - * DEBUG_LOCKDEP - this freeing may occur in softirq - * contexts as well, via RCU.) - */ context = audit_get_context(tsk, 0, 0); if (likely(!context)) return; @@ -719,8 +766,9 @@ void audit_free(struct task_struct *tsk) * function (e.g., exit_group), then free context block. * We use GFP_ATOMIC here because we might be doing this * in the context of the idle thread */ + /* that can happen only if we are called from do_exit() */ if (context->in_syscall && context->auditable) - audit_log_exit(context, GFP_ATOMIC); + audit_log_exit(context, tsk); audit_free_context(context); } @@ -743,10 +791,11 @@ void audit_free(struct task_struct *tsk) * will only be written if another part of the kernel requests that it * be written). */ -void audit_syscall_entry(struct task_struct *tsk, int arch, int major, +void audit_syscall_entry(int arch, int major, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4) { + struct task_struct *tsk = current; struct audit_context *context = tsk->audit_context; enum audit_state state; @@ -824,22 +873,18 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, * message), then write out the syscall information. In call cases, * free the names stored from getname(). */ -void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) +void audit_syscall_exit(int valid, long return_code) { + struct task_struct *tsk = current; struct audit_context *context; - get_task_struct(tsk); - task_lock(tsk); context = audit_get_context(tsk, valid, return_code); - task_unlock(tsk); - /* Not having a context here is ok, since the parent may have - * called __put_task_struct. 
*/ if (likely(!context)) - goto out; + return; if (context->in_syscall && context->auditable) - audit_log_exit(context, GFP_KERNEL); + audit_log_exit(context, tsk); context->in_syscall = 0; context->auditable = 0; @@ -854,8 +899,6 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) audit_free_aux(context); tsk->audit_context = context; } - out: - put_task_struct(tsk); } /** @@ -936,40 +979,11 @@ void audit_putname(const char *name) #endif } -void audit_inode_context(int idx, const struct inode *inode) +static void audit_inode_context(int idx, const struct inode *inode) { struct audit_context *context = current->audit_context; - const char *suffix = security_inode_xattr_getsuffix(); - char *ctx = NULL; - int len = 0; - - if (!suffix) - goto ret; - - len = security_inode_getsecurity(inode, suffix, NULL, 0, 0); - if (len == -EOPNOTSUPP) - goto ret; - if (len < 0) - goto error_path; - - ctx = kmalloc(len, GFP_KERNEL); - if (!ctx) - goto error_path; - len = security_inode_getsecurity(inode, suffix, ctx, len, 0); - if (len < 0) - goto error_path; - - kfree(context->names[idx].ctx); - context->names[idx].ctx = ctx; - goto ret; - -error_path: - if (ctx) - kfree(ctx); - audit_panic("error in audit_inode_context"); -ret: - return; + selinux_get_inode_sid(inode, &context->names[idx].osid); } @@ -1155,40 +1169,37 @@ uid_t audit_get_loginuid(struct audit_context *ctx) return ctx ? ctx->loginuid : -1; } -static char *audit_ipc_context(struct kern_ipc_perm *ipcp) +/** + * audit_ipc_obj - record audit data for ipc object + * @ipcp: ipc permissions + * + * Returns 0 for success or NULL context or < 0 on error. + */ +int audit_ipc_obj(struct kern_ipc_perm *ipcp) { + struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; - char *ctx = NULL; - int len = 0; if (likely(!context)) - return NULL; - - len = security_ipc_getsecurity(ipcp, NULL, 0); - if (len == -EOPNOTSUPP) - goto ret; - if (len < 0) - goto error_path; - - ctx = kmalloc(len, GFP_ATOMIC); - if (!ctx) - goto error_path; + return 0; - len = security_ipc_getsecurity(ipcp, ctx, len); - if (len < 0) - goto error_path; + ax = kmalloc(sizeof(*ax), GFP_ATOMIC); + if (!ax) + return -ENOMEM; - return ctx; + ax->uid = ipcp->uid; + ax->gid = ipcp->gid; + ax->mode = ipcp->mode; + selinux_get_ipc_sid(ipcp, &ax->osid); -error_path: - kfree(ctx); - audit_panic("error in audit_ipc_context"); -ret: - return NULL; + ax->d.type = AUDIT_IPC; + ax->d.next = context->aux; + context->aux = (void *)ax; + return 0; } /** - * audit_ipc_perms - record audit data for ipc + * audit_ipc_set_perm - record audit data for new ipc permissions * @qbytes: msgq bytes * @uid: msgq user id * @gid: msgq group id @@ -1196,7 +1207,7 @@ ret: * * Returns 0 for success or NULL context or < 0 on error. 
*/ -int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) +int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) { struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; @@ -1212,9 +1223,9 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str ax->uid = uid; ax->gid = gid; ax->mode = mode; - ax->ctx = audit_ipc_context(ipcp); + selinux_get_ipc_sid(ipcp, &ax->osid); - ax->d.type = AUDIT_IPC; + ax->d.type = AUDIT_IPC_SET_PERM; ax->d.next = context->aux; context->aux = (void *)ax; return 0; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 72248d1b9e3f..ab81fdd4572b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * So only GFP_KERNEL allocations, if all nodes in the cpuset are * short of memory, might require taking the callback_mutex mutex. * - * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() - * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing - * hardwall cpusets - no allocation on a node outside the cpuset is - * allowed (unless in interrupt, of course). - * - * The second loop doesn't even call here for GFP_ATOMIC requests - * (if the __alloc_pages() local variable 'wait' is set). That check - * and the checks below have the combined affect in the second loop of - * the __alloc_pages() routine that: + * The first call here from mm/page_alloc:get_page_from_freelist() + * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so + * no allocation on a node outside the cpuset is allowed (unless in + * interrupt, of course). + * + * The second pass through get_page_from_freelist() doesn't even call + * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() + * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set + * in alloc_flags. That logic and the checks below have the combined + * affect that: * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. + * + * Rule: + * Don't call cpuset_zone_allowed() if you can't sleep, unless you + * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables + * the code that might scan up ancestor cpusets and sleep. 
**/ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) @@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) if (in_interrupt()) return 1; node = z->zone_pgdat->node_id; + might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ diff --git a/kernel/exit.c b/kernel/exit.c index 1a9787ac6173..e95b93282210 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -35,6 +35,7 @@ #include <linux/futex.h> #include <linux/compat.h> #include <linux/pipe_fs_i.h> +#include <linux/audit.h> /* for audit_free() */ #include <asm/uaccess.h> #include <asm/unistd.h> @@ -56,7 +57,7 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); - list_del_init(&p->tasks); + list_del_rcu(&p->tasks); __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); @@ -910,6 +911,8 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(tsk->compat_robust_list)) compat_exit_robust_list(tsk); #endif + if (unlikely(tsk->audit_context)) + audit_free(tsk); exit_mm(tsk); exit_sem(tsk); diff --git a/kernel/extable.c b/kernel/extable.c index 7501b531ceed..7fe262855317 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static int core_kernel_text(unsigned long addr) +int core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) diff --git a/kernel/fork.c b/kernel/fork.c index 3384eb89cb1c..ac8100e3088a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -114,8 +114,6 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); - if (unlikely(tsk->audit_context)) - audit_free(tsk); security_task_free(tsk); free_uid(tsk->user); put_group_info(tsk->group_info); @@ -124,12 +122,6 @@ void __put_task_struct(struct task_struct *tsk) free_task(tsk); } -void __put_task_struct_cb(struct rcu_head *rhp) -{ - struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); - __put_task_struct(tsk); -} - void __init fork_init(unsigned long mempages) { #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR @@ -186,6 +178,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); tsk->btrace_seq = 0; + tsk->splice_pipe = NULL; return tsk; } @@ -1210,7 +1203,7 @@ static task_t *copy_process(unsigned long clone_flags, attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); - list_add_tail(&p->tasks, &init_task.tasks); + list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, p->pid); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d2a7296c8251..01fa2ae98a85 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -456,6 +456,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_start); /** * hrtimer_try_to_cancel - try to deactivate a timer @@ -484,6 +485,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); /** * hrtimer_cancel - cancel a timer and wait for the handler to finish. 
@@ -504,6 +506,7 @@ int hrtimer_cancel(struct hrtimer *timer) cpu_relax(); } } +EXPORT_SYMBOL_GPL(hrtimer_cancel); /** * hrtimer_get_remaining - get remaining time for the timer @@ -522,6 +525,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) return rem; } +EXPORT_SYMBOL_GPL(hrtimer_get_remaining); #ifdef CONFIG_NO_IDLE_HZ /** @@ -580,6 +584,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, timer->base = &bases[clock_id]; timer->node.rb_parent = HRTIMER_INACTIVE; } +EXPORT_SYMBOL_GPL(hrtimer_init); /** * hrtimer_get_res - get the timer resolution for a clock @@ -599,6 +604,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) return 0; } +EXPORT_SYMBOL_GPL(hrtimer_get_res); /* * Expire the per base hrtimer-queue: @@ -836,7 +842,7 @@ static void migrate_hrtimers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit hrtimer_cpu_notify(struct notifier_block *self, +static int hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -860,7 +866,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata hrtimers_nb = { +static struct notifier_block hrtimers_nb = { .notifier_call = hrtimer_cpu_notify, }; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ac766ad573e8..1279e3499534 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -246,8 +246,10 @@ int setup_irq(unsigned int irq, struct irqaction * new) mismatch: spin_unlock_irqrestore(&desc->lock, flags); - printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); - dump_stack(); + if (!(new->flags & SA_PROBEIRQ)) { + printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); + dump_stack(); + } return -EBUSY; } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1156eb0977d0..1fbf466a29aa 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -585,6 +585,9 @@ int __kprobes register_kretprobe(struct kretprobe *rp) int i; rp->kp.pre_handler = pre_handler_kretprobe; + rp->kp.post_handler = NULL; + rp->kp.fault_handler = NULL; + rp->kp.break_handler = NULL; /* Pre-allocate memory for max kretprobe instances */ if (rp->maxactive <= 0) { diff --git a/kernel/module.c b/kernel/module.c index d24deb0dbbc9..bbe04862e1b0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put); void symbol_put_addr(void *addr) { - unsigned long flags; + struct module *modaddr; - spin_lock_irqsave(&modlist_lock, flags); - if (!kernel_text_address((unsigned long)addr)) - BUG(); + if (core_kernel_text((unsigned long)addr)) + return; - module_put(module_text_address((unsigned long)addr)); - spin_unlock_irqrestore(&modlist_lock, flags); + if (!(modaddr = module_text_address((unsigned long)addr))) + BUG(); + module_put(modaddr); } EXPORT_SYMBOL_GPL(symbol_put_addr); diff --git a/kernel/power/main.c b/kernel/power/main.c index ee371f50ccaa..a6d9ef46009e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -272,7 +272,7 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n if (*s && !strncmp(buf, *s, len)) break; } - if (*s) + if (state < PM_SUSPEND_MAX && *s) error = enter_state(state); else error = -EINVAL; diff --git a/kernel/power/pm.c b/kernel/power/pm.c index 0f6908cce1dd..84063ac8fcfc 100644 --- a/kernel/power/pm.c +++ b/kernel/power/pm.c @@ -75,25 +75,6 @@ struct pm_dev *pm_register(pm_dev_t type, return dev; } -/** - * pm_unregister - unregister a device with power 
management - * @dev: device to unregister - * - * Remove a device from the power management notification lists. The - * dev passed must be a handle previously returned by pm_register. - */ - -void pm_unregister(struct pm_dev *dev) -{ - if (dev) { - mutex_lock(&pm_devs_lock); - list_del(&dev->entry); - mutex_unlock(&pm_devs_lock); - - kfree(dev); - } -} - static void __pm_unregister(struct pm_dev *dev) { if (dev) { @@ -258,7 +239,6 @@ int pm_send_all(pm_request_t rqst, void *data) } EXPORT_SYMBOL(pm_register); -EXPORT_SYMBOL(pm_unregister); EXPORT_SYMBOL(pm_unregister_all); EXPORT_SYMBOL(pm_send_all); EXPORT_SYMBOL(pm_active); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index c5863d02c89e..3eeedbb13b78 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -240,14 +240,15 @@ static void copy_data_pages(struct pbe *pblist) * free_pagedir - free pages allocated with alloc_pagedir() */ -static void free_pagedir(struct pbe *pblist) +static void free_pagedir(struct pbe *pblist, int clear_nosave_free) { struct pbe *pbe; while (pblist) { pbe = (pblist + PB_PAGE_SKIP)->next; ClearPageNosave(virt_to_page(pblist)); - ClearPageNosaveFree(virt_to_page(pblist)); + if (clear_nosave_free) + ClearPageNosaveFree(virt_to_page(pblist)); free_page((unsigned long)pblist); pblist = pbe; } @@ -389,7 +390,7 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed pbe->next = alloc_image_page(gfp_mask, safe_needed); } if (!pbe) { /* get_zeroed_page() failed */ - free_pagedir(pblist); + free_pagedir(pblist, 1); pblist = NULL; } else create_pbe_list(pblist, nr_pages); @@ -736,7 +737,7 @@ static int create_image(struct snapshot_handle *handle) pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); if (pblist) copy_page_backup_list(pblist, p); - free_pagedir(p); + free_pagedir(p, 0); if (!pblist) error = -ENOMEM; } diff --git a/kernel/profile.c b/kernel/profile.c index 5a730fdb1a2c..68afe121e507 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -299,7 +299,7 @@ out: } #ifdef CONFIG_HOTPLUG_CPU -static int __devinit profile_cpu_callback(struct notifier_block *info, +static int profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { int node, cpu = (unsigned long)__cpu; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0eeb7e66722c..921c22ad16e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -56,10 +56,6 @@ void ptrace_untrace(task_t *child) signal_wake_up(child, 1); } } - if (child->signal->flags & SIGNAL_GROUP_EXIT) { - sigaddset(&child->pending.signal, SIGKILL); - signal_wake_up(child, 1); - } spin_unlock(&child->sighand->siglock); } @@ -81,7 +77,8 @@ void __ptrace_unlink(task_t *child) add_parent(child); } - ptrace_untrace(child); + if (child->state == TASK_TRACED) + ptrace_untrace(child); } /* @@ -151,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task) int ptrace_attach(struct task_struct *task) { int retval; - task_lock(task); + retval = -EPERM; if (task->pid <= 1) - goto bad; + goto out; if (task->tgid == current->tgid) - goto bad; + goto out; + +repeat: + /* + * Nasty, nasty. + * + * We want to hold both the task-lock and the + * tasklist_lock for writing at the same time. + * But that's against the rules (tasklist_lock + * is taken for reading by interrupts on other + * cpu's that may have task_lock). 
+ */ + task_lock(task); + local_irq_disable(); + if (!write_trylock(&tasklist_lock)) { + local_irq_enable(); + task_unlock(task); + do { + cpu_relax(); + } while (!write_can_lock(&tasklist_lock)); + goto repeat; + } + /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) goto bad; @@ -169,17 +188,15 @@ int ptrace_attach(struct task_struct *task) ? PT_ATTACHED : 0); if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; - task_unlock(task); - write_lock_irq(&tasklist_lock); __ptrace_link(task, current); - write_unlock_irq(&tasklist_lock); force_sig_specific(SIGSTOP, task); - return 0; bad: + write_unlock_irq(&tasklist_lock); task_unlock(task); +out: return retval; } @@ -420,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request, */ int ptrace_traceme(void) { - int ret; + int ret = -EPERM; /* * Are we already being traced? */ - if (current->ptrace & PT_PTRACED) - return -EPERM; - ret = security_ptrace(current->parent, current); - if (ret) - return -EPERM; - /* - * Set the ptrace bit in the process ptrace flags. - */ - current->ptrace |= PT_PTRACED; - return 0; + task_lock(current); + if (!(current->ptrace & PT_PTRACED)) { + ret = security_ptrace(current->parent, current); + /* + * Set the ptrace bit in the process ptrace flags. + */ + if (!ret) + current->ptrace |= PT_PTRACED; + } + task_unlock(current); + return ret; } /** diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 13458bbaa1be..2058f88c7bbb 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) return 0; } +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ int rcu_pending(int cpu) { return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); } +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); + + return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); +} + void rcu_check_callbacks(int cpu, int user) { if (user || @@ -520,7 +539,7 @@ static void __devinit rcu_online_cpu(int cpu) tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); } -static int __devinit rcu_cpu_notify(struct notifier_block *self, +static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -537,7 +556,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata rcu_nb = { +static struct notifier_block rcu_nb = { .notifier_call = rcu_cpu_notify, }; diff --git a/kernel/sched.c b/kernel/sched.c index 365f0b90b4de..c13f1bd2df7d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -665,55 +665,13 @@ static int effective_prio(task_t *p) } /* - * We place interactive tasks back into the active array, if possible. 
- * - * To guarantee that this does not starve expired tasks we ignore the - * interactivity of a task if the first expired task had to wait more - * than a 'reasonable' amount of time. This deadline timeout is - * load-dependent, as the frequency of array switched decreases with - * increasing number of running tasks. We also ignore the interactivity - * if a better static_prio task has expired, and switch periodically - * regardless, to ensure that highly interactive tasks do not starve - * the less fortunate for unreasonably long periods. - */ -static inline int expired_starving(runqueue_t *rq) -{ - int limit; - - /* - * Arrays were recently switched, all is well - */ - if (!rq->expired_timestamp) - return 0; - - limit = STARVATION_LIMIT * rq->nr_running; - - /* - * It's time to switch arrays - */ - if (jiffies - rq->expired_timestamp >= limit) - return 1; - - /* - * There's a better selection in the expired array - */ - if (rq->curr->static_prio > rq->best_expired_prio) - return 1; - - /* - * All is well - */ - return 0; -} - -/* * __activate_task - move a task to the runqueue. */ static void __activate_task(task_t *p, runqueue_t *rq) { prio_array_t *target = rq->active; - if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p)))) + if (batch_task(p)) target = rq->expired; enqueue_task(p, target); rq->nr_running++; @@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk) } /* + * We place interactive tasks back into the active array, if possible. + * + * To guarantee that this does not starve expired tasks we ignore the + * interactivity of a task if the first expired task had to wait more + * than a 'reasonable' amount of time. This deadline timeout is + * load-dependent, as the frequency of array switched decreases with + * increasing number of running tasks. We also ignore the interactivity + * if a better static_prio task has expired: + */ +#define EXPIRED_STARVING(rq) \ + ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ + (jiffies - (rq)->expired_timestamp >= \ + STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ + ((rq)->curr->static_prio > (rq)->best_expired_prio)) + +/* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() @@ -2666,7 +2640,7 @@ void scheduler_tick(void) if (!rq->expired_timestamp) rq->expired_timestamp = jiffies; - if (!TASK_INTERACTIVE(p) || expired_starving(rq)) { + if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { enqueue_task(p, rq->expired); if (p->static_prio < rq->best_expired_prio) rq->best_expired_prio = p->static_prio; @@ -4814,7 +4788,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, /* Register at highest priority so that task migration (migrate_all_tasks) * happens before everything else. */ -static struct notifier_block __devinitdata migration_notifier = { +static struct notifier_block migration_notifier = { .notifier_call = migration_call, .priority = 10 }; diff --git a/kernel/signal.c b/kernel/signal.c index b14f895027c3..e5f8aea78ffe 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1754,9 +1754,9 @@ relock: /* Let the debugger run. */ ptrace_stop(signr, signr, info); - /* We're back. Did the debugger cancel the sig or group_exit? */ + /* We're back. Did the debugger cancel the sig? 
*/ signr = current->exit_code; - if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT) + if (signr == 0) continue; current->exit_code = 0; diff --git a/kernel/softirq.c b/kernel/softirq.c index ec8fed42a86f..336f92d64e2e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit cpu_callback(struct notifier_block *nfb, +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -484,7 +484,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __devinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index ced91e1ff564..14c7faf02909 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu) /* * Create/destroy watchdog threads as CPUs come and go: */ -static int __devinit +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; @@ -140,7 +140,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __devinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/timer.c b/kernel/timer.c index 883773788836..9e49deed468c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -541,6 +541,22 @@ found: } spin_unlock(&base->lock); + /* + * It can happen that other CPUs service timer IRQs and increment + * jiffies, but we have not yet got a local timer tick to process + * the timer wheels. In that case, the expiry time can be before + * jiffies, but since the high-resolution timer here is relative to + * jiffies, the default expression when high-resolution timers are + * not active, + * + * time_before(MAX_JIFFY_OFFSET + jiffies, expires) + * + * would falsely evaluate to true. 
If that is the case, just + * return jiffies so that we can immediately fire the local timer + */ + if (time_before(expires, jiffies)) + return jiffies; + if (time_before(hr_expires, expires)) return hr_expires; @@ -1314,7 +1330,7 @@ static void __devinit migrate_timers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit timer_cpu_notify(struct notifier_block *self, +static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -1334,7 +1350,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata timers_nb = { +static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; diff --git a/kernel/uid16.c b/kernel/uid16.c index aa25605027c8..187e2a423878 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -20,43 +20,67 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) { - return sys_chown(filename, low2highuid(user), low2highgid(group)); + long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) { - return sys_lchown(filename, low2highuid(user), low2highgid(group)); + long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) { - return sys_fchown(fd, low2highuid(user), low2highgid(group)); + long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) { - return sys_setregid(low2highgid(rgid), low2highgid(egid)); + long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_setgid16(old_gid_t gid) { - return sys_setgid(low2highgid(gid)); + long ret = sys_setgid(low2highgid(gid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) { - return sys_setreuid(low2highuid(ruid), low2highuid(euid)); + long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_setuid16(old_uid_t uid) { - return sys_setuid(low2highuid(uid)); + long ret = sys_setuid(low2highuid(uid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) { - return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); + long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), + low2highuid(suid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) @@ -72,8 +96,11 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) { - return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); + long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), + 
low2highgid(sgid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) @@ -89,12 +116,18 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, asmlinkage long sys_setfsuid16(old_uid_t uid) { - return sys_setfsuid(low2highuid(uid)); + long ret = sys_setfsuid(low2highuid(uid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } asmlinkage long sys_setfsgid16(old_gid_t gid) { - return sys_setfsgid(low2highgid(gid)); + long ret = sys_setfsgid(low2highgid(gid)); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } static int groups16_to_user(old_gid_t __user *grouplist, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e9e464a90376..880fb415a8f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -547,7 +547,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) } /* We're holding the cpucontrol mutex here */ -static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, +static int workqueue_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { |
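
Finally, the auditfilter.c portion of this commit hinges on an RCU copy-and-replace idiom: selinux_audit_rule_update() duplicates each rule that carries SELinux fields (re-initializing the opaque se_rule handles against the freshly loaded policy), swaps the copy into the filter list with list_replace_rcu(), and defers freeing the old entry with call_rcu() so concurrent readers are never left holding a dangling rule. A minimal sketch of that idiom under the same assumptions as the diff follows; update_one_list() is a hypothetical name, while audit_dupe_rule() and audit_free_rule_rcu() are the helpers added above.

#include <linux/err.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include "audit.h"

/* Caller must hold audit_netlink_mutex, which serializes writers;
 * readers walk the filter list under rcu_read_lock(). */
static int update_one_list(struct list_head *filter_list)
{
	struct audit_entry *entry, *n, *nentry;
	int err = 0;

	list_for_each_entry_safe(entry, n, filter_list, list) {
		nentry = audit_dupe_rule(&entry->rule);
		if (IS_ERR(nentry)) {
			/* Record the first failure; drop the stale rule
			 * rather than keep matching against old policy. */
			if (!err)
				err = PTR_ERR(nentry);
			list_del_rcu(&entry->list);
		} else
			list_replace_rcu(&entry->list, &nentry->list);
		/* The old entry may still be visible to readers; free
		 * it only after an RCU grace period has elapsed. */
		call_rcu(&entry->rcu, audit_free_rule_rcu);
	}
	return err;
}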