Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1
-rw-r--r-- | kernel/audit.c | 160
-rw-r--r-- | kernel/audit.h | 10
-rw-r--r-- | kernel/auditfilter.c | 289
-rw-r--r-- | kernel/auditsc.c | 269
-rw-r--r-- | kernel/cpuset.c | 25
-rw-r--r-- | kernel/exit.c | 11
-rw-r--r-- | kernel/extable.c | 2
-rw-r--r-- | kernel/fork.c | 2
-rw-r--r-- | kernel/hrtimer.c | 10
-rw-r--r-- | kernel/intermodule.c | 184
-rw-r--r-- | kernel/irq/manage.c | 6
-rw-r--r-- | kernel/module.c | 12
-rw-r--r-- | kernel/posix-cpu-timers.c | 48
-rw-r--r-- | kernel/power/main.c | 4
-rw-r--r-- | kernel/printk.c | 28
-rw-r--r-- | kernel/profile.c | 2
-rw-r--r-- | kernel/ptrace.c | 57
-rw-r--r-- | kernel/rcupdate.c | 23
-rw-r--r-- | kernel/sched.c | 64
-rw-r--r-- | kernel/softirq.c | 4
-rw-r--r-- | kernel/softlockup.c | 4
-rw-r--r-- | kernel/timer.c | 20
-rw-r--r-- | kernel/workqueue.c | 2
24 files changed, 726 insertions, 511 deletions
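The audit changes below repeat one pattern: when the netlink sender carries an SELinux sid, convert it to a security context string with selinux_ctxid_to_string() and log it as subj=, refusing the configuration change if the conversion fails; otherwise keep the old record format. A minimal sketch of that pattern as one helper (the helper itself is illustrative; the patch open-codes this in audit_set_rate_limit(), audit_set_backlog_limit(), audit_set_enabled() and audit_set_failure()):

static int audit_log_config_change(const char *name, int new, int old,
				   uid_t loginuid, u32 sid)
{
	if (sid) {
		char *ctx = NULL;
		u32 len;
		/* resolve the sender's sid to a printable context */
		int rc = selinux_ctxid_to_string(sid, &ctx, &len);
		if (rc)
			return rc;	/* unresolvable sid: reject the change */
		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
			  "%s=%d old=%d by auid=%u subj=%s",
			  name, new, old, loginuid, ctx);
		kfree(ctx);
	} else
		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
			  "%s=%d old=%d by auid=%u",
			  name, new, old, loginuid);
	return 0;
}

Each audit_set_*() below also moves the assignment of the new value to after the logging, so a failed sid-to-string conversion leaves the old setting in place.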
diff --git a/kernel/Makefile b/kernel/Makefile index 58908f9d156a..f6ef00f4f90f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_SMP) += cpu.o spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_OBSOLETE_INTERMODULE) += intermodule.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o diff --git a/kernel/audit.c b/kernel/audit.c index c8ccbd09048f..df57b493e1cb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -55,6 +55,9 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/selinux.h> + +#include "audit.h" /* No auditing will take place until audit_initialized != 0. * (Initialization happens after skb_init is called.) */ @@ -227,49 +230,103 @@ void audit_log_lost(const char *message) } } -static int audit_set_rate_limit(int limit, uid_t loginuid) +static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) { - int old = audit_rate_limit; - audit_rate_limit = limit; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + int old = audit_rate_limit; + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_rate_limit=%d old=%d by auid=%u subj=%s", + limit, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_rate_limit=%d old=%d by auid=%u", - audit_rate_limit, old, loginuid); + limit, old, loginuid); + audit_rate_limit = limit; return old; } -static int audit_set_backlog_limit(int limit, uid_t loginuid) +static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) { - int old = audit_backlog_limit; - audit_backlog_limit = limit; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + int old = audit_backlog_limit; + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_backlog_limit=%d old=%d by auid=%u subj=%s", + limit, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_backlog_limit=%d old=%d by auid=%u", - audit_backlog_limit, old, loginuid); + limit, old, loginuid); + audit_backlog_limit = limit; return old; } -static int audit_set_enabled(int state, uid_t loginuid) +static int audit_set_enabled(int state, uid_t loginuid, u32 sid) { - int old = audit_enabled; + int old = audit_enabled; + if (state != 0 && state != 1) return -EINVAL; - audit_enabled = state; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_enabled=%d old=%d by auid=%u subj=%s", + state, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_enabled=%d old=%d by auid=%u", - audit_enabled, old, loginuid); + state, old, loginuid); + audit_enabled = state; return old; } -static int audit_set_failure(int state, uid_t loginuid) +static int audit_set_failure(int state, uid_t loginuid, u32 sid) { - int old = audit_failure; + int old = audit_failure; + if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK && state != AUDIT_FAIL_PANIC) return -EINVAL; - audit_failure = state; - audit_log(NULL, GFP_KERNEL, 
AUDIT_CONFIG_CHANGE, + + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_failure=%d old=%d by auid=%u subj=%s", + state, old, loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_failure=%d old=%d by auid=%u", - audit_failure, old, loginuid); + state, old, loginuid); + audit_failure = state; return old; } @@ -387,7 +444,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type) static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - u32 uid, pid, seq; + u32 uid, pid, seq, sid; void *data; struct audit_status *status_get, status_set; int err; @@ -413,6 +470,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) pid = NETLINK_CREDS(skb)->pid; uid = NETLINK_CREDS(skb)->uid; loginuid = NETLINK_CB(skb).loginuid; + sid = NETLINK_CB(skb).sid; seq = nlh->nlmsg_seq; data = NLMSG_DATA(nlh); @@ -433,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { - err = audit_set_enabled(status_get->enabled, loginuid); + err = audit_set_enabled(status_get->enabled, + loginuid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_FAILURE) { - err = audit_set_failure(status_get->failure, loginuid); + err = audit_set_failure(status_get->failure, + loginuid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; + if (sid) { + char *ctx = NULL; + u32 len; + int rc; + if ((rc = selinux_ctxid_to_string( + sid, &ctx, &len))) + return rc; + else + audit_log(NULL, GFP_KERNEL, + AUDIT_CONFIG_CHANGE, + "audit_pid=%d old=%d by auid=%u subj=%s", + status_get->pid, old, + loginuid, ctx); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "audit_pid=%d old=%d by auid=%u", + status_get->pid, old, loginuid); audit_pid = status_get->pid; - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "audit_pid=%d old=%d by auid=%u", - audit_pid, old, loginuid); } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) - audit_set_rate_limit(status_get->rate_limit, loginuid); + audit_set_rate_limit(status_get->rate_limit, + loginuid, sid); if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) audit_set_backlog_limit(status_get->backlog_limit, - loginuid); + loginuid, sid); break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: @@ -465,8 +541,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) ab = audit_log_start(NULL, GFP_KERNEL, msg_type); if (ab) { audit_log_format(ab, - "user pid=%d uid=%u auid=%u msg='%.1024s'", - pid, uid, loginuid, (char *)data); + "user pid=%d uid=%u auid=%u", + pid, uid, loginuid); + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + sid, &ctx, &len)) { + audit_log_format(ab, + " ssid=%u", sid); + /* Maybe call audit_panic? 
*/ + } else + audit_log_format(ab, + " subj=%s", ctx); + kfree(ctx); + } + audit_log_format(ab, " msg='%.1024s'", + (char *)data); audit_set_pid(ab, pid); audit_log_end(ab); } @@ -480,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid); + loginuid, sid); break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: @@ -490,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST_RULES: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid); + loginuid, sid); break; case AUDIT_SIGNAL_INFO: sig_data.uid = audit_sig_uid; @@ -564,6 +655,11 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); audit_initialized = 1; audit_enabled = audit_default; + + /* Register the callback with selinux. This callback will be invoked + * when a new policy is loaded. */ + selinux_audit_set_callback(&selinux_audit_rule_update); + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); return 0; } diff --git a/kernel/audit.h b/kernel/audit.h index bc5392076e2b..6f733920fd32 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -54,9 +54,11 @@ enum audit_state { /* Rule lists */ struct audit_field { - u32 type; - u32 val; - u32 op; + u32 type; + u32 val; + u32 op; + char *se_str; + struct selinux_audit_rule *se_rule; }; struct audit_krule { @@ -86,3 +88,5 @@ extern void audit_send_reply(int pid, int seq, int type, extern void audit_log_lost(const char *message); extern void audit_panic(const char *message); extern struct mutex audit_netlink_mutex; + +extern int selinux_audit_rule_update(void); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index d3a8539f3a83..7c134906d689 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -23,6 +23,7 @@ #include <linux/audit.h> #include <linux/kthread.h> #include <linux/netlink.h> +#include <linux/selinux.h> #include "audit.h" /* There are three lists of rules -- one to search at task creation @@ -42,6 +43,13 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { static inline void audit_free_rule(struct audit_entry *e) { + int i; + if (e->rule.fields) + for (i = 0; i < e->rule.field_count; i++) { + struct audit_field *f = &e->rule.fields[i]; + kfree(f->se_str); + selinux_audit_rule_free(f->se_rule); + } kfree(e->rule.fields); kfree(e); } @@ -52,9 +60,29 @@ static inline void audit_free_rule_rcu(struct rcu_head *head) audit_free_rule(e); } +/* Initialize an audit filterlist entry. */ +static inline struct audit_entry *audit_init_entry(u32 field_count) +{ + struct audit_entry *entry; + struct audit_field *fields; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (unlikely(!entry)) + return NULL; + + fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL); + if (unlikely(!fields)) { + kfree(entry); + return NULL; + } + entry->rule.fields = fields; + + return entry; +} + /* Unpack a filter field's string representation from user-space * buffer. 
*/ -static __attribute__((unused)) char *audit_unpack_string(void **bufp, size_t *remain, size_t len) +static char *audit_unpack_string(void **bufp, size_t *remain, size_t len) { char *str; @@ -84,7 +112,6 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) { unsigned listnr; struct audit_entry *entry; - struct audit_field *fields; int i, err; err = -EINVAL; @@ -108,23 +135,14 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) goto exit_err; err = -ENOMEM; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (unlikely(!entry)) - goto exit_err; - fields = kmalloc(sizeof(*fields) * rule->field_count, GFP_KERNEL); - if (unlikely(!fields)) { - kfree(entry); + entry = audit_init_entry(rule->field_count); + if (!entry) goto exit_err; - } - - memset(&entry->rule, 0, sizeof(struct audit_krule)); - memset(fields, 0, sizeof(struct audit_field)); entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND; entry->rule.listnr = listnr; entry->rule.action = rule->action; entry->rule.field_count = rule->field_count; - entry->rule.fields = fields; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) entry->rule.mask[i] = rule->mask[i]; @@ -150,15 +168,20 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; - if (rule->fields[i] & AUDIT_UNUSED_BITS) { - err = -EINVAL; - goto exit_free; - } - f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS); f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); f->val = rule->values[i]; + if (f->type & AUDIT_UNUSED_BITS || + f->type == AUDIT_SE_USER || + f->type == AUDIT_SE_ROLE || + f->type == AUDIT_SE_TYPE || + f->type == AUDIT_SE_SEN || + f->type == AUDIT_SE_CLR) { + err = -EINVAL; + goto exit_free; + } + entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; /* Support for legacy operators where @@ -188,8 +211,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, int err = 0; struct audit_entry *entry; void *bufp; - /* size_t remain = datasz - sizeof(struct audit_rule_data); */ + size_t remain = datasz - sizeof(struct audit_rule_data); int i; + char *str; entry = audit_to_entry_common((struct audit_rule *)data); if (IS_ERR(entry)) @@ -207,10 +231,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->op = data->fieldflags[i] & AUDIT_OPERATORS; f->type = data->fields[i]; + f->val = data->values[i]; + f->se_str = NULL; + f->se_rule = NULL; switch(f->type) { - /* call type-specific conversion routines here */ - default: - f->val = data->values[i]; + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + str = audit_unpack_string(&bufp, &remain, f->val); + if (IS_ERR(str)) + goto exit_free; + entry->rule.buflen += f->val; + + err = selinux_audit_rule_init(f->type, f->op, str, + &f->se_rule); + /* Keep currently invalid fields around in case they + * become valid after a policy reload. 
*/ + if (err == -EINVAL) { + printk(KERN_WARNING "audit rule for selinux " + "\'%s\' is invalid\n", str); + err = 0; + } + if (err) { + kfree(str); + goto exit_free; + } else + f->se_str = str; + break; } } @@ -286,7 +335,14 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->fields[i] = f->type; data->fieldflags[i] = f->op; switch(f->type) { - /* call type-specific conversion routines here */ + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + data->buflen += data->values[i] = + audit_pack_string(&bufp, f->se_str); + break; default: data->values[i] = f->val; } @@ -314,7 +370,14 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) return 1; switch(a->fields[i].type) { - /* call type-specific comparison routines here */ + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) + return 1; + break; default: if (a->fields[i].val != b->fields[i].val) return 1; @@ -328,6 +391,81 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) return 0; } +/* Duplicate selinux field information. The se_rule is opaque, so must be + * re-initialized. */ +static inline int audit_dupe_selinux_field(struct audit_field *df, + struct audit_field *sf) +{ + int ret = 0; + char *se_str; + + /* our own copy of se_str */ + se_str = kstrdup(sf->se_str, GFP_KERNEL); + if (unlikely(IS_ERR(se_str))) + return -ENOMEM; + df->se_str = se_str; + + /* our own (refreshed) copy of se_rule */ + ret = selinux_audit_rule_init(df->type, df->op, df->se_str, + &df->se_rule); + /* Keep currently invalid fields around in case they + * become valid after a policy reload. */ + if (ret == -EINVAL) { + printk(KERN_WARNING "audit rule for selinux \'%s\' is " + "invalid\n", df->se_str); + ret = 0; + } + + return ret; +} + +/* Duplicate an audit rule. This will be a deep copy with the exception + * of the watch - that pointer is carried over. The selinux specific fields + * will be updated in the copy. The point is to be able to replace the old + * rule with the new rule in the filterlist, then free the old rule. */ +static struct audit_entry *audit_dupe_rule(struct audit_krule *old) +{ + u32 fcount = old->field_count; + struct audit_entry *entry; + struct audit_krule *new; + int i, err = 0; + + entry = audit_init_entry(fcount); + if (unlikely(!entry)) + return ERR_PTR(-ENOMEM); + + new = &entry->rule; + new->vers_ops = old->vers_ops; + new->flags = old->flags; + new->listnr = old->listnr; + new->action = old->action; + for (i = 0; i < AUDIT_BITMASK_SIZE; i++) + new->mask[i] = old->mask[i]; + new->buflen = old->buflen; + new->field_count = old->field_count; + memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); + + /* deep copy this information, updating the se_rule fields, because + * the originals will all be freed when the old rule is freed. */ + for (i = 0; i < fcount; i++) { + switch (new->fields[i].type) { + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + err = audit_dupe_selinux_field(&new->fields[i], + &old->fields[i]); + } + if (err) { + audit_free_rule(entry); + return ERR_PTR(err); + } + } + + return entry; +} + /* Add rule to given filterlist if not a duplicate. Protected by * audit_netlink_mutex. 
*/ static inline int audit_add_rule(struct audit_entry *entry, @@ -448,9 +586,10 @@ static int audit_list_rules(void *_dest) * @data: payload data * @datasz: size of payload data * @loginuid: loginuid of sender + * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, - size_t datasz, uid_t loginuid) + size_t datasz, uid_t loginuid, u32 sid) { struct task_struct *tsk; int *dest; @@ -493,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_add_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "auid=%u add rule to list=%d res=%d\n", - loginuid, entry->rule.listnr, !err); + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string(sid, &ctx, &len)) { + /* Maybe call audit_panic? */ + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u ssid=%u add rule to list=%d res=%d", + loginuid, sid, entry->rule.listnr, !err); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u subj=%s add rule to list=%d res=%d", + loginuid, ctx, entry->rule.listnr, !err); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u add rule to list=%d res=%d", + loginuid, entry->rule.listnr, !err); if (err) audit_free_rule(entry); @@ -511,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_del_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, - "auid=%u remove rule from list=%d res=%d\n", - loginuid, entry->rule.listnr, !err); + + if (sid) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string(sid, &ctx, &len)) { + /* Maybe call audit_panic? */ + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u ssid=%u remove rule from list=%d res=%d", + loginuid, sid, entry->rule.listnr, !err); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u subj=%s remove rule from list=%d res=%d", + loginuid, ctx, entry->rule.listnr, !err); + kfree(ctx); + } else + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, + "auid=%u remove rule from list=%d res=%d", + loginuid, entry->rule.listnr, !err); audit_free_rule(entry); break; @@ -628,3 +796,62 @@ unlock_and_return: rcu_read_unlock(); return result; } + +/* Check to see if the rule contains any selinux fields. Returns 1 if there + are selinux fields specified in the rule, 0 otherwise. */ +static inline int audit_rule_has_selinux(struct audit_krule *rule) +{ + int i; + + for (i = 0; i < rule->field_count; i++) { + struct audit_field *f = &rule->fields[i]; + switch (f->type) { + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + return 1; + } + } + + return 0; +} + +/* This function will re-initialize the se_rule field of all applicable rules. + * It will traverse the filter lists serarching for rules that contain selinux + * specific filter fields. When such a rule is found, it is copied, the + * selinux field is re-initialized, and the old rule is replaced with the + * updated rule. 
*/ +int selinux_audit_rule_update(void) +{ + struct audit_entry *entry, *n, *nentry; + int i, err = 0; + + /* audit_netlink_mutex synchronizes the writers */ + mutex_lock(&audit_netlink_mutex); + + for (i = 0; i < AUDIT_NR_FILTERS; i++) { + list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { + if (!audit_rule_has_selinux(&entry->rule)) + continue; + + nentry = audit_dupe_rule(&entry->rule); + if (unlikely(IS_ERR(nentry))) { + /* save the first error encountered for the + * return value */ + if (!err) + err = PTR_ERR(nentry); + audit_panic("error updating selinux filters"); + list_del_rcu(&entry->list); + } else { + list_replace_rcu(&entry->list, &nentry->list); + } + call_rcu(&entry->rcu, audit_free_rule_rcu); + } + } + + mutex_unlock(&audit_netlink_mutex); + + return err; +} diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7f160df21a23..1c03a4ed1b27 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -58,6 +58,7 @@ #include <linux/security.h> #include <linux/list.h> #include <linux/tty.h> +#include <linux/selinux.h> #include "audit.h" @@ -89,7 +90,7 @@ struct audit_names { uid_t uid; gid_t gid; dev_t rdev; - char *ctx; + u32 osid; }; struct audit_aux_data { @@ -106,7 +107,7 @@ struct audit_aux_data_ipcctl { uid_t uid; gid_t gid; mode_t mode; - char *ctx; + u32 osid; }; struct audit_aux_data_socketcall { @@ -167,7 +168,8 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_context *ctx, enum audit_state *state) { - int i, j; + int i, j, need_sid = 1; + u32 sid; for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; @@ -257,6 +259,27 @@ static int audit_filter_rules(struct task_struct *tsk, if (ctx) result = audit_comparator(ctx->loginuid, f->op, f->val); break; + case AUDIT_SE_USER: + case AUDIT_SE_ROLE: + case AUDIT_SE_TYPE: + case AUDIT_SE_SEN: + case AUDIT_SE_CLR: + /* NOTE: this may return negative values indicating + a temporary error. We simply treat this as a + match for now to avoid losing information that + may be wanted. An error message will also be + logged upon error */ + if (f->se_rule) { + if (need_sid) { + selinux_task_ctxid(tsk, &sid); + need_sid = 0; + } + result = selinux_audit_rule_match(sid, f->type, + f->op, + f->se_rule, + ctx); + } + break; case AUDIT_ARG0: case AUDIT_ARG1: case AUDIT_ARG2: @@ -329,7 +352,6 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, return AUDIT_BUILD_CONTEXT; } -/* This should be called with task_lock() held. 
*/ static inline struct audit_context *audit_get_context(struct task_struct *tsk, int return_valid, int return_code) @@ -391,9 +413,6 @@ static inline void audit_free_names(struct audit_context *context) #endif for (i = 0; i < context->name_count; i++) { - char *p = context->names[i].ctx; - context->names[i].ctx = NULL; - kfree(p); if (context->names[i].name) __putname(context->names[i].name); } @@ -416,11 +435,6 @@ static inline void audit_free_aux(struct audit_context *context) dput(axi->dentry); mntput(axi->mnt); } - if ( aux->type == AUDIT_IPC ) { - struct audit_aux_data_ipcctl *axi = (void *)aux; - if (axi->ctx) - kfree(axi->ctx); - } context->aux = aux->next; kfree(aux); @@ -506,7 +520,7 @@ static inline void audit_free_context(struct audit_context *context) printk(KERN_ERR "audit: freed %d contexts\n", count); } -static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask) +static void audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; ssize_t len = 0; @@ -518,7 +532,7 @@ static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask) return; } - ctx = kmalloc(len, gfp_mask); + ctx = kmalloc(len, GFP_KERNEL); if (!ctx) goto error_path; @@ -536,47 +550,46 @@ error_path: return; } -static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask) +static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { - char name[sizeof(current->comm)]; - struct mm_struct *mm = current->mm; + char name[sizeof(tsk->comm)]; + struct mm_struct *mm = tsk->mm; struct vm_area_struct *vma; - get_task_comm(name, current); + /* tsk == current */ + + get_task_comm(name, tsk); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, name); - if (!mm) - return; - - /* - * this is brittle; all callers that pass GFP_ATOMIC will have - * NULL current->mm and we won't get here. 
- */ - down_read(&mm->mmap_sem); - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { - audit_log_d_path(ab, "exe=", - vma->vm_file->f_dentry, - vma->vm_file->f_vfsmnt); - break; + if (mm) { + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && + vma->vm_file) { + audit_log_d_path(ab, "exe=", + vma->vm_file->f_dentry, + vma->vm_file->f_vfsmnt); + break; + } + vma = vma->vm_next; } - vma = vma->vm_next; + up_read(&mm->mmap_sem); } - up_read(&mm->mmap_sem); - audit_log_task_context(ab, gfp_mask); + audit_log_task_context(ab); } -static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) +static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - int i; + int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; const char *tty; - ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL); + /* tsk == current */ + + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); if (!ab) return; /* audit_panic has been called */ audit_log_format(ab, "arch=%x syscall=%d", @@ -587,8 +600,8 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) audit_log_format(ab, " success=%s exit=%ld", (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", context->return_code); - if (current->signal->tty && current->signal->tty->name) - tty = current->signal->tty->name; + if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) + tty = tsk->signal->tty->name; else tty = "(none)"; audit_log_format(ab, @@ -607,12 +620,12 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) context->gid, context->euid, context->suid, context->fsuid, context->egid, context->sgid, context->fsgid, tty); - audit_log_task_info(ab, gfp_mask); + audit_log_task_info(ab, tsk); audit_log_end(ab); for (aux = context->aux; aux; aux = aux->next) { - ab = audit_log_start(context, gfp_mask, aux->type); + ab = audit_log_start(context, GFP_KERNEL, aux->type); if (!ab) continue; /* audit_panic has been called */ @@ -620,8 +633,39 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) case AUDIT_IPC: { struct audit_aux_data_ipcctl *axi = (void *)aux; audit_log_format(ab, - " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s", - axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx); + " qbytes=%lx iuid=%u igid=%u mode=%x", + axi->qbytes, axi->uid, axi->gid, axi->mode); + if (axi->osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + axi->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + axi->osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); + } + break; } + + case AUDIT_IPC_SET_PERM: { + struct audit_aux_data_ipcctl *axi = (void *)aux; + audit_log_format(ab, + " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", + axi->qbytes, axi->uid, axi->gid, axi->mode); + if (axi->osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + axi->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + axi->osid); + call_panic = 1; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); + } break; } case AUDIT_SOCKETCALL: { @@ -649,7 +693,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) } if (context->pwd && context->pwdmnt) { - ab = audit_log_start(context, gfp_mask, AUDIT_CWD); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); audit_log_end(ab); @@ -659,7 
+703,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) unsigned long ino = context->names[i].ino; unsigned long pino = context->names[i].pino; - ab = audit_log_start(context, gfp_mask, AUDIT_PATH); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); if (!ab) continue; /* audit_panic has been called */ @@ -685,32 +729,35 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) context->names[i].gid, MAJOR(context->names[i].rdev), MINOR(context->names[i].rdev)); - if (context->names[i].ctx) { - audit_log_format(ab, " obj=%s", - context->names[i].ctx); + if (context->names[i].osid != 0) { + char *ctx = NULL; + u32 len; + if (selinux_ctxid_to_string( + context->names[i].osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", + context->names[i].osid); + call_panic = 2; + } else + audit_log_format(ab, " obj=%s", ctx); + kfree(ctx); } audit_log_end(ab); } + if (call_panic) + audit_panic("error converting sid to string"); } /** * audit_free - free a per-task audit context * @tsk: task whose audit context block to free * - * Called from copy_process and __put_task_struct. + * Called from copy_process and do_exit */ void audit_free(struct task_struct *tsk) { struct audit_context *context; - /* - * No need to lock the task - when we execute audit_free() - * then the task has no external references anymore, and - * we are tearing it down. (The locking also confuses - * DEBUG_LOCKDEP - this freeing may occur in softirq - * contexts as well, via RCU.) - */ context = audit_get_context(tsk, 0, 0); if (likely(!context)) return; @@ -719,8 +766,9 @@ void audit_free(struct task_struct *tsk) * function (e.g., exit_group), then free context block. * We use GFP_ATOMIC here because we might be doing this * in the context of the idle thread */ + /* that can happen only if we are called from do_exit() */ if (context->in_syscall && context->auditable) - audit_log_exit(context, GFP_ATOMIC); + audit_log_exit(context, tsk); audit_free_context(context); } @@ -743,10 +791,11 @@ void audit_free(struct task_struct *tsk) * will only be written if another part of the kernel requests that it * be written). */ -void audit_syscall_entry(struct task_struct *tsk, int arch, int major, +void audit_syscall_entry(int arch, int major, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4) { + struct task_struct *tsk = current; struct audit_context *context = tsk->audit_context; enum audit_state state; @@ -824,22 +873,18 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, * message), then write out the syscall information. In call cases, * free the names stored from getname(). */ -void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) +void audit_syscall_exit(int valid, long return_code) { + struct task_struct *tsk = current; struct audit_context *context; - get_task_struct(tsk); - task_lock(tsk); context = audit_get_context(tsk, valid, return_code); - task_unlock(tsk); - /* Not having a context here is ok, since the parent may have - * called __put_task_struct. 
*/ if (likely(!context)) - goto out; + return; if (context->in_syscall && context->auditable) - audit_log_exit(context, GFP_KERNEL); + audit_log_exit(context, tsk); context->in_syscall = 0; context->auditable = 0; @@ -854,8 +899,6 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) audit_free_aux(context); tsk->audit_context = context; } - out: - put_task_struct(tsk); } /** @@ -936,40 +979,11 @@ void audit_putname(const char *name) #endif } -void audit_inode_context(int idx, const struct inode *inode) +static void audit_inode_context(int idx, const struct inode *inode) { struct audit_context *context = current->audit_context; - const char *suffix = security_inode_xattr_getsuffix(); - char *ctx = NULL; - int len = 0; - - if (!suffix) - goto ret; - - len = security_inode_getsecurity(inode, suffix, NULL, 0, 0); - if (len == -EOPNOTSUPP) - goto ret; - if (len < 0) - goto error_path; - - ctx = kmalloc(len, GFP_KERNEL); - if (!ctx) - goto error_path; - len = security_inode_getsecurity(inode, suffix, ctx, len, 0); - if (len < 0) - goto error_path; - - kfree(context->names[idx].ctx); - context->names[idx].ctx = ctx; - goto ret; - -error_path: - if (ctx) - kfree(ctx); - audit_panic("error in audit_inode_context"); -ret: - return; + selinux_get_inode_sid(inode, &context->names[idx].osid); } @@ -1155,40 +1169,37 @@ uid_t audit_get_loginuid(struct audit_context *ctx) return ctx ? ctx->loginuid : -1; } -static char *audit_ipc_context(struct kern_ipc_perm *ipcp) +/** + * audit_ipc_obj - record audit data for ipc object + * @ipcp: ipc permissions + * + * Returns 0 for success or NULL context or < 0 on error. + */ +int audit_ipc_obj(struct kern_ipc_perm *ipcp) { + struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; - char *ctx = NULL; - int len = 0; if (likely(!context)) - return NULL; - - len = security_ipc_getsecurity(ipcp, NULL, 0); - if (len == -EOPNOTSUPP) - goto ret; - if (len < 0) - goto error_path; - - ctx = kmalloc(len, GFP_ATOMIC); - if (!ctx) - goto error_path; + return 0; - len = security_ipc_getsecurity(ipcp, ctx, len); - if (len < 0) - goto error_path; + ax = kmalloc(sizeof(*ax), GFP_ATOMIC); + if (!ax) + return -ENOMEM; - return ctx; + ax->uid = ipcp->uid; + ax->gid = ipcp->gid; + ax->mode = ipcp->mode; + selinux_get_ipc_sid(ipcp, &ax->osid); -error_path: - kfree(ctx); - audit_panic("error in audit_ipc_context"); -ret: - return NULL; + ax->d.type = AUDIT_IPC; + ax->d.next = context->aux; + context->aux = (void *)ax; + return 0; } /** - * audit_ipc_perms - record audit data for ipc + * audit_ipc_set_perm - record audit data for new ipc permissions * @qbytes: msgq bytes * @uid: msgq user id * @gid: msgq group id @@ -1196,7 +1207,7 @@ ret: * * Returns 0 for success or NULL context or < 0 on error. 
*/ -int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) +int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) { struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; @@ -1212,9 +1223,9 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str ax->uid = uid; ax->gid = gid; ax->mode = mode; - ax->ctx = audit_ipc_context(ipcp); + selinux_get_ipc_sid(ipcp, &ax->osid); - ax->d.type = AUDIT_IPC; + ax->d.type = AUDIT_IPC_SET_PERM; ax->d.next = context->aux; context->aux = (void *)ax; return 0; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 72248d1b9e3f..ab81fdd4572b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * So only GFP_KERNEL allocations, if all nodes in the cpuset are * short of memory, might require taking the callback_mutex mutex. * - * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() - * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing - * hardwall cpusets - no allocation on a node outside the cpuset is - * allowed (unless in interrupt, of course). - * - * The second loop doesn't even call here for GFP_ATOMIC requests - * (if the __alloc_pages() local variable 'wait' is set). That check - * and the checks below have the combined affect in the second loop of - * the __alloc_pages() routine that: + * The first call here from mm/page_alloc:get_page_from_freelist() + * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so + * no allocation on a node outside the cpuset is allowed (unless in + * interrupt, of course). + * + * The second pass through get_page_from_freelist() doesn't even call + * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() + * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set + * in alloc_flags. That logic and the checks below have the combined + * affect that: * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. + * + * Rule: + * Don't call cpuset_zone_allowed() if you can't sleep, unless you + * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables + * the code that might scan up ancestor cpusets and sleep. **/ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) @@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) if (in_interrupt()) return 1; node = z->zone_pgdat->node_id; + might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ diff --git a/kernel/exit.c b/kernel/exit.c index f86434d7b3d1..e06d0c10a24e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -35,6 +35,7 @@ #include <linux/futex.h> #include <linux/compat.h> #include <linux/pipe_fs_i.h> +#include <linux/audit.h> /* for audit_free() */ #include <asm/uaccess.h> #include <asm/unistd.h> @@ -880,14 +881,6 @@ fastcall NORET_TYPE void do_exit(long code) tsk->flags |= PF_EXITING; - /* - * Make sure we don't try to process any timer firings - * while we are already exiting. 
- */ - tsk->it_virt_expires = cputime_zero; - tsk->it_prof_expires = cputime_zero; - tsk->it_sched_expires = 0; - if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, current->pid, @@ -910,6 +903,8 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(tsk->compat_robust_list)) compat_exit_robust_list(tsk); #endif + if (unlikely(tsk->audit_context)) + audit_free(tsk); exit_mm(tsk); exit_sem(tsk); diff --git a/kernel/extable.c b/kernel/extable.c index 7501b531ceed..7fe262855317 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static int core_kernel_text(unsigned long addr) +int core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) diff --git a/kernel/fork.c b/kernel/fork.c index d2fa57d480d4..ac8100e3088a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -114,8 +114,6 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); - if (unlikely(tsk->audit_context)) - audit_free(tsk); security_task_free(tsk); free_uid(tsk->user); put_group_info(tsk->group_info); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 04ab27ddfd90..18324305724a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -456,6 +456,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_start); /** * hrtimer_try_to_cancel - try to deactivate a timer @@ -484,6 +485,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); /** * hrtimer_cancel - cancel a timer and wait for the handler to finish. @@ -504,6 +506,7 @@ int hrtimer_cancel(struct hrtimer *timer) cpu_relax(); } } +EXPORT_SYMBOL_GPL(hrtimer_cancel); /** * hrtimer_get_remaining - get remaining time for the timer @@ -522,6 +525,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) return rem; } +EXPORT_SYMBOL_GPL(hrtimer_get_remaining); #ifdef CONFIG_NO_IDLE_HZ /** @@ -580,6 +584,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, timer->base = &bases[clock_id]; rb_set_parent(&timer->node, &timer->node); } +EXPORT_SYMBOL_GPL(hrtimer_init); /** * hrtimer_get_res - get the timer resolution for a clock @@ -599,6 +604,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) return 0; } +EXPORT_SYMBOL_GPL(hrtimer_get_res); /* * Expire the per base hrtimer-queue: @@ -836,7 +842,7 @@ static void migrate_hrtimers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit hrtimer_cpu_notify(struct notifier_block *self, +static int hrtimer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -860,7 +866,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata hrtimers_nb = { +static struct notifier_block hrtimers_nb = { .notifier_call = hrtimer_cpu_notify, }; diff --git a/kernel/intermodule.c b/kernel/intermodule.c deleted file mode 100644 index 55b1e5b85db9..000000000000 --- a/kernel/intermodule.c +++ /dev/null @@ -1,184 +0,0 @@ -/* Deprecated, do not use. Moved from module.c to here. 
--RR */ - -/* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */ -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/slab.h> - -/* inter_module functions are always available, even when the kernel is - * compiled without modules. Consumers of inter_module_xxx routines - * will always work, even when both are built into the kernel, this - * approach removes lots of #ifdefs in mainline code. - */ - -static struct list_head ime_list = LIST_HEAD_INIT(ime_list); -static DEFINE_SPINLOCK(ime_lock); -static int kmalloc_failed; - -struct inter_module_entry { - struct list_head list; - const char *im_name; - struct module *owner; - const void *userdata; -}; - -/** - * inter_module_register - register a new set of inter module data. - * @im_name: an arbitrary string to identify the data, must be unique - * @owner: module that is registering the data, always use THIS_MODULE - * @userdata: pointer to arbitrary userdata to be registered - * - * Description: Check that the im_name has not already been registered, - * complain if it has. For new data, add it to the inter_module_entry - * list. - */ -void inter_module_register(const char *im_name, struct module *owner, const void *userdata) -{ - struct list_head *tmp; - struct inter_module_entry *ime, *ime_new; - - if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) { - /* Overloaded kernel, not fatal */ - printk(KERN_ERR - "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", - im_name); - kmalloc_failed = 1; - return; - } - ime_new->im_name = im_name; - ime_new->owner = owner; - ime_new->userdata = userdata; - - spin_lock(&ime_lock); - list_for_each(tmp, &ime_list) { - ime = list_entry(tmp, struct inter_module_entry, list); - if (strcmp(ime->im_name, im_name) == 0) { - spin_unlock(&ime_lock); - kfree(ime_new); - /* Program logic error, fatal */ - printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name); - BUG(); - } - } - list_add(&(ime_new->list), &ime_list); - spin_unlock(&ime_lock); -} - -/** - * inter_module_unregister - unregister a set of inter module data. - * @im_name: an arbitrary string to identify the data, must be unique - * - * Description: Check that the im_name has been registered, complain if - * it has not. For existing data, remove it from the - * inter_module_entry list. - */ -void inter_module_unregister(const char *im_name) -{ - struct list_head *tmp; - struct inter_module_entry *ime; - - spin_lock(&ime_lock); - list_for_each(tmp, &ime_list) { - ime = list_entry(tmp, struct inter_module_entry, list); - if (strcmp(ime->im_name, im_name) == 0) { - list_del(&(ime->list)); - spin_unlock(&ime_lock); - kfree(ime); - return; - } - } - spin_unlock(&ime_lock); - if (kmalloc_failed) { - printk(KERN_ERR - "inter_module_unregister: no entry for '%s', " - "probably caused by previous kmalloc failure\n", - im_name); - return; - } - else { - /* Program logic error, fatal */ - printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name); - BUG(); - } -} - -/** - * inter_module_get - return arbitrary userdata from another module. - * @im_name: an arbitrary string to identify the data, must be unique - * - * Description: If the im_name has not been registered, return NULL. - * Try to increment the use count on the owning module, if that fails - * then return NULL. Otherwise return the userdata. 
- */ -static const void *inter_module_get(const char *im_name) -{ - struct list_head *tmp; - struct inter_module_entry *ime; - const void *result = NULL; - - spin_lock(&ime_lock); - list_for_each(tmp, &ime_list) { - ime = list_entry(tmp, struct inter_module_entry, list); - if (strcmp(ime->im_name, im_name) == 0) { - if (try_module_get(ime->owner)) - result = ime->userdata; - break; - } - } - spin_unlock(&ime_lock); - return(result); -} - -/** - * inter_module_get_request - im get with automatic request_module. - * @im_name: an arbitrary string to identify the data, must be unique - * @modname: module that is expected to register im_name - * - * Description: If inter_module_get fails, do request_module then retry. - */ -const void *inter_module_get_request(const char *im_name, const char *modname) -{ - const void *result = inter_module_get(im_name); - if (!result) { - request_module("%s", modname); - result = inter_module_get(im_name); - } - return(result); -} - -/** - * inter_module_put - release use of data from another module. - * @im_name: an arbitrary string to identify the data, must be unique - * - * Description: If the im_name has not been registered, complain, - * otherwise decrement the use count on the owning module. - */ -void inter_module_put(const char *im_name) -{ - struct list_head *tmp; - struct inter_module_entry *ime; - - spin_lock(&ime_lock); - list_for_each(tmp, &ime_list) { - ime = list_entry(tmp, struct inter_module_entry, list); - if (strcmp(ime->im_name, im_name) == 0) { - if (ime->owner) - module_put(ime->owner); - spin_unlock(&ime_lock); - return; - } - } - spin_unlock(&ime_lock); - printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name); - BUG(); -} - -EXPORT_SYMBOL(inter_module_register); -EXPORT_SYMBOL(inter_module_unregister); -EXPORT_SYMBOL(inter_module_get_request); -EXPORT_SYMBOL(inter_module_put); - -MODULE_LICENSE("GPL"); - diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ac766ad573e8..1279e3499534 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -246,8 +246,10 @@ int setup_irq(unsigned int irq, struct irqaction * new) mismatch: spin_unlock_irqrestore(&desc->lock, flags); - printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); - dump_stack(); + if (!(new->flags & SA_PROBEIRQ)) { + printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); + dump_stack(); + } return -EBUSY; } diff --git a/kernel/module.c b/kernel/module.c index d24deb0dbbc9..bbe04862e1b0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put); void symbol_put_addr(void *addr) { - unsigned long flags; + struct module *modaddr; - spin_lock_irqsave(&modlist_lock, flags); - if (!kernel_text_address((unsigned long)addr)) - BUG(); + if (core_kernel_text((unsigned long)addr)) + return; - module_put(module_text_address((unsigned long)addr)); - spin_unlock_irqrestore(&modlist_lock, flags); + if (!(modaddr = module_text_address((unsigned long)addr))) + BUG(); + module_put(modaddr); } EXPORT_SYMBOL_GPL(symbol_put_addr); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 520f6c59948d..d38d9ec3276c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) struct cpu_timer_list *next; unsigned long i; - if (CPUCLOCK_PERTHREAD(timer->it_clock) && (p->flags & PF_EXITING)) - return; - head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? 
p->cpu_timers : p->signal->cpu_timers); head += CPUCLOCK_WHICH(timer->it_clock); @@ -1173,6 +1170,9 @@ static void check_process_timers(struct task_struct *tsk, } t = tsk; do { + if (unlikely(t->flags & PF_EXITING)) + continue; + ticks = cputime_add(cputime_add(t->utime, t->stime), prof_left); if (!cputime_eq(prof_expires, cputime_zero) && @@ -1193,11 +1193,7 @@ static void check_process_timers(struct task_struct *tsk, t->it_sched_expires > sched)) { t->it_sched_expires = sched; } - - do { - t = next_thread(t); - } while (unlikely(t->flags & PF_EXITING)); - } while (t != tsk); + } while ((t = next_thread(t)) != tsk); } } @@ -1289,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk) #undef UNEXPIRED - BUG_ON(tsk->exit_state); - /* * Double-check with locks held. */ read_lock(&tasklist_lock); - spin_lock(&tsk->sighand->siglock); + if (likely(tsk->signal != NULL)) { + spin_lock(&tsk->sighand->siglock); - /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. - */ - check_thread_timers(tsk, &firing); - check_process_timers(tsk, &firing); + /* + * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] + * all the timers that are firing, and put them on the firing list. + */ + check_thread_timers(tsk, &firing); + check_process_timers(tsk, &firing); - /* - * We must release these locks before taking any timer's lock. - * There is a potential race with timer deletion here, as the - * siglock now protects our private firing list. We have set - * the firing flag in each timer, so that a deletion attempt - * that gets the timer lock before we do will give it up and - * spin until we've taken care of that timer below. - */ - spin_unlock(&tsk->sighand->siglock); + /* + * We must release these locks before taking any timer's lock. + * There is a potential race with timer deletion here, as the + * siglock now protects our private firing list. We have set + * the firing flag in each timer, so that a deletion attempt + * that gets the timer lock before we do will give it up and + * spin until we've taken care of that timer below. + */ + spin_unlock(&tsk->sighand->siglock); + } read_unlock(&tasklist_lock); /* diff --git a/kernel/power/main.c b/kernel/power/main.c index ee371f50ccaa..0a907f0dc56b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state) goto Thaw; } + suspend_console(); if ((error = device_suspend(PMSG_SUSPEND))) { printk(KERN_ERR "Some devices failed to suspend\n"); goto Finish; @@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state) static void suspend_finish(suspend_state_t state) { device_resume(); + resume_console(); thaw_processes(); enable_nonboot_cpus(); if (pm_ops && pm_ops->finish) @@ -272,7 +274,7 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n if (*s && !strncmp(buf, *s, len)) break; } - if (*s) + if (state < PM_SUSPEND_MAX && *s) error = enter_state(state); else error = -EINVAL; diff --git a/kernel/printk.c b/kernel/printk.c index c056f3324432..19a955619294 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -67,6 +67,7 @@ EXPORT_SYMBOL(oops_in_progress); * driver system. 
*/ static DECLARE_MUTEX(console_sem); +static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; /* * This is used for debugging the mess that is the VT code by @@ -76,7 +77,7 @@ struct console *console_drivers; * path in the console code where we end up in places I want * locked without the console sempahore held */ -static int console_locked; +static int console_locked, console_suspended; /* * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars @@ -698,6 +699,23 @@ int __init add_preferred_console(char *name, int idx, char *options) } /** + * suspend_console - suspend the console subsystem + * + * This disables printk() while we go into suspend states + */ +void suspend_console(void) +{ + acquire_console_sem(); + console_suspended = 1; +} + +void resume_console(void) +{ + console_suspended = 0; + release_console_sem(); +} + +/** * acquire_console_sem - lock the console system for exclusive use. * * Acquires a semaphore which guarantees that the caller has @@ -708,6 +726,10 @@ int __init add_preferred_console(char *name, int idx, char *options) void acquire_console_sem(void) { BUG_ON(in_interrupt()); + if (console_suspended) { + down(&secondary_console_sem); + return; + } down(&console_sem); console_locked = 1; console_may_schedule = 1; @@ -750,6 +772,10 @@ void release_console_sem(void) unsigned long _con_start, _log_end; unsigned long wake_klogd = 0; + if (console_suspended) { + up(&secondary_console_sem); + return; + } for ( ; ; ) { spin_lock_irqsave(&logbuf_lock, flags); wake_klogd |= log_start - log_end; diff --git a/kernel/profile.c b/kernel/profile.c index 5a730fdb1a2c..68afe121e507 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -299,7 +299,7 @@ out: } #ifdef CONFIG_HOTPLUG_CPU -static int __devinit profile_cpu_callback(struct notifier_block *info, +static int profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { int node, cpu = (unsigned long)__cpu; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4e0f0ec003f7..921c22ad16e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -148,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task) int ptrace_attach(struct task_struct *task) { int retval; - task_lock(task); + retval = -EPERM; if (task->pid <= 1) - goto bad; + goto out; if (task->tgid == current->tgid) - goto bad; + goto out; + +repeat: + /* + * Nasty, nasty. + * + * We want to hold both the task-lock and the + * tasklist_lock for writing at the same time. + * But that's against the rules (tasklist_lock + * is taken for reading by interrupts on other + * cpu's that may have task_lock). + */ + task_lock(task); + local_irq_disable(); + if (!write_trylock(&tasklist_lock)) { + local_irq_enable(); + task_unlock(task); + do { + cpu_relax(); + } while (!write_can_lock(&tasklist_lock)); + goto repeat; + } + /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) goto bad; @@ -166,17 +188,15 @@ int ptrace_attach(struct task_struct *task) ? PT_ATTACHED : 0); if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; - task_unlock(task); - write_lock_irq(&tasklist_lock); __ptrace_link(task, current); - write_unlock_irq(&tasklist_lock); force_sig_specific(SIGSTOP, task); - return 0; bad: + write_unlock_irq(&tasklist_lock); task_unlock(task); +out: return retval; } @@ -417,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request, */ int ptrace_traceme(void) { - int ret; + int ret = -EPERM; /* * Are we already being traced? 
*/ - if (current->ptrace & PT_PTRACED) - return -EPERM; - ret = security_ptrace(current->parent, current); - if (ret) - return -EPERM; - /* - * Set the ptrace bit in the process ptrace flags. - */ - current->ptrace |= PT_PTRACED; - return 0; + task_lock(current); + if (!(current->ptrace & PT_PTRACED)) { + ret = security_ptrace(current->parent, current); + /* + * Set the ptrace bit in the process ptrace flags. + */ + if (!ret) + current->ptrace |= PT_PTRACED; + } + task_unlock(current); + return ret; } /** diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 13458bbaa1be..2058f88c7bbb 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) return 0; } +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ int rcu_pending(int cpu) { return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); } +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); + + return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); +} + void rcu_check_callbacks(int cpu, int user) { if (user || @@ -520,7 +539,7 @@ static void __devinit rcu_online_cpu(int cpu) tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); } -static int __devinit rcu_cpu_notify(struct notifier_block *self, +static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -537,7 +556,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata rcu_nb = { +static struct notifier_block rcu_nb = { .notifier_call = rcu_cpu_notify, }; diff --git a/kernel/sched.c b/kernel/sched.c index 365f0b90b4de..c13f1bd2df7d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -665,55 +665,13 @@ static int effective_prio(task_t *p) } /* - * We place interactive tasks back into the active array, if possible. - * - * To guarantee that this does not starve expired tasks we ignore the - * interactivity of a task if the first expired task had to wait more - * than a 'reasonable' amount of time. This deadline timeout is - * load-dependent, as the frequency of array switched decreases with - * increasing number of running tasks. We also ignore the interactivity - * if a better static_prio task has expired, and switch periodically - * regardless, to ensure that highly interactive tasks do not starve - * the less fortunate for unreasonably long periods. 
- */ -static inline int expired_starving(runqueue_t *rq) -{ - int limit; - - /* - * Arrays were recently switched, all is well - */ - if (!rq->expired_timestamp) - return 0; - - limit = STARVATION_LIMIT * rq->nr_running; - - /* - * It's time to switch arrays - */ - if (jiffies - rq->expired_timestamp >= limit) - return 1; - - /* - * There's a better selection in the expired array - */ - if (rq->curr->static_prio > rq->best_expired_prio) - return 1; - - /* - * All is well - */ - return 0; -} - -/* * __activate_task - move a task to the runqueue. */ static void __activate_task(task_t *p, runqueue_t *rq) { prio_array_t *target = rq->active; - if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p)))) + if (batch_task(p)) target = rq->expired; enqueue_task(p, target); rq->nr_running++; @@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk) } /* + * We place interactive tasks back into the active array, if possible. + * + * To guarantee that this does not starve expired tasks we ignore the + * interactivity of a task if the first expired task had to wait more + * than a 'reasonable' amount of time. This deadline timeout is + * load-dependent, as the frequency of array switched decreases with + * increasing number of running tasks. We also ignore the interactivity + * if a better static_prio task has expired: + */ +#define EXPIRED_STARVING(rq) \ + ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ + (jiffies - (rq)->expired_timestamp >= \ + STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ + ((rq)->curr->static_prio > (rq)->best_expired_prio)) + +/* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() @@ -2666,7 +2640,7 @@ void scheduler_tick(void) if (!rq->expired_timestamp) rq->expired_timestamp = jiffies; - if (!TASK_INTERACTIVE(p) || expired_starving(rq)) { + if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { enqueue_task(p, rq->expired); if (p->static_prio < rq->best_expired_prio) rq->best_expired_prio = p->static_prio; @@ -4814,7 +4788,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, /* Register at highest priority so that task migration (migrate_all_tasks) * happens before everything else. 
*/ -static struct notifier_block __devinitdata migration_notifier = { +static struct notifier_block migration_notifier = { .notifier_call = migration_call, .priority = 10 }; diff --git a/kernel/softirq.c b/kernel/softirq.c index ec8fed42a86f..336f92d64e2e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit cpu_callback(struct notifier_block *nfb, +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -484,7 +484,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __devinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index ced91e1ff564..14c7faf02909 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu) /* * Create/destroy watchdog threads as CPUs come and go: */ -static int __devinit +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; @@ -140,7 +140,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __devinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; diff --git a/kernel/timer.c b/kernel/timer.c index 883773788836..9e49deed468c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -541,6 +541,22 @@ found: } spin_unlock(&base->lock); + /* + * It can happen that other CPUs service timer IRQs and increment + * jiffies, but we have not yet got a local timer tick to process + * the timer wheels. In that case, the expiry time can be before + * jiffies, but since the high-resolution timer here is relative to + * jiffies, the default expression when high-resolution timers are + * not active, + * + * time_before(MAX_JIFFY_OFFSET + jiffies, expires) + * + * would falsely evaluate to true. If that is the case, just + * return jiffies so that we can immediately fire the local timer + */ + if (time_before(expires, jiffies)) + return jiffies; + if (time_before(hr_expires, expires)) return hr_expires; @@ -1314,7 +1330,7 @@ static void __devinit migrate_timers(int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ -static int __devinit timer_cpu_notify(struct notifier_block *self, +static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -1334,7 +1350,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __devinitdata timers_nb = { +static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e9e464a90376..880fb415a8f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -547,7 +547,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) } /* We're holding the cpucontrol mutex here */ -static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, +static int workqueue_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { |