From 3583a71183a02c51ca71cd180e9189cfb0411cc1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 20:21:23 +0300 Subject: make selinux_write_opts() static This patch makes the needlessly global selinux_write_opts() static. Signed-off-by: Adrian Bunk Signed-off-by: James Morris --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'security/selinux') diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3ae9bec5a508..0ffd8814af3e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -957,7 +957,8 @@ out_err: return rc; } -void selinux_write_opts(struct seq_file *m, struct security_mnt_opts *opts) +static void selinux_write_opts(struct seq_file *m, + struct security_mnt_opts *opts) { int i; char *prefix; -- cgit v1.2.3 From df4ea865f09580b1cad621c0426612f598847815 Mon Sep 17 00:00:00 2001 From: Vesa-Matti J Kari Date: Sun, 20 Jul 2008 23:57:01 +0300 Subject: SELinux: Trivial minor fixes that change C null character style Trivial minor fixes that change C null character style. Signed-off-by: Vesa-Matti Kari Signed-off-by: James Morris --- security/selinux/ss/conditional.c | 2 +- security/selinux/ss/mls.c | 14 +++++++------- security/selinux/ss/policydb.c | 20 ++++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index fb4efe4f4bc8..f8c850a56ed1 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -239,7 +239,7 @@ int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto err; - key[len] = 0; + key[len] = '\0'; if (hashtab_insert(h, key, booldatum)) goto err; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 77d745da48bb..b5407f16c2a4 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -283,8 +283,8 @@ int mls_context_to_sid(struct policydb *pol, p++; delim = *p; - if (delim != 0) - *p++ = 0; + if (delim != '\0') + *p++ = '\0'; for (l = 0; l < 2; l++) { levdatum = hashtab_search(pol->p_levels.table, scontextp); @@ -302,14 +302,14 @@ int mls_context_to_sid(struct policydb *pol, while (*p && *p != ',' && *p != '-') p++; delim = *p; - if (delim != 0) - *p++ = 0; + if (delim != '\0') + *p++ = '\0'; /* Separate into range if exists */ rngptr = strchr(scontextp, '.'); if (rngptr != NULL) { /* Remove '.' 
*/ - *rngptr++ = 0; + *rngptr++ = '\0'; } catdatum = hashtab_search(pol->p_cats.table, @@ -357,8 +357,8 @@ int mls_context_to_sid(struct policydb *pol, p++; delim = *p; - if (delim != 0) - *p++ = 0; + if (delim != '\0') + *p++ = '\0'; } else break; } diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 2391761ae422..26646305dc0e 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -932,7 +932,7 @@ static int perm_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = hashtab_insert(h, key, perdatum); if (rc) @@ -979,7 +979,7 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; for (i = 0; i < nel; i++) { rc = perm_read(p, comdatum->permissions.table, fp); @@ -1117,7 +1117,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; if (len2) { cladatum->comkey = kmalloc(len2 + 1, GFP_KERNEL); @@ -1128,7 +1128,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(cladatum->comkey, fp, len2); if (rc < 0) goto bad; - cladatum->comkey[len2] = 0; + cladatum->comkey[len2] = '\0'; cladatum->comdatum = hashtab_search(p->p_commons.table, cladatum->comkey); @@ -1201,7 +1201,7 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = ebitmap_read(&role->dominates, fp); if (rc) @@ -1262,7 +1262,7 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = hashtab_insert(h, key, typdatum); if (rc) @@ -1334,7 +1334,7 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = ebitmap_read(&usrdatum->roles, fp); if (rc) @@ -1388,7 +1388,7 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; levdatum->level = kmalloc(sizeof(struct mls_level), GFP_ATOMIC); if (!levdatum->level) { @@ -1440,7 +1440,7 @@ static int cat_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = hashtab_insert(h, key, catdatum); if (rc) @@ -1523,7 +1523,7 @@ int policydb_read(struct policydb *p, void *fp) kfree(policydb_str); goto bad; } - policydb_str[len] = 0; + policydb_str[len] = '\0'; if (strcmp(policydb_str, POLICYDB_STRING)) { printk(KERN_ERR "SELinux: policydb string %s does not match " "my string %s\n", policydb_str, POLICYDB_STRING); -- cgit v1.2.3 From 0c0e186f812457e527c420f7a4d02865fd0dc7d2 Mon Sep 17 00:00:00 2001 From: Vesa-Matti J Kari Date: Mon, 21 Jul 2008 02:50:20 +0300 Subject: SELinux: trivial, remove unneeded local variable Hello, Remove unneeded local variable: struct avtab_node *newnode Signed-off-by: Vesa-Matti Kari Signed-off-by: James Morris --- security/selinux/ss/avtab.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index a1be97f8beea..e8ae812d6af7 100644 --- 
a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -98,7 +98,7 @@ struct avtab_node * avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum) { int hvalue; - struct avtab_node *prev, *cur, *newnode; + struct avtab_node *prev, *cur; u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD); if (!h || !h->htable) @@ -122,9 +122,7 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu key->target_class < cur->key.target_class) break; } - newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum); - - return newnode; + return avtab_insert_node(h, hvalue, prev, cur, key, datum); } struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key) -- cgit v1.2.3 From cf9481e289247fe9cf40f2e2481220d899132049 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 27 Jul 2008 21:31:07 +1000 Subject: SELinux: Fix a potentially uninitialised variable in SELinux hooks Fix a potentially uninitialised variable in SELinux hooks that's given a pointer to the network address by selinux_parse_skb() passing a pointer back through its argument list. By restructuring selinux_parse_skb(), the compiler can see that the error case need not set it as the caller will return immediately. Signed-off-by: David Howells Signed-off-by: James Morris --- security/selinux/hooks.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 0ffd8814af3e..3eae30609702 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3539,38 +3539,44 @@ out: #endif /* IPV6 */ static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad, - char **addrp, int src, u8 *proto) + char **_addrp, int src, u8 *proto) { - int ret = 0; + char *addrp; + int ret; switch (ad->u.net.family) { case PF_INET: ret = selinux_parse_skb_ipv4(skb, ad, proto); - if (ret || !addrp) - break; - *addrp = (char *)(src ? &ad->u.net.v4info.saddr : - &ad->u.net.v4info.daddr); - break; + if (ret) + goto parse_error; + addrp = (char *)(src ? &ad->u.net.v4info.saddr : + &ad->u.net.v4info.daddr); + goto okay; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: ret = selinux_parse_skb_ipv6(skb, ad, proto); - if (ret || !addrp) - break; - *addrp = (char *)(src ? &ad->u.net.v6info.saddr : - &ad->u.net.v6info.daddr); - break; + if (ret) + goto parse_error; + addrp = (char *)(src ? &ad->u.net.v6info.saddr : + &ad->u.net.v6info.daddr); + goto okay; #endif /* IPV6 */ default: - break; + addrp = NULL; + goto okay; } - if (unlikely(ret)) - printk(KERN_WARNING - "SELinux: failure in selinux_parse_skb()," - " unable to parse packet\n"); - +parse_error: + printk(KERN_WARNING + "SELinux: failure in selinux_parse_skb()," + " unable to parse packet\n"); return ret; + +okay: + if (_addrp) + *_addrp = addrp; + return 0; } /** -- cgit v1.2.3 From 421fae06be9e0dac45747494756b3580643815f9 Mon Sep 17 00:00:00 2001 From: Vesa-Matti Kari Date: Wed, 6 Aug 2008 18:24:51 +0300 Subject: selinux: conditional expression type validation was off-by-one expr_isvalid() in conditional.c was off-by-one and allowed invalid expression type COND_LAST. However, it is this header file that needs to be fixed. 
That way the if-statement's disjunction's second component reads more naturally, "if expr type is greater than the last allowed value" ( rather than using ">=" in conditional.c): if (expr->expr_type <= 0 || expr->expr_type > COND_LAST) Signed-off-by: Vesa-Matti Kari Signed-off-by: James Morris --- security/selinux/ss/conditional.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h index 65b9f8366e9c..53ddb013ae57 100644 --- a/security/selinux/ss/conditional.h +++ b/security/selinux/ss/conditional.h @@ -28,7 +28,7 @@ struct cond_expr { #define COND_XOR 5 /* bool ^ bool */ #define COND_EQ 6 /* bool == bool */ #define COND_NEQ 7 /* bool != bool */ -#define COND_LAST 8 +#define COND_LAST COND_NEQ __u32 expr_type; __u32 bool; struct cond_expr *next; -- cgit v1.2.3 From dbc74c65b3fd841985935f676388c82d6b85c485 Mon Sep 17 00:00:00 2001 From: Vesa-Matti Kari Date: Thu, 7 Aug 2008 03:18:20 +0300 Subject: selinux: Unify for- and while-loop style Replace "thing != NULL" comparisons with just "thing" to make the code look more uniform (mixed styles were used even in the same source file). Signed-off-by: Vesa-Matti Kari Acked-by: Stephen Smalley Signed-off-by: James Morris --- security/selinux/ss/avtab.c | 2 +- security/selinux/ss/conditional.c | 16 ++++++++-------- security/selinux/ss/ebitmap.c | 4 ++-- security/selinux/ss/hashtab.c | 6 +++--- security/selinux/ss/services.c | 8 ++++---- security/selinux/ss/sidtab.c | 12 ++++++------ 6 files changed, 24 insertions(+), 24 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index e8ae812d6af7..1215b8e47dba 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -229,7 +229,7 @@ void avtab_destroy(struct avtab *h) for (i = 0; i < h->nslot; i++) { cur = h->htable[i]; - while (cur != NULL) { + while (cur) { temp = cur; cur = cur->next; kmem_cache_free(avtab_node_cachep, temp); diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index f8c850a56ed1..4a4e35cac22b 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -29,7 +29,7 @@ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr) int s[COND_EXPR_MAXDEPTH]; int sp = -1; - for (cur = expr; cur != NULL; cur = cur->next) { + for (cur = expr; cur; cur = cur->next) { switch (cur->expr_type) { case COND_BOOL: if (sp == (COND_EXPR_MAXDEPTH - 1)) @@ -97,14 +97,14 @@ int evaluate_cond_node(struct policydb *p, struct cond_node *node) if (new_state == -1) printk(KERN_ERR "SELinux: expression result was undefined - disabling all rules.\n"); /* turn the rules on or off */ - for (cur = node->true_list; cur != NULL; cur = cur->next) { + for (cur = node->true_list; cur; cur = cur->next) { if (new_state <= 0) cur->node->key.specified &= ~AVTAB_ENABLED; else cur->node->key.specified |= AVTAB_ENABLED; } - for (cur = node->false_list; cur != NULL; cur = cur->next) { + for (cur = node->false_list; cur; cur = cur->next) { /* -1 or 1 */ if (new_state) cur->node->key.specified &= ~AVTAB_ENABLED; @@ -128,7 +128,7 @@ int cond_policydb_init(struct policydb *p) static void cond_av_list_destroy(struct cond_av_list *list) { struct cond_av_list *cur, *next; - for (cur = list; cur != NULL; cur = next) { + for (cur = list; cur; cur = next) { next = cur->next; /* the avtab_ptr_t node is destroy by the avtab */ kfree(cur); @@ -139,7 +139,7 @@ static void 
cond_node_destroy(struct cond_node *node) { struct cond_expr *cur_expr, *next_expr; - for (cur_expr = node->expr; cur_expr != NULL; cur_expr = next_expr) { + for (cur_expr = node->expr; cur_expr; cur_expr = next_expr) { next_expr = cur_expr->next; kfree(cur_expr); } @@ -155,7 +155,7 @@ static void cond_list_destroy(struct cond_node *list) if (list == NULL) return; - for (cur = list; cur != NULL; cur = next) { + for (cur = list; cur; cur = next) { next = cur->next; cond_node_destroy(cur); } @@ -291,7 +291,7 @@ static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum goto err; } found = 0; - for (cur = other; cur != NULL; cur = cur->next) { + for (cur = other; cur; cur = cur->next) { if (cur->node == node_ptr) { found = 1; break; @@ -485,7 +485,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi if (!ctab || !key || !avd) return; - for (node = avtab_search_node(ctab, key); node != NULL; + for (node = avtab_search_node(ctab, key); node; node = avtab_search_node_next(node, key->specified)) { if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) == (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED))) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index ddc275490af8..68c7348d1acc 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -109,7 +109,7 @@ int ebitmap_netlbl_export(struct ebitmap *ebmap, *catmap = c_iter; c_iter->startbit = e_iter->startbit & ~(NETLBL_CATMAP_SIZE - 1); - while (e_iter != NULL) { + while (e_iter) { for (i = 0; i < EBITMAP_UNIT_NUMS; i++) { unsigned int delta, e_startbit, c_endbit; @@ -197,7 +197,7 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, } } c_iter = c_iter->next; - } while (c_iter != NULL); + } while (c_iter); if (e_iter != NULL) ebmap->highbit = e_iter->startbit + EBITMAP_SIZE; else diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 2e7788e13213..933e735bb185 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -81,7 +81,7 @@ void *hashtab_search(struct hashtab *h, const void *key) hvalue = h->hash_value(h, key); cur = h->htable[hvalue]; - while (cur != NULL && h->keycmp(h, key, cur->key) > 0) + while (cur && h->keycmp(h, key, cur->key) > 0) cur = cur->next; if (cur == NULL || (h->keycmp(h, key, cur->key) != 0)) @@ -100,7 +100,7 @@ void hashtab_destroy(struct hashtab *h) for (i = 0; i < h->size; i++) { cur = h->htable[i]; - while (cur != NULL) { + while (cur) { temp = cur; cur = cur->next; kfree(temp); @@ -127,7 +127,7 @@ int hashtab_map(struct hashtab *h, for (i = 0; i < h->size; i++) { cur = h->htable[i]; - while (cur != NULL) { + while (cur) { ret = apply(cur->key, cur->datum, args); if (ret) return ret; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b52f923ce680..5a0536bddc63 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -356,7 +356,7 @@ static int context_struct_compute_av(struct context *scontext, avkey.source_type = i + 1; avkey.target_type = j + 1; for (node = avtab_search_node(&policydb.te_avtab, &avkey); - node != NULL; + node; node = avtab_search_node_next(node, avkey.specified)) { if (node->key.specified == AVTAB_ALLOWED) avd->allowed |= node->datum.data; @@ -1037,7 +1037,7 @@ static int security_compute_sid(u32 ssid, /* If no permanent rule, also check for enabled conditional rules */ if (!avdatum) { node = avtab_search_node(&policydb.te_cond_avtab, &avkey); - for (; node != NULL; node = 
avtab_search_node_next(node, specified)) { + for (; node; node = avtab_search_node_next(node, specified)) { if (node->key.specified & AVTAB_ENABLED) { avdatum = &node->datum; break; @@ -2050,7 +2050,7 @@ int security_set_bools(int len, int *values) policydb.bool_val_to_struct[i]->state = 0; } - for (cur = policydb.cond_list; cur != NULL; cur = cur->next) { + for (cur = policydb.cond_list; cur; cur = cur->next) { rc = evaluate_cond_node(&policydb, cur); if (rc) goto out; @@ -2102,7 +2102,7 @@ static int security_preserve_bools(struct policydb *p) if (booldatum) booldatum->state = bvalues[i]; } - for (cur = p->cond_list; cur != NULL; cur = cur->next) { + for (cur = p->cond_list; cur; cur = cur->next) { rc = evaluate_cond_node(p, cur); if (rc) goto out; diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index a81ded104129..e817989764cd 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -43,7 +43,7 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) hvalue = SIDTAB_HASH(sid); prev = NULL; cur = s->htable[hvalue]; - while (cur != NULL && sid > cur->sid) { + while (cur && sid > cur->sid) { prev = cur; cur = cur->next; } @@ -92,7 +92,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; - while (cur != NULL && sid > cur->sid) + while (cur && sid > cur->sid) cur = cur->next; if (force && cur && sid == cur->sid && cur->context.len) @@ -103,7 +103,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) sid = SECINITSID_UNLABELED; hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; - while (cur != NULL && sid > cur->sid) + while (cur && sid > cur->sid) cur = cur->next; if (!cur || sid != cur->sid) return NULL; @@ -136,7 +136,7 @@ int sidtab_map(struct sidtab *s, for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur != NULL) { + while (cur) { rc = apply(cur->sid, &cur->context, args); if (rc) goto out; @@ -155,7 +155,7 @@ static inline u32 sidtab_search_context(struct sidtab *s, for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur != NULL) { + while (cur) { if (context_cmp(&cur->context, context)) return cur->sid; cur = cur->next; @@ -242,7 +242,7 @@ void sidtab_destroy(struct sidtab *s) for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur != NULL) { + while (cur) { temp = cur; cur = cur->next; context_destroy(&temp->context); -- cgit v1.2.3 From d9250dea3f89fe808a525f08888016b495240ed4 Mon Sep 17 00:00:00 2001 From: KaiGai Kohei Date: Thu, 28 Aug 2008 16:35:57 +0900 Subject: SELinux: add boundary support and thread context assignment The purpose of this patch is to assign per-thread security context under a constraint. It enables multi-threaded server application to kick a request handler with its fair security context, and helps some of userspace object managers to handle user's request. When we assign a per-thread security context, it must not have wider permissions than the original one. Because a multi-threaded process shares a single local memory, an arbitary per-thread security context also means another thread can easily refer violated information. The constraint on a per-thread security context requires a new domain has to be equal or weaker than its original one, when it tries to assign a per-thread security context. Bounds relationship between two types is a way to ensure a domain can never have wider permission than its bounds. 
We can define it in two explicit or implicit ways. The first way is using new TYPEBOUNDS statement. It enables to define a boundary of types explicitly. The other one expand the concept of existing named based hierarchy. If we defines a type with "." separated name like "httpd_t.php", toolchain implicitly set its bounds on "httpd_t". This feature requires a new policy version. The 24th version (POLICYDB_VERSION_BOUNDARY) enables to ship them into kernel space, and the following patch enables to handle it. Signed-off-by: KaiGai Kohei Acked-by: Stephen Smalley Signed-off-by: James Morris --- security/selinux/avc.c | 2 +- security/selinux/hooks.c | 15 ++- security/selinux/include/avc.h | 4 + security/selinux/include/security.h | 15 ++- security/selinux/ss/policydb.c | 205 +++++++++++++++++++++++++++++++++--- security/selinux/ss/policydb.h | 5 + security/selinux/ss/services.c | 172 +++++++++++++++++++++++++++++- 7 files changed, 398 insertions(+), 20 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 114b4b4c97b2..cb30c7e350b3 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -136,7 +136,7 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) * @tclass: target security class * @av: access vector */ -static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) +void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) { const char **common_pts = NULL; u32 common_base = 0; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6b5790bba8f9..89f446d86054 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5226,8 +5226,12 @@ static int selinux_setprocattr(struct task_struct *p, if (sid == 0) return -EINVAL; - - /* Only allow single threaded processes to change context */ + /* + * SELinux allows to change context in the following case only. + * - Single threaded processes. + * - Multi threaded processes intend to change its context into + * more restricted domain (defined by TYPEBOUNDS statement). + */ if (atomic_read(&p->mm->mm_users) != 1) { struct task_struct *g, *t; struct mm_struct *mm = p->mm; @@ -5235,11 +5239,16 @@ static int selinux_setprocattr(struct task_struct *p, do_each_thread(g, t) { if (t->mm == mm && t != p) { read_unlock(&tasklist_lock); - return -EPERM; + error = security_bounded_transition(tsec->sid, sid); + if (!error) + goto boundary_ok; + + return error; } } while_each_thread(g, t); read_unlock(&tasklist_lock); } +boundary_ok: /* Check permissions for the transition. 
*/ error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index 7b9769f5e775..d12ff1a9c0aa 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -126,6 +127,9 @@ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid, u32 events, u32 ssid, u32 tsid, u16 tclass, u32 perms); +/* Shows permission in human readable form */ +void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av); + /* Exported to selinuxfs */ int avc_get_hash_stats(char *page); extern unsigned int avc_cache_threshold; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 7c543003d653..72447370bc95 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -27,13 +27,14 @@ #define POLICYDB_VERSION_RANGETRANS 21 #define POLICYDB_VERSION_POLCAP 22 #define POLICYDB_VERSION_PERMISSIVE 23 +#define POLICYDB_VERSION_BOUNDARY 24 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX #define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE #else -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_PERMISSIVE +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_BOUNDARY #endif #define CONTEXT_MNT 0x01 @@ -62,6 +63,16 @@ enum { extern int selinux_policycap_netpeer; extern int selinux_policycap_openperm; +/* + * type_datum properties + * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY + */ +#define TYPEDATUM_PROPERTY_PRIMARY 0x0001 +#define TYPEDATUM_PROPERTY_ATTRIBUTE 0x0002 + +/* limitation of boundary depth */ +#define POLICYDB_BOUNDS_MAXDEPTH 4 + int security_load_policy(void *data, size_t len); int security_policycap_supported(unsigned int req_cap); @@ -117,6 +128,8 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen, int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); +int security_bounded_transition(u32 oldsid, u32 newsid); + int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 26646305dc0e..72e4a54973aa 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "security.h" #include "policydb.h" @@ -116,7 +117,12 @@ static struct policydb_compat_info policydb_compat[] = { .version = POLICYDB_VERSION_PERMISSIVE, .sym_num = SYM_NUM, .ocon_num = OCON_NUM, - } + }, + { + .version = POLICYDB_VERSION_BOUNDARY, + .sym_num = SYM_NUM, + .ocon_num = OCON_NUM, + }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) @@ -254,7 +260,9 @@ static int role_index(void *key, void *datum, void *datap) role = datum; p = datap; - if (!role->value || role->value > p->p_roles.nprim) + if (!role->value + || role->value > p->p_roles.nprim + || role->bounds > p->p_roles.nprim) return -EINVAL; p->p_role_val_to_name[role->value - 1] = key; p->role_val_to_struct[role->value - 1] = role; @@ -270,9 +278,12 @@ static int type_index(void *key, void *datum, void *datap) p = datap; if (typdatum->primary) { - if (!typdatum->value || typdatum->value > p->p_types.nprim) + if (!typdatum->value + || typdatum->value > p->p_types.nprim + || typdatum->bounds > p->p_types.nprim) 
return -EINVAL; p->p_type_val_to_name[typdatum->value - 1] = key; + p->type_val_to_struct[typdatum->value - 1] = typdatum; } return 0; @@ -285,7 +296,9 @@ static int user_index(void *key, void *datum, void *datap) usrdatum = datum; p = datap; - if (!usrdatum->value || usrdatum->value > p->p_users.nprim) + if (!usrdatum->value + || usrdatum->value > p->p_users.nprim + || usrdatum->bounds > p->p_users.nprim) return -EINVAL; p->p_user_val_to_name[usrdatum->value - 1] = key; p->user_val_to_struct[usrdatum->value - 1] = usrdatum; @@ -438,6 +451,14 @@ static int policydb_index_others(struct policydb *p) goto out; } + p->type_val_to_struct = + kmalloc(p->p_types.nprim * sizeof(*(p->type_val_to_struct)), + GFP_KERNEL); + if (!p->type_val_to_struct) { + rc = -ENOMEM; + goto out; + } + if (cond_init_bool_indexes(p)) { rc = -ENOMEM; goto out; @@ -625,6 +646,7 @@ void policydb_destroy(struct policydb *p) kfree(p->class_val_to_struct); kfree(p->role_val_to_struct); kfree(p->user_val_to_struct); + kfree(p->type_val_to_struct); avtab_destroy(&p->te_avtab); @@ -1176,8 +1198,8 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) { char *key = NULL; struct role_datum *role; - int rc; - __le32 buf[2]; + int rc, to_read = 2; + __le32 buf[3]; u32 len; role = kzalloc(sizeof(*role), GFP_KERNEL); @@ -1186,12 +1208,17 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) goto out; } - rc = next_entry(buf, fp, sizeof buf); + if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) + to_read = 3; + + rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); if (rc < 0) goto bad; len = le32_to_cpu(buf[0]); role->value = le32_to_cpu(buf[1]); + if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) + role->bounds = le32_to_cpu(buf[2]); key = kmalloc(len + 1, GFP_KERNEL); if (!key) { @@ -1236,8 +1263,8 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) { char *key = NULL; struct type_datum *typdatum; - int rc; - __le32 buf[3]; + int rc, to_read = 3; + __le32 buf[4]; u32 len; typdatum = kzalloc(sizeof(*typdatum), GFP_KERNEL); @@ -1246,13 +1273,27 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) return rc; } - rc = next_entry(buf, fp, sizeof buf); + if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) + to_read = 4; + + rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); if (rc < 0) goto bad; len = le32_to_cpu(buf[0]); typdatum->value = le32_to_cpu(buf[1]); - typdatum->primary = le32_to_cpu(buf[2]); + if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) { + u32 prop = le32_to_cpu(buf[2]); + + if (prop & TYPEDATUM_PROPERTY_PRIMARY) + typdatum->primary = 1; + if (prop & TYPEDATUM_PROPERTY_ATTRIBUTE) + typdatum->attribute = 1; + + typdatum->bounds = le32_to_cpu(buf[3]); + } else { + typdatum->primary = le32_to_cpu(buf[2]); + } key = kmalloc(len + 1, GFP_KERNEL); if (!key) { @@ -1309,8 +1350,8 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) { char *key = NULL; struct user_datum *usrdatum; - int rc; - __le32 buf[2]; + int rc, to_read = 2; + __le32 buf[3]; u32 len; usrdatum = kzalloc(sizeof(*usrdatum), GFP_KERNEL); @@ -1319,12 +1360,17 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) goto out; } - rc = next_entry(buf, fp, sizeof buf); + if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) + to_read = 3; + + rc = next_entry(buf, fp, sizeof(buf[0]) * to_read); if (rc < 0) goto bad; len = le32_to_cpu(buf[0]); usrdatum->value = le32_to_cpu(buf[1]); + if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) + usrdatum->bounds = 
le32_to_cpu(buf[2]); key = kmalloc(len + 1, GFP_KERNEL); if (!key) { @@ -1465,6 +1511,133 @@ static int (*read_f[SYM_NUM]) (struct policydb *p, struct hashtab *h, void *fp) cat_read, }; +static int user_bounds_sanity_check(void *key, void *datum, void *datap) +{ + struct user_datum *upper, *user; + struct policydb *p = datap; + int depth = 0; + + upper = user = datum; + while (upper->bounds) { + struct ebitmap_node *node; + unsigned long bit; + + if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { + printk(KERN_ERR "SELinux: user %s: " + "too deep or looped boundary", + (char *) key); + return -EINVAL; + } + + upper = p->user_val_to_struct[upper->bounds - 1]; + ebitmap_for_each_positive_bit(&user->roles, node, bit) { + if (ebitmap_get_bit(&upper->roles, bit)) + continue; + + printk(KERN_ERR + "SELinux: boundary violated policy: " + "user=%s role=%s bounds=%s\n", + p->p_user_val_to_name[user->value - 1], + p->p_role_val_to_name[bit], + p->p_user_val_to_name[upper->value - 1]); + + return -EINVAL; + } + } + + return 0; +} + +static int role_bounds_sanity_check(void *key, void *datum, void *datap) +{ + struct role_datum *upper, *role; + struct policydb *p = datap; + int depth = 0; + + upper = role = datum; + while (upper->bounds) { + struct ebitmap_node *node; + unsigned long bit; + + if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { + printk(KERN_ERR "SELinux: role %s: " + "too deep or looped bounds\n", + (char *) key); + return -EINVAL; + } + + upper = p->role_val_to_struct[upper->bounds - 1]; + ebitmap_for_each_positive_bit(&role->types, node, bit) { + if (ebitmap_get_bit(&upper->types, bit)) + continue; + + printk(KERN_ERR + "SELinux: boundary violated policy: " + "role=%s type=%s bounds=%s\n", + p->p_role_val_to_name[role->value - 1], + p->p_type_val_to_name[bit], + p->p_role_val_to_name[upper->value - 1]); + + return -EINVAL; + } + } + + return 0; +} + +static int type_bounds_sanity_check(void *key, void *datum, void *datap) +{ + struct type_datum *upper, *type; + struct policydb *p = datap; + int depth = 0; + + upper = type = datum; + while (upper->bounds) { + if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { + printk(KERN_ERR "SELinux: type %s: " + "too deep or looped boundary\n", + (char *) key); + return -EINVAL; + } + + upper = p->type_val_to_struct[upper->bounds - 1]; + if (upper->attribute) { + printk(KERN_ERR "SELinux: type %s: " + "bounded by attribute %s", + (char *) key, + p->p_type_val_to_name[upper->value - 1]); + return -EINVAL; + } + } + + return 0; +} + +static int policydb_bounds_sanity_check(struct policydb *p) +{ + int rc; + + if (p->policyvers < POLICYDB_VERSION_BOUNDARY) + return 0; + + rc = hashtab_map(p->p_users.table, + user_bounds_sanity_check, p); + if (rc) + return rc; + + rc = hashtab_map(p->p_roles.table, + role_bounds_sanity_check, p); + if (rc) + return rc; + + rc = hashtab_map(p->p_types.table, + type_bounds_sanity_check, p); + if (rc) + return rc; + + return 0; +} + extern int ss_initialized; /* @@ -1961,6 +2134,10 @@ int policydb_read(struct policydb *p, void *fp) goto bad; } + rc = policydb_bounds_sanity_check(p); + if (rc) + goto bad; + rc = 0; out: return rc; diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 4253370fda6a..55152d498b53 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -61,6 +61,7 @@ struct class_datum { /* Role attributes */ struct role_datum { u32 value; /* internal role value */ + u32 bounds; /* boundary of role */ struct ebitmap dominates; /* set of roles dominated by this role */ struct 
ebitmap types; /* set of authorized types for role */ }; @@ -81,12 +82,15 @@ struct role_allow { /* Type attributes */ struct type_datum { u32 value; /* internal type value */ + u32 bounds; /* boundary of type */ unsigned char primary; /* primary name? */ + unsigned char attribute;/* attribute ?*/ }; /* User attributes */ struct user_datum { u32 value; /* internal user value */ + u32 bounds; /* bounds of user */ struct ebitmap roles; /* set of authorized roles for user */ struct mls_range range; /* MLS range (min - max) for user */ struct mls_level dfltlevel; /* default login MLS level for user */ @@ -209,6 +213,7 @@ struct policydb { struct class_datum **class_val_to_struct; struct role_datum **role_val_to_struct; struct user_datum **user_val_to_struct; + struct type_datum **type_val_to_struct; /* type enforcement access vectors and transitions */ struct avtab te_avtab; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 5a0536bddc63..4f233d9960e7 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -88,6 +88,11 @@ static u32 latest_granting; static int context_struct_to_string(struct context *context, char **scontext, u32 *scontext_len); +static int context_struct_compute_av(struct context *scontext, + struct context *tcontext, + u16 tclass, + u32 requested, + struct av_decision *avd); /* * Return the boolean value of a constraint expression * when it is applied to the specified source and target @@ -273,6 +278,100 @@ mls_ops: return s[0]; } +/* + * security_boundary_permission - drops violated permissions + * on boundary constraint. + */ +static void type_attribute_bounds_av(struct context *scontext, + struct context *tcontext, + u16 tclass, + u32 requested, + struct av_decision *avd) +{ + struct context lo_scontext; + struct context lo_tcontext; + struct av_decision lo_avd; + struct type_datum *source + = policydb.type_val_to_struct[scontext->type - 1]; + struct type_datum *target + = policydb.type_val_to_struct[tcontext->type - 1]; + u32 masked = 0; + + if (source->bounds) { + memset(&lo_avd, 0, sizeof(lo_avd)); + + memcpy(&lo_scontext, scontext, sizeof(lo_scontext)); + lo_scontext.type = source->bounds; + + context_struct_compute_av(&lo_scontext, + tcontext, + tclass, + requested, + &lo_avd); + if ((lo_avd.allowed & avd->allowed) == avd->allowed) + return; /* no masked permission */ + masked = ~lo_avd.allowed & avd->allowed; + } + + if (target->bounds) { + memset(&lo_avd, 0, sizeof(lo_avd)); + + memcpy(&lo_tcontext, tcontext, sizeof(lo_tcontext)); + lo_tcontext.type = target->bounds; + + context_struct_compute_av(scontext, + &lo_tcontext, + tclass, + requested, + &lo_avd); + if ((lo_avd.allowed & avd->allowed) == avd->allowed) + return; /* no masked permission */ + masked = ~lo_avd.allowed & avd->allowed; + } + + if (source->bounds && target->bounds) { + memset(&lo_avd, 0, sizeof(lo_avd)); + /* + * lo_scontext and lo_tcontext are already + * set up. 
+ */ + + context_struct_compute_av(&lo_scontext, + &lo_tcontext, + tclass, + requested, + &lo_avd); + if ((lo_avd.allowed & avd->allowed) == avd->allowed) + return; /* no masked permission */ + masked = ~lo_avd.allowed & avd->allowed; + } + + if (masked) { + struct audit_buffer *ab; + char *stype_name + = policydb.p_type_val_to_name[source->value - 1]; + char *ttype_name + = policydb.p_type_val_to_name[target->value - 1]; + char *tclass_name + = policydb.p_class_val_to_name[tclass - 1]; + + /* mask violated permissions */ + avd->allowed &= ~masked; + + /* notice to userspace via audit message */ + ab = audit_log_start(current->audit_context, + GFP_ATOMIC, AUDIT_SELINUX_ERR); + if (!ab) + return; + + audit_log_format(ab, "av boundary violation: " + "source=%s target=%s tclass=%s", + stype_name, ttype_name, tclass_name); + avc_dump_av(ab, tclass, masked); + audit_log_end(ab); + } +} + /* * Compute access vectors based on a context structure pair for * the permissions in a particular class. @@ -404,6 +503,14 @@ static int context_struct_compute_av(struct context *scontext, PROCESS__DYNTRANSITION); } + /* + * If the given source and target types have boundary + * constraint, lazy checks have to mask any violated + * permission and notice it to userspace via audit. + */ + type_attribute_bounds_av(scontext, tcontext, + tclass, requested, avd); + return 0; inval_class: @@ -549,6 +656,69 @@ out: return rc; } +/* + * security_bounded_transition - check whether the given + * transition is directed to bounded, or not. + * It returns 0, if @newsid is bounded by @oldsid. + * Otherwise, it returns error code. + * + * @oldsid : current security identifier + * @newsid : destinated security identifier + */ +int security_bounded_transition(u32 old_sid, u32 new_sid) +{ + struct context *old_context, *new_context; + struct type_datum *type; + int index; + int rc = -EINVAL; + + read_lock(&policy_rwlock); + + old_context = sidtab_search(&sidtab, old_sid); + if (!old_context) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", + __func__, old_sid); + goto out; + } + + new_context = sidtab_search(&sidtab, new_sid); + if (!new_context) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", + __func__, new_sid); + goto out; + } + + /* type/domain unchaned */ + if (old_context->type == new_context->type) { + rc = 0; + goto out; + } + + index = new_context->type; + while (true) { + type = policydb.type_val_to_struct[index - 1]; + BUG_ON(!type); + + /* not bounded anymore */ + if (!type->bounds) { + rc = -EPERM; + break; + } + + /* @newsid is bounded by @oldsid */ + if (type->bounds == old_context->type) { + rc = 0; + break; + } + index = type->bounds; + } +out: + read_unlock(&policy_rwlock); + + return rc; +} + + /** * security_compute_av - Compute access vector decisions. * @ssid: source security identifier @@ -794,7 +964,7 @@ static int string_to_context_struct(struct policydb *pol, *p++ = 0; typdatum = hashtab_search(pol->p_types.table, scontextp); - if (!typdatum) + if (!typdatum || typdatum->attribute) goto out; ctx->type = typdatum->value; -- cgit v1.2.3 From 8e531af90f3940615623dc0aa6c94866a6773601 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 3 Sep 2008 11:49:47 -0400 Subject: SELinux: memory leak in security_context_to_sid_core Fix a bug and a philosophical decision about who handles errors. security_context_to_sid_core() was leaking a context in the common case. This was causing problems on fedora systems which recently have started making extensive use of this function. 
In discussion it was decided that if string_to_context_struct() had an error it was its own responsibility to clean up any mess it created along the way. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Signed-off-by: James Morris --- security/selinux/ss/services.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b52f923ce680..d11a8154500f 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -811,11 +811,12 @@ static int string_to_context_struct(struct policydb *pol, /* Check the validity of the new context. */ if (!policydb_context_isvalid(pol, ctx)) { rc = -EINVAL; - context_destroy(ctx); goto out; } rc = 0; out: + if (rc) + context_destroy(ctx); return rc; } @@ -868,8 +869,7 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, } else if (rc) goto out; rc = sidtab_context_to_sid(&sidtab, &context, sid); - if (rc) - context_destroy(&context); + context_destroy(&context); out: read_unlock(&policy_rwlock); kfree(scontext2); -- cgit v1.2.3 From f058925b201357fba48d56cc9c1719ae274b2022 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 11 Sep 2008 09:20:26 -0400 Subject: Update selinux info in MAINTAINERS and Kconfig help text Update the SELinux entry in MAINTAINERS and drop the obsolete information from the selinux Kconfig help text. Signed-off-by: Stephen Smalley Signed-off-by: James Morris --- MAINTAINERS | 3 ++- security/selinux/Kconfig | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'security/selinux') diff --git a/MAINTAINERS b/MAINTAINERS index 28c69aaefcd9..5abb23e09fdc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3644,7 +3644,8 @@ P: Eric Paris M: eparis@parisplace.org L: linux-kernel@vger.kernel.org (kernel issues) L: selinux@tycho.nsa.gov (subscribers-only, general discussion) -W: http://www.nsa.gov/selinux +W: http://selinuxproject.org +T: git kernel.org:pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git S: Supported SENSABLE PHANTOM diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index a436d1cfa88b..26301dd651d3 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -6,9 +6,6 @@ config SECURITY_SELINUX help This selects NSA Security-Enhanced Linux (SELinux). You will also need a policy configuration and a labeled filesystem. - You can obtain the policy compiler (checkpolicy), the utility for - labeling filesystems (setfiles), and an example policy configuration - from . If you are unsure how to answer this question, answer N. config SECURITY_SELINUX_BOOTPARAM -- cgit v1.2.3 From f06febc96ba8e0af80bcc3eaec0a109e88275fac Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang Overview This patch reworks the handling of POSIX CPU timers, including the ITIMER_PROF, ITIMER_VIRT timers and rlimit handling. It was put together with the help of Roland McGrath, the owner and original writer of this code. The problem we ran into, and the reason for this rework, has to do with using a profiling timer in a process with a large number of threads. It appears that the performance of the old implementation of run_posix_cpu_timers() was at least O(n*3) (where "n" is the number of threads in a process) or worse. 
Everything is fine with an increasing number of threads until the time taken for that routine to run becomes the same as or greater than the tick time, at which point things degrade rather quickly. This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF."

Code Changes

This rework corrects the implementation of run_posix_cpu_timers() to make it run in constant time for a particular machine. (Performance may vary between one machine and another depending upon whether the kernel is built as single- or multiprocessor and, in the latter case, depending upon the number of running processors.) To do this, at each tick we now update fields in signal_struct as well as task_struct. The run_posix_cpu_timers() function uses those fields to make its decisions.

We define a new structure, "task_cputime," to contain user, system and scheduler times and use these in appropriate places:

	struct task_cputime {
		cputime_t utime;
		cputime_t stime;
		unsigned long long sum_exec_runtime;
	};

This is included in the structure "thread_group_cputime," which is a new substructure of signal_struct and which varies for uniprocessor versus multiprocessor kernels. For uniprocessor kernels, it uses "task_cputime" as a simple substructure, while for multiprocessor kernels it is a pointer:

	struct thread_group_cputime {
		struct task_cputime totals;
	};

	struct thread_group_cputime {
		struct task_cputime *totals;
	};

We also add a new task_cputime substructure directly to signal_struct, to cache the earliest expiration of process-wide timers, and task_cputime also replaces the it_*_expires fields of task_struct (used for earliest expiration of thread timers). The "thread_group_cputime" structure contains process-wide timers that are updated via account_user_time() and friends. In the non-SMP case the structure is a simple aggregator; unfortunately in the SMP case that simplicity was not achievable due to cache-line contention between CPUs (in one measured case performance was actually _worse_ on a 16-cpu system than the same test on a 4-cpu system, due to this contention). For SMP, the thread_group_cputime counters are maintained as a per-cpu structure allocated using alloc_percpu(). The timer functions update only the timer field in the structure corresponding to the running CPU, obtained using per_cpu_ptr().

We define a set of inline functions in sched.h that we use to maintain the thread_group_cputime structure and hide the differences between UP and SMP implementations from the rest of the kernel. The thread_group_cputime_init() function initializes the thread_group_cputime structure for the given task. The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the out-of-line function thread_group_cputime_alloc_smp() to allocate and fill in the per-cpu structures and fields. The thread_group_cputime_free() function, also a no-op for UP, in SMP frees the per-cpu structures. The thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls thread_group_cputime_alloc() if the per-cpu structures haven't yet been allocated. The thread_group_cputime() function fills the task_cputime structure it is passed with the contents of the thread_group_cputime fields; in UP it's that simple but in SMP it must also safely check that tsk->signal is non-NULL (if it is it just uses the appropriate fields of task_struct) and, if so, sums the per-cpu values for each online CPU.
Finally, the three functions account_group_user_time(), account_group_system_time() and account_group_exec_runtime() are used by timer functions to update the respective fields of the thread_group_cputime structure. Non-SMP operation is trivial and will not be mentioned further.

The per-cpu structure is always allocated when a task creates its first new thread, via a call to thread_group_cputime_clone_thread() from copy_signal(). It is freed at process exit via a call to thread_group_cputime_free() from cleanup_signal().

All functions that formerly summed utime/stime/sum_sched_runtime values from all threads in the thread group now use thread_group_cputime() to snapshot the values in the thread_group_cputime structure or the values in the task structure itself if the per-cpu structure hasn't been allocated.

Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit. The run_posix_cpu_timers() function has been split into a fast path and a slow path; the former safely checks whether there are any expired thread timers and, if not, just returns, while the slow path does the heavy lifting. With the dedicated thread group fields, timers are no longer "rebalanced" and the process_timer_rebalance() function and related code has gone away. All summing loops are gone and all code that used them now uses the thread_group_cputime() inline. When process-wide timers are set, the new task_cputime structure in signal_struct is used to cache the earliest expiration; this is checked in the fast path.

Performance

The fix appears not to add significant overhead to existing operations. It generally performs the same as the current code except in two cases, one in which it performs slightly worse (Case 5 below) and one in which it performs very significantly better (Case 2 below). Overall it's a wash except in those two cases.

I've since done somewhat more involved testing on a dual-core Opteron system.

Case 1: With no itimer running, for a test with 100,000 threads, the fixed kernel took 1428.5 seconds, 513 seconds more than the unfixed system, all of which was spent in the system. There were twice as many voluntary context switches with the fix as without it.

Case 2: With an itimer running at .01 second ticks and 4000 threads (the most an unmodified kernel can handle), the fixed kernel ran the test in eight percent of the time (5.8 seconds as opposed to 70 seconds) and had better tick accuracy (.012 seconds per tick as opposed to .023 seconds per tick).

Case 3: A 4000-thread test with an initial timer tick of .01 second and an interval of 10,000 seconds (i.e. a timer that ticks only once) had very nearly the same performance in both cases: 6.3 seconds elapsed for the fixed kernel versus 5.5 seconds for the unfixed kernel.

With fewer threads (eight in these tests), the Case 1 test ran in essentially the same time on both the modified and unmodified kernels (5.2 seconds versus 5.8 seconds). The Case 2 test ran in about the same time as well, 5.9 seconds versus 5.4 seconds but again with much better tick accuracy, .013 seconds per tick versus .025 seconds per tick for the unmodified kernel.

Since the fix affected the rlimit code, I also tested soft and hard CPU limits.

Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer running), the modified kernel was very slightly favored in that while it killed the process in 19.997 seconds of CPU time (5.002 seconds of wall time), only .003 seconds of that was system time, the rest was user time.
The unmodified kernel killed the process in 20.001 seconds of CPU (5.014 seconds of wall time) of which .016 seconds was system time. Really, though, the results were too close to call. The results were essentially the same with no itimer running. Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds (where the hard limit would never be reached) and an itimer running, the modified kernel exhibited worse tick accuracy than the unmodified kernel: .050 seconds/tick versus .028 seconds/tick. Otherwise, performance was almost indistinguishable. With no itimer running this test exhibited virtually identical behavior and times in both cases. In times past I did some limited performance testing. those results are below. On a four-cpu Opteron system without this fix, a sixteen-thread test executed in 3569.991 seconds, of which user was 3568.435s and system was 1.556s. On the same system with the fix, user and elapsed time were about the same, but system time dropped to 0.007 seconds. Performance with eight, four and one thread were comparable. Interestingly, the timer ticks with the fix seemed more accurate: The sixteen-thread test with the fix received 149543 ticks for 0.024 seconds per tick, while the same test without the fix received 58720 for 0.061 seconds per tick. Both cases were configured for an interval of 0.01 seconds. Again, the other tests were comparable. Each thread in this test computed the primes up to 25,000,000. I also did a test with a large number of threads, 100,000 threads, which is impossible without the fix. In this case each thread computed the primes only up to 10,000 (to make the runtime manageable). System time dominated, at 1546.968 seconds out of a total 2176.906 seconds (giving a user time of 629.938s). It received 147651 ticks for 0.015 seconds per tick, still quite accurate. There is obviously no comparable test without the fix. Signed-off-by: Frank Mayhar Cc: Roland McGrath Cc: Alexey Dobriyan Cc: Andrew Morton Signed-off-by: Ingo Molnar --- fs/binfmt_elf.c | 19 +- fs/proc/array.c | 8 +- include/linux/posix-timers.h | 2 + include/linux/sched.h | 257 +++++++++++++++++++++-- include/linux/time.h | 3 + kernel/compat.c | 53 ++--- kernel/exit.c | 19 +- kernel/fork.c | 88 ++++---- kernel/itimer.c | 33 +-- kernel/posix-cpu-timers.c | 471 +++++++++++++++++++++++-------------------- kernel/sched.c | 53 ++++- kernel/sched_fair.c | 1 + kernel/sched_rt.c | 4 +- kernel/signal.c | 8 +- kernel/sys.c | 75 +++---- security/selinux/hooks.c | 9 +- 16 files changed, 677 insertions(+), 426 deletions(-) (limited to 'security/selinux') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 655ed8d30a86..a8635f637038 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { + struct task_cputime cputime; + /* - * This is the record for the group leader. Add in the - * cumulative times of previous dead threads. This total - * won't include the time of each live thread whose state - * is included in the core dump. The final total reported - * to our parent process when it calls wait4 will include - * those sums as well as the little bit more time it takes - * this and each other thread to finish dying after the - * core dump synchronization phase. + * This is the record for the group leader. It shows the + * group-wide total, not its individual thread total. 
*/ - cputime_to_timeval(cputime_add(p->utime, p->signal->utime), - &prstatus->pr_utime); - cputime_to_timeval(cputime_add(p->stime, p->signal->stime), - &prstatus->pr_stime); + thread_group_cputime(p, &cputime); + cputime_to_timeval(cputime.utime, &prstatus->pr_utime); + cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { cputime_to_timeval(p->utime, &prstatus->pr_utime); cputime_to_timeval(p->stime, &prstatus->pr_stime); diff --git a/fs/proc/array.c b/fs/proc/array.c index 71c9be59c9c2..933953c4e407 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -395,20 +395,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { + struct task_cputime cputime; struct task_struct *t = task; do { min_flt += t->min_flt; maj_flt += t->maj_flt; - utime = cputime_add(utime, task_utime(t)); - stime = cputime_add(stime, task_stime(t)); gtime = cputime_add(gtime, task_gtime(t)); t = next_thread(t); } while (t != task); min_flt += sig->min_flt; maj_flt += sig->maj_flt; - utime = cputime_add(utime, sig->utime); - stime = cputime_add(stime, sig->stime); + thread_group_cputime(task, &cputime); + utime = cputime.utime; + stime = cputime.stime; gtime = cputime_add(gtime, sig->gtime); } diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a7dd38f30ade..f9d8e9e94e9b 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, long clock_nanosleep_restart(struct restart_block *restart_block); +void update_rlimit_cpu(unsigned long rlim_new); + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad15..26d7a5f2d0ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -425,6 +425,45 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +/** + * struct task_cputime - collected CPU time counts + * @utime: time spent in user mode, in &cputime_t units + * @stime: time spent in kernel mode, in &cputime_t units + * @sum_exec_runtime: total time spent on the CPU, in nanoseconds + * + * This structure groups together three kinds of CPU time that are + * tracked for threads and thread groups. Most things considering + * CPU time want to group these counts together and treat all three + * of them in parallel. + */ +struct task_cputime { + cputime_t utime; + cputime_t stime; + unsigned long long sum_exec_runtime; +}; +/* Alternate field names when used to cache expirations. */ +#define prof_exp stime +#define virt_exp utime +#define sched_exp sum_exec_runtime + +/** + * struct thread_group_cputime - thread group interval timer counts + * @totals: thread group interval timers; substructure for + * uniprocessor kernel, per-cpu for SMP kernel. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU clock calculations. + */ +#ifdef CONFIG_SMP +struct thread_group_cputime { + struct task_cputime *totals; +}; +#else +struct thread_group_cputime { + struct task_cputime totals; +}; +#endif + /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -470,6 +509,17 @@ struct signal_struct { cputime_t it_prof_expires, it_virt_expires; cputime_t it_prof_incr, it_virt_incr; + /* + * Thread group totals for process CPU clocks. + * See thread_group_cputime(), et al, for details. + */ + struct thread_group_cputime cputime; + + /* Earliest-expiration cache. 
*/ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + /* job control IDs */ /* @@ -500,7 +550,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ - cputime_t utime, stime, cutime, cstime; + cputime_t cutime, cstime; cputime_t gtime; cputime_t cgtime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -508,14 +558,6 @@ struct signal_struct { unsigned long inblock, oublock, cinblock, coublock; struct task_io_accounting ioac; - /* - * Cumulative ns of scheduled CPU time for dead threads in the - * group, not including a zombie group leader. (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs @@ -527,8 +569,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - struct list_head cpu_timers[3]; - /* keep the process-shared keyrings here so that they do the right * thing in threads created with CLONE_THREAD */ #ifdef CONFIG_KEYS @@ -1134,8 +1174,7 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; - cputime_t it_prof_expires, it_virt_expires; - unsigned long long it_sched_expires; + struct task_cputime cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ @@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu); extern unsigned long long task_sched_runtime(struct task_struct *task); +extern unsigned long long thread_group_sched_runtime(struct task_struct *task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP @@ -2081,6 +2121,197 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } +/* + * Thread group CPU time accounting. + */ +#ifdef CONFIG_SMP + +extern int thread_group_cputime_alloc_smp(struct task_struct *); +extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *); + +static inline void thread_group_cputime_init(struct signal_struct *sig) +{ + sig->cputime.totals = NULL; +} + +static inline int thread_group_cputime_clone_thread(struct task_struct *curr, + struct task_struct *new) +{ + if (curr->signal->cputime.totals) + return 0; + return thread_group_cputime_alloc_smp(curr); +} + +static inline void thread_group_cputime_free(struct signal_struct *sig) +{ + free_percpu(sig->cputime.totals); +} + +/** + * thread_group_cputime - Sum the thread group time fields across all CPUs. + * + * This is a wrapper for the real routine, thread_group_cputime_smp(). See + * that routine for details. + */ +static inline void thread_group_cputime( + struct task_struct *tsk, + struct task_cputime *times) +{ + thread_group_cputime_smp(tsk, times); +} + +/** + * thread_group_cputime_account_user - Maintain utime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the utime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. 
+ */ +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->utime = cputime_add(times->utime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_system - Maintain stime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the stime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->stime = cputime_add(times->stime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a + * thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of that structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->sum_exec_runtime += ns; + put_cpu_no_resched(); + } +} + +#else /* CONFIG_SMP */ + +static inline void thread_group_cputime_init(struct signal_struct *sig) +{ + sig->cputime.totals.utime = cputime_zero; + sig->cputime.totals.stime = cputime_zero; + sig->cputime.totals.sum_exec_runtime = 0; +} + +static inline int thread_group_cputime_alloc(struct task_struct *tsk) +{ + return 0; +} + +static inline void thread_group_cputime_free(struct signal_struct *sig) +{ +} + +static inline int thread_group_cputime_clone_thread(struct task_struct *curr, + struct task_struct *tsk) +{ +} + +static inline void thread_group_cputime(struct task_struct *tsk, + struct task_cputime *cputime) +{ + *cputime = tsk->signal->cputime.totals; +} + +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); +} + +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); +} + +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + tgtimes->totals->sum_exec_runtime += ns; +} + +#endif /* CONFIG_SMP */ + +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_user(&sig->cputime, cputime); +} + +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_system(&sig->cputime, cputime); +} + +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct signal_struct 
*sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_exec_runtime(&sig->cputime, ns); +} + /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82e..1b70b3c293e9 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +struct tms; +extern void do_sys_times(struct tms *); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted diff --git a/kernel/compat.c b/kernel/compat.c index 32c254a8ab9a..72650e39b3e6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -150,49 +151,23 @@ asmlinkage long compat_sys_setitimer(int which, return 0; } +static compat_clock_t clock_t_to_compat_clock_t(clock_t x) +{ + return compat_jiffies_to_clock_t(clock_t_to_jiffies(x)); +} + asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) { - /* - * In the SMP world we might just be unlucky and have one of - * the times increment as we use it. Since the value is an - * atomically safe type this is just fine. Conceptually its - * as if the syscall took an instant longer to occur. - */ if (tbuf) { + struct tms tms; struct compat_tms tmp; - struct task_struct *tsk = current; - struct task_struct *t; - cputime_t utime, stime, cutime, cstime; - - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); - - tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime)); - tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime)); - tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime)); - tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime)); + + do_sys_times(&tms); + /* Convert our struct tms to the compat version. */ + tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime); + tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime); + tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime); + tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime); if (copy_to_user(tbuf, &tmp, sizeof(tmp))) return -EFAULT; } diff --git a/kernel/exit.c b/kernel/exit.c index 16395644a98f..40036ac04271 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. 
*/ - sig->utime = cputime_add(sig->utime, task_utime(tsk)); - sig->stime = cputime_add(sig->stime, task_stime(tsk)); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); - sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } @@ -1294,6 +1291,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (likely(!traced)) { struct signal_struct *psig; struct signal_struct *sig; + struct task_cputime cputime; /* * The resource counters for the group leader are in its @@ -1309,20 +1307,23 @@ static int wait_task_zombie(struct task_struct *p, int options, * need to protect the access to p->parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. + * + * We use thread_group_cputime() to get times for the thread + * group, which consolidates times for all threads in the + * group including the group leader. */ spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; + thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, - cputime_add(p->utime, - cputime_add(sig->utime, - sig->cutime))); + cputime_add(cputime.utime, + sig->cutime)); psig->cstime = cputime_add(psig->cstime, - cputime_add(p->stime, - cputime_add(sig->stime, - sig->cstime))); + cputime_add(cputime.stime, + sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe84796..a8ac2efb8e30 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand) kmem_cache_free(sighand_cachep, sighand); } + +/* + * Initialize POSIX timer handling for a thread group. + */ +static void posix_cpu_timers_init_group(struct signal_struct *sig) +{ + /* Thread group counters. */ + thread_group_cputime_init(sig); + + /* Expiration times and increments. */ + sig->it_virt_expires = cputime_zero; + sig->it_virt_incr = cputime_zero; + sig->it_prof_expires = cputime_zero; + sig->it_prof_incr = cputime_zero; + + /* Cached expiration times. */ + sig->cputime_expires.prof_exp = cputime_zero; + sig->cputime_expires.virt_exp = cputime_zero; + sig->cputime_expires.sched_exp = 0; + + /* The timer lists. 
*/ + INIT_LIST_HEAD(&sig->cpu_timers[0]); + INIT_LIST_HEAD(&sig->cpu_timers[1]); + INIT_LIST_HEAD(&sig->cpu_timers[2]); +} + static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; int ret; if (clone_flags & CLONE_THREAD) { - atomic_inc(&current->signal->count); - atomic_inc(&current->signal->live); - return 0; + ret = thread_group_cputime_clone_thread(current, tsk); + if (likely(!ret)) { + atomic_inc(&current->signal->count); + atomic_inc(&current->signal->live); + } + return ret; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; @@ -795,15 +824,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->it_virt_expires = cputime_zero; - sig->it_virt_incr = cputime_zero; - sig->it_prof_expires = cputime_zero; - sig->it_prof_incr = cputime_zero; - sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = NULL; - sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; + sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; @@ -820,14 +844,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); - if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { - /* - * New sole thread in the process gets an expiry time - * of the whole CPU time limit. - */ - tsk->it_prof_expires = - secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); - } + posix_cpu_timers_init_group(sig); + acct_init_pacct(&sig->pacct); tty_audit_fork(sig); @@ -837,6 +855,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_signal(struct signal_struct *sig) { + thread_group_cputime_free(sig); exit_thread_group_keys(sig); kmem_cache_free(signal_cachep, sig); } @@ -885,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +/* + * Initialize POSIX timer handling for a single task. + */ +static void posix_cpu_timers_init(struct task_struct *tsk) +{ + tsk->cputime_expires.prof_exp = cputime_zero; + tsk->cputime_expires.virt_exp = cputime_zero; + tsk->cputime_expires.sched_exp = 0; + INIT_LIST_HEAD(&tsk->cpu_timers[0]); + INIT_LIST_HEAD(&tsk->cpu_timers[1]); + INIT_LIST_HEAD(&tsk->cpu_timers[2]); +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. 
@@ -995,12 +1027,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, task_io_accounting_init(&p->ioac); acct_clear_integrals(p); - p->it_virt_expires = cputime_zero; - p->it_prof_expires = cputime_zero; - p->it_sched_expires = 0; - INIT_LIST_HEAD(&p->cpu_timers[0]); - INIT_LIST_HEAD(&p->cpu_timers[1]); - INIT_LIST_HEAD(&p->cpu_timers[2]); + posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); @@ -1201,21 +1228,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); - - if (!cputime_eq(current->signal->it_virt_expires, - cputime_zero) || - !cputime_eq(current->signal->it_prof_expires, - cputime_zero) || - current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || - !list_empty(&current->signal->cpu_timers[0]) || - !list_empty(&current->signal->cpu_timers[1]) || - !list_empty(&current->signal->cpu_timers[2])) { - /* - * Have child wake up on its first tick to check - * for process CPU timers. - */ - p->it_prof_expires = jiffies_to_cputime(1); - } } if (likely(p->pid)) { diff --git a/kernel/itimer.c b/kernel/itimer.c index ab982747d9bd..db7c358b9a02 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value) spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_virt_expires; cinterval = tsk->signal->it_virt_incr; if (!cputime_eq(cval, cputime_zero)) { - struct task_struct *t = tsk; - cputime_t utime = tsk->signal->utime; - do { - utime = cputime_add(utime, t->utime); - t = next_thread(t); - } while (t != tsk); + struct task_cputime cputime; + cputime_t utime; + + thread_group_cputime(tsk, &cputime); + utime = cputime.utime; if (cputime_le(cval, utime)) { /* about to fire */ cval = jiffies_to_cputime(1); } else { @@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value) } } spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); cputime_to_timeval(cval, &value->it_value); cputime_to_timeval(cinterval, &value->it_interval); break; case ITIMER_PROF: - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_prof_expires; cinterval = tsk->signal->it_prof_incr; if (!cputime_eq(cval, cputime_zero)) { - struct task_struct *t = tsk; - cputime_t ptime = cputime_add(tsk->signal->utime, - tsk->signal->stime); - do { - ptime = cputime_add(ptime, - cputime_add(t->utime, - t->stime)); - t = next_thread(t); - } while (t != tsk); + struct task_cputime times; + cputime_t ptime; + + thread_group_cputime(tsk, &times); + ptime = cputime_add(times.utime, times.stime); if (cputime_le(cval, ptime)) { /* about to fire */ cval = jiffies_to_cputime(1); } else { @@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value) } } spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); cputime_to_timeval(cval, &value->it_value); cputime_to_timeval(cinterval, &value->it_interval); break; @@ -185,7 +176,6 @@ again: case ITIMER_VIRTUAL: nval = timeval_to_cputime(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_virt_expires; cinterval = tsk->signal->it_virt_incr; @@ -200,7 +190,6 @@ again: tsk->signal->it_virt_expires = nval; 
tsk->signal->it_virt_incr = ninterval; spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); if (ovalue) { cputime_to_timeval(cval, &ovalue->it_value); cputime_to_timeval(cinterval, &ovalue->it_interval); @@ -209,7 +198,6 @@ again: case ITIMER_PROF: nval = timeval_to_cputime(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_prof_expires; cinterval = tsk->signal->it_prof_incr; @@ -224,7 +212,6 @@ again: tsk->signal->it_prof_expires = nval; tsk->signal->it_prof_incr = ninterval; spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); if (ovalue) { cputime_to_timeval(cval, &ovalue->it_value); cputime_to_timeval(cinterval, &ovalue->it_interval); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c42a03aef36f..dba1c334c3e8 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -8,6 +8,99 @@ #include #include +#ifdef CONFIG_SMP +/* + * Allocate the thread_group_cputime structure appropriately for SMP kernels + * and fill in the current values of the fields. Called from copy_signal() + * via thread_group_cputime_clone_thread() when adding a second or subsequent + * thread to a thread group. Assumes interrupts are enabled when called. + */ +int thread_group_cputime_alloc_smp(struct task_struct *tsk) +{ + struct signal_struct *sig = tsk->signal; + struct task_cputime *cputime; + + /* + * If we have multiple threads and we don't already have a + * per-CPU task_cputime struct, allocate one and fill it in with + * the times accumulated so far. + */ + if (sig->cputime.totals) + return 0; + cputime = alloc_percpu(struct task_cputime); + if (cputime == NULL) + return -ENOMEM; + read_lock(&tasklist_lock); + spin_lock_irq(&tsk->sighand->siglock); + if (sig->cputime.totals) { + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + free_percpu(cputime); + return 0; + } + sig->cputime.totals = cputime; + cputime = per_cpu_ptr(sig->cputime.totals, get_cpu()); + cputime->utime = tsk->utime; + cputime->stime = tsk->stime; + cputime->sum_exec_runtime = tsk->se.sum_exec_runtime; + put_cpu_no_resched(); + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + return 0; +} + +/** + * thread_group_cputime_smp - Sum the thread group time fields across all CPUs. + * + * @tsk: The task we use to identify the thread group. + * @times: task_cputime structure in which we return the summed fields. + * + * Walk the list of CPUs to sum the per-CPU time fields in the thread group + * time structure. + */ +void thread_group_cputime_smp( + struct task_struct *tsk, + struct task_cputime *times) +{ + struct signal_struct *sig; + int i; + struct task_cputime *tot; + + sig = tsk->signal; + if (unlikely(!sig) || !sig->cputime.totals) { + times->utime = tsk->utime; + times->stime = tsk->stime; + times->sum_exec_runtime = tsk->se.sum_exec_runtime; + return; + } + times->stime = times->utime = cputime_zero; + times->sum_exec_runtime = 0; + for_each_possible_cpu(i) { + tot = per_cpu_ptr(tsk->signal->cputime.totals, i); + times->utime = cputime_add(times->utime, tot->utime); + times->stime = cputime_add(times->stime, tot->stime); + times->sum_exec_runtime += tot->sum_exec_runtime; + } +} + +#endif /* CONFIG_SMP */ + +/* + * Called after updating RLIMIT_CPU to set timer expiration if necessary. 
+ */ +void update_rlimit_cpu(unsigned long rlim_new) +{ + cputime_t cputime; + + cputime = secs_to_cputime(rlim_new); + if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || + cputime_lt(current->signal->it_prof_expires, cputime)) { + spin_lock_irq(&current->sighand->siglock); + set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(&current->sighand->siglock); + } +} + static int check_clock(const clockid_t which_clock) { int error = 0; @@ -158,10 +251,6 @@ static inline cputime_t virt_ticks(struct task_struct *p) { return p->utime; } -static inline unsigned long long sched_ns(struct task_struct *p) -{ - return task_sched_runtime(p); -} int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { @@ -211,7 +300,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, cpu->cpu = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = sched_ns(p); + cpu->sched = task_sched_runtime(p); break; } return 0; @@ -226,31 +315,20 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx, struct task_struct *p, union cpu_time_count *cpu) { - struct task_struct *t = p; - switch (clock_idx) { + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + switch (clock_idx) { default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); - do { - cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t)); - t = next_thread(t); - } while (t != p); + cpu->cpu = cputime_add(cputime.utime, cputime.stime); break; case CPUCLOCK_VIRT: - cpu->cpu = p->signal->utime; - do { - cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t)); - t = next_thread(t); - } while (t != p); + cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = p->signal->sum_sched_runtime; - /* Add in each other live thread. */ - while ((t = next_thread(t)) != p) { - cpu->sched += t->se.sum_exec_runtime; - } - cpu->sched += sched_ns(p); + cpu->sched = thread_group_sched_runtime(p); break; } return 0; @@ -471,80 +549,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk) } void posix_cpu_timers_exit_group(struct task_struct *tsk) { - cleanup_timers(tsk->signal->cpu_timers, - cputime_add(tsk->utime, tsk->signal->utime), - cputime_add(tsk->stime, tsk->signal->stime), - tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime); -} - - -/* - * Set the expiry times of all the threads in the process so one of them - * will go off before the process cumulative expiry total is reached. 
- */ -static void process_timer_rebalance(struct task_struct *p, - unsigned int clock_idx, - union cpu_time_count expires, - union cpu_time_count val) -{ - cputime_t ticks, left; - unsigned long long ns, nsleft; - struct task_struct *t = p; - unsigned int nthreads = atomic_read(&p->signal->live); - - if (!nthreads) - return; + struct task_cputime cputime; - switch (clock_idx) { - default: - BUG(); - break; - case CPUCLOCK_PROF: - left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), - nthreads); - do { - if (likely(!(t->flags & PF_EXITING))) { - ticks = cputime_add(prof_ticks(t), left); - if (cputime_eq(t->it_prof_expires, - cputime_zero) || - cputime_gt(t->it_prof_expires, ticks)) { - t->it_prof_expires = ticks; - } - } - t = next_thread(t); - } while (t != p); - break; - case CPUCLOCK_VIRT: - left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), - nthreads); - do { - if (likely(!(t->flags & PF_EXITING))) { - ticks = cputime_add(virt_ticks(t), left); - if (cputime_eq(t->it_virt_expires, - cputime_zero) || - cputime_gt(t->it_virt_expires, ticks)) { - t->it_virt_expires = ticks; - } - } - t = next_thread(t); - } while (t != p); - break; - case CPUCLOCK_SCHED: - nsleft = expires.sched - val.sched; - do_div(nsleft, nthreads); - nsleft = max_t(unsigned long long, nsleft, 1); - do { - if (likely(!(t->flags & PF_EXITING))) { - ns = t->se.sum_exec_runtime + nsleft; - if (t->it_sched_expires == 0 || - t->it_sched_expires > ns) { - t->it_sched_expires = ns; - } - } - t = next_thread(t); - } while (t != p); - break; - } + thread_group_cputime(tsk, &cputime); + cleanup_timers(tsk->signal->cpu_timers, + cputime.utime, cputime.stime, cputime.sum_exec_runtime); } static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) @@ -608,29 +617,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) default: BUG(); case CPUCLOCK_PROF: - if (cputime_eq(p->it_prof_expires, + if (cputime_eq(p->cputime_expires.prof_exp, cputime_zero) || - cputime_gt(p->it_prof_expires, + cputime_gt(p->cputime_expires.prof_exp, nt->expires.cpu)) - p->it_prof_expires = nt->expires.cpu; + p->cputime_expires.prof_exp = + nt->expires.cpu; break; case CPUCLOCK_VIRT: - if (cputime_eq(p->it_virt_expires, + if (cputime_eq(p->cputime_expires.virt_exp, cputime_zero) || - cputime_gt(p->it_virt_expires, + cputime_gt(p->cputime_expires.virt_exp, nt->expires.cpu)) - p->it_virt_expires = nt->expires.cpu; + p->cputime_expires.virt_exp = + nt->expires.cpu; break; case CPUCLOCK_SCHED: - if (p->it_sched_expires == 0 || - p->it_sched_expires > nt->expires.sched) - p->it_sched_expires = nt->expires.sched; + if (p->cputime_expires.sched_exp == 0 || + p->cputime_expires.sched_exp > + nt->expires.sched) + p->cputime_expires.sched_exp = + nt->expires.sched; break; } } else { /* - * For a process timer, we must balance - * all the live threads' expirations. + * For a process timer, set the cached expiration time. 
*/ switch (CPUCLOCK_WHICH(timer->it_clock)) { default: @@ -641,7 +653,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) cputime_lt(p->signal->it_virt_expires, timer->it.cpu.expires.cpu)) break; - goto rebalance; + p->signal->cputime_expires.virt_exp = + timer->it.cpu.expires.cpu; + break; case CPUCLOCK_PROF: if (!cputime_eq(p->signal->it_prof_expires, cputime_zero) && @@ -652,13 +666,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) if (i != RLIM_INFINITY && i <= cputime_to_secs(timer->it.cpu.expires.cpu)) break; - goto rebalance; + p->signal->cputime_expires.prof_exp = + timer->it.cpu.expires.cpu; + break; case CPUCLOCK_SCHED: - rebalance: - process_timer_rebalance( - timer->it.cpu.task, - CPUCLOCK_WHICH(timer->it_clock), - timer->it.cpu.expires, now); + p->signal->cputime_expires.sched_exp = + timer->it.cpu.expires.sched; break; } } @@ -969,13 +982,13 @@ static void check_thread_timers(struct task_struct *tsk, struct signal_struct *const sig = tsk->signal; maxfire = 20; - tsk->it_prof_expires = cputime_zero; + tsk->cputime_expires.prof_exp = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { - tsk->it_prof_expires = t->expires.cpu; + tsk->cputime_expires.prof_exp = t->expires.cpu; break; } t->firing = 1; @@ -984,13 +997,13 @@ static void check_thread_timers(struct task_struct *tsk, ++timers; maxfire = 20; - tsk->it_virt_expires = cputime_zero; + tsk->cputime_expires.virt_exp = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { - tsk->it_virt_expires = t->expires.cpu; + tsk->cputime_expires.virt_exp = t->expires.cpu; break; } t->firing = 1; @@ -999,13 +1012,13 @@ static void check_thread_timers(struct task_struct *tsk, ++timers; maxfire = 20; - tsk->it_sched_expires = 0; + tsk->cputime_expires.sched_exp = 0; while (!list_empty(timers)) { struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { - tsk->it_sched_expires = t->expires.sched; + tsk->cputime_expires.sched_exp = t->expires.sched; break; } t->firing = 1; @@ -1055,10 +1068,10 @@ static void check_process_timers(struct task_struct *tsk, { int maxfire; struct signal_struct *const sig = tsk->signal; - cputime_t utime, stime, ptime, virt_expires, prof_expires; + cputime_t utime, ptime, virt_expires, prof_expires; unsigned long long sum_sched_runtime, sched_expires; - struct task_struct *t; struct list_head *timers = sig->cpu_timers; + struct task_cputime cputime; /* * Don't sample the current process CPU clocks if there are no timers. @@ -1074,18 +1087,10 @@ static void check_process_timers(struct task_struct *tsk, /* * Collect the current process totals. 
*/ - utime = sig->utime; - stime = sig->stime; - sum_sched_runtime = sig->sum_sched_runtime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - sum_sched_runtime += t->se.sum_exec_runtime; - t = next_thread(t); - } while (t != tsk); - ptime = cputime_add(utime, stime); - + thread_group_cputime(tsk, &cputime); + utime = cputime.utime; + ptime = cputime_add(utime, cputime.stime); + sum_sched_runtime = cputime.sum_exec_runtime; maxfire = 20; prof_expires = cputime_zero; while (!list_empty(timers)) { @@ -1193,60 +1198,18 @@ static void check_process_timers(struct task_struct *tsk, } } - if (!cputime_eq(prof_expires, cputime_zero) || - !cputime_eq(virt_expires, cputime_zero) || - sched_expires != 0) { - /* - * Rebalance the threads' expiry times for the remaining - * process CPU timers. - */ - - cputime_t prof_left, virt_left, ticks; - unsigned long long sched_left, sched; - const unsigned int nthreads = atomic_read(&sig->live); - - if (!nthreads) - return; - - prof_left = cputime_sub(prof_expires, utime); - prof_left = cputime_sub(prof_left, stime); - prof_left = cputime_div_non_zero(prof_left, nthreads); - virt_left = cputime_sub(virt_expires, utime); - virt_left = cputime_div_non_zero(virt_left, nthreads); - if (sched_expires) { - sched_left = sched_expires - sum_sched_runtime; - do_div(sched_left, nthreads); - sched_left = max_t(unsigned long long, sched_left, 1); - } else { - sched_left = 0; - } - t = tsk; - do { - if (unlikely(t->flags & PF_EXITING)) - continue; - - ticks = cputime_add(cputime_add(t->utime, t->stime), - prof_left); - if (!cputime_eq(prof_expires, cputime_zero) && - (cputime_eq(t->it_prof_expires, cputime_zero) || - cputime_gt(t->it_prof_expires, ticks))) { - t->it_prof_expires = ticks; - } - - ticks = cputime_add(t->utime, virt_left); - if (!cputime_eq(virt_expires, cputime_zero) && - (cputime_eq(t->it_virt_expires, cputime_zero) || - cputime_gt(t->it_virt_expires, ticks))) { - t->it_virt_expires = ticks; - } - - sched = t->se.sum_exec_runtime + sched_left; - if (sched_expires && (t->it_sched_expires == 0 || - t->it_sched_expires > sched)) { - t->it_sched_expires = sched; - } - } while ((t = next_thread(t)) != tsk); - } + if (!cputime_eq(prof_expires, cputime_zero) && + (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || + cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) + sig->cputime_expires.prof_exp = prof_expires; + if (!cputime_eq(virt_expires, cputime_zero) && + (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || + cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) + sig->cputime_expires.virt_exp = virt_expires; + if (sched_expires != 0 && + (sig->cputime_expires.sched_exp == 0 || + sig->cputime_expires.sched_exp > sched_expires)) + sig->cputime_expires.sched_exp = sched_expires; } /* @@ -1314,6 +1277,78 @@ out: ++timer->it_requeue_pending; } +/** + * task_cputime_zero - Check a task_cputime struct for all zero fields. + * + * @cputime: The struct to compare. + * + * Checks @cputime to see if all fields are zero. Returns true if all fields + * are zero, false if any field is nonzero. + */ +static inline int task_cputime_zero(const struct task_cputime *cputime) +{ + if (cputime_eq(cputime->utime, cputime_zero) && + cputime_eq(cputime->stime, cputime_zero) && + cputime->sum_exec_runtime == 0) + return 1; + return 0; +} + +/** + * task_cputime_expired - Compare two task_cputime entities. + * + * @sample: The task_cputime structure to be checked for expiration. 
+ * @expires: Expiration times, against which @sample will be checked. + * + * Checks @sample against @expires to see if any field of @sample has expired. + * Returns true if any field of the former is greater than the corresponding + * field of the latter if the latter field is set. Otherwise returns false. + */ +static inline int task_cputime_expired(const struct task_cputime *sample, + const struct task_cputime *expires) +{ + if (!cputime_eq(expires->utime, cputime_zero) && + cputime_ge(sample->utime, expires->utime)) + return 1; + if (!cputime_eq(expires->stime, cputime_zero) && + cputime_ge(cputime_add(sample->utime, sample->stime), + expires->stime)) + return 1; + if (expires->sum_exec_runtime != 0 && + sample->sum_exec_runtime >= expires->sum_exec_runtime) + return 1; + return 0; +} + +/** + * fastpath_timer_check - POSIX CPU timers fast path. + * + * @tsk: The task (thread) being checked. + * @sig: The signal pointer for that task. + * + * If there are no timers set return false. Otherwise snapshot the task and + * thread group timers, then compare them with the corresponding expiration + # times. Returns true if a timer has expired, else returns false. + */ +static inline int fastpath_timer_check(struct task_struct *tsk, + struct signal_struct *sig) +{ + struct task_cputime task_sample = { + .utime = tsk->utime, + .stime = tsk->stime, + .sum_exec_runtime = tsk->se.sum_exec_runtime + }; + struct task_cputime group_sample; + + if (task_cputime_zero(&tsk->cputime_expires) && + task_cputime_zero(&sig->cputime_expires)) + return 0; + if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) + return 1; + thread_group_cputime(tsk, &group_sample); + return task_cputime_expired(&group_sample, &sig->cputime_expires); +} + /* * This is called from the timer interrupt handler. The irq handler has * already updated our counts. We need to check if any timers fire now. @@ -1323,30 +1358,29 @@ void run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; + struct signal_struct *sig; + struct sighand_struct *sighand; + unsigned long flags; BUG_ON(!irqs_disabled()); -#define UNEXPIRED(clock) \ - (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ - cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) - - if (UNEXPIRED(prof) && UNEXPIRED(virt) && - (tsk->it_sched_expires == 0 || - tsk->se.sum_exec_runtime < tsk->it_sched_expires)) - return; - -#undef UNEXPIRED - + /* Pick up tsk->signal and make sure it's valid. */ + sig = tsk->signal; /* - * Double-check with locks held. + * The fast path checks that there are no expired thread or thread + * group timers. If that's so, just return. Also check that + * tsk->signal is non-NULL; this probably can't happen but cover the + * possibility anyway. */ - read_lock(&tasklist_lock); - if (likely(tsk->signal != NULL)) { - spin_lock(&tsk->sighand->siglock); - + if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) { + return; + } + sighand = lock_task_sighand(tsk, &flags); + if (likely(sighand)) { /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. + * Here we take off tsk->signal->cpu_timers[N] and + * tsk->cpu_timers[N] all the timers that are firing, and + * put them on the firing list. 
*/ check_thread_timers(tsk, &firing); check_process_timers(tsk, &firing); @@ -1359,9 +1393,8 @@ void run_posix_cpu_timers(struct task_struct *tsk) * that gets the timer lock before we do will give it up and * spin until we've taken care of that timer below. */ - spin_unlock(&tsk->sighand->siglock); } - read_unlock(&tasklist_lock); + unlock_task_sighand(tsk, &flags); /* * Now that all the timers on our list have the firing flag, @@ -1389,10 +1422,9 @@ void run_posix_cpu_timers(struct task_struct *tsk) /* * Set one of the process-wide special case CPU timers. - * The tasklist_lock and tsk->sighand->siglock must be held by the caller. - * The oldval argument is null for the RLIMIT_CPU timer, where *newval is - * absolute; non-null for ITIMER_*, where *newval is relative and we update - * it to be absolute, *oldval is absolute and we update it to be relative. + * The tsk->sighand->siglock must be held by the caller. + * The *newval argument is relative and we update it to be absolute, *oldval + * is absolute and we update it to be relative. */ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) @@ -1435,13 +1467,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_ge(list_first_entry(head, struct cpu_timer_list, entry)->expires.cpu, *newval)) { - /* - * Rejigger each thread's expiry time so that one will - * notice before we hit the process-cumulative expiry time. - */ - union cpu_time_count expires = { .sched = 0 }; - expires.cpu = *newval; - process_timer_rebalance(tsk, clock_idx, expires, now); + switch (clock_idx) { + case CPUCLOCK_PROF: + tsk->signal->cputime_expires.prof_exp = *newval; + break; + case CPUCLOCK_VIRT: + tsk->signal->cputime_expires.virt_exp = *newval; + break; + } } } diff --git a/kernel/sched.c b/kernel/sched.c index cc1f81b50b82..c51b5d276665 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4036,6 +4036,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); +/* + * Return any ns on the sched_clock that have not yet been banked in + * @p in case that task is currently running. + * + * Called with task_rq_lock() held on @rq. + */ +static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq) +{ + if (task_current(rq, p)) { + u64 delta_exec; + + update_rq_clock(rq); + delta_exec = rq->clock - p->se.exec_start; + if ((s64)delta_exec > 0) + return delta_exec; + } + return 0; +} + /* * Return p->sum_exec_runtime plus any more ns on the sched_clock * that have not yet been banked in case the task is currently running. @@ -4043,17 +4062,31 @@ EXPORT_PER_CPU_SYMBOL(kstat); unsigned long long task_sched_runtime(struct task_struct *p) { unsigned long flags; - u64 ns, delta_exec; + u64 ns; struct rq *rq; rq = task_rq_lock(p, &flags); - ns = p->se.sum_exec_runtime; - if (task_current(rq, p)) { - update_rq_clock(rq); - delta_exec = rq->clock - p->se.exec_start; - if ((s64)delta_exec > 0) - ns += delta_exec; - } + ns = p->se.sum_exec_runtime + task_delta_exec(p, rq); + task_rq_unlock(rq, &flags); + + return ns; +} + +/* + * Return sum_exec_runtime for the thread group plus any more ns on the + * sched_clock that have not yet been banked in case the task is currently + * running. 
+ */ +unsigned long long thread_group_sched_runtime(struct task_struct *p) +{ + unsigned long flags; + u64 ns; + struct rq *rq; + struct task_cputime totals; + + rq = task_rq_lock(p, &flags); + thread_group_cputime(p, &totals); + ns = totals.sum_exec_runtime + task_delta_exec(p, rq); task_rq_unlock(rq, &flags); return ns; @@ -4070,6 +4103,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) cputime64_t tmp; p->utime = cputime_add(p->utime, cputime); + account_group_user_time(p, cputime); /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); @@ -4094,6 +4128,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime) tmp = cputime_to_cputime64(cputime); p->utime = cputime_add(p->utime, cputime); + account_group_user_time(p, cputime); p->gtime = cputime_add(p->gtime, cputime); cpustat->user = cputime64_add(cpustat->user, tmp); @@ -4129,6 +4164,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, } p->stime = cputime_add(p->stime, cputime); + account_group_system_time(p, cputime); /* Add system time to cpustat. */ tmp = cputime_to_cputime64(cputime); @@ -4170,6 +4206,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal) if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); + account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fb8994c6d4bb..99aa31acc544 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -507,6 +507,7 @@ static void update_curr(struct cfs_rq *cfs_rq) struct task_struct *curtask = task_of(curr); cpuacct_charge(curtask, delta_exec); + account_group_exec_runtime(curtask, delta_exec); } } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 552310798dad..8375e69af36a 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -483,6 +483,8 @@ static void update_curr_rt(struct rq *rq) schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; + account_group_exec_runtime(curr, delta_exec); + curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); @@ -1412,7 +1414,7 @@ static void watchdog(struct rq *rq, struct task_struct *p) p->rt.timeout++; next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) - p->it_sched_expires = p->se.sum_exec_runtime; + p->cputime_expires.sched_exp = p->se.sum_exec_runtime; } } diff --git a/kernel/signal.c b/kernel/signal.c index e661b01d340f..6eea5826d618 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; struct sighand_struct *psig; + struct task_cputime cputime; int ret = sig; BUG_ON(sig == -1); @@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_uid = tsk->uid; - info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, - tsk->signal->utime)); - info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, - tsk->signal->stime)); + thread_group_cputime(tsk, &cputime); + info.si_utime = cputime_to_jiffies(cputime.utime); + info.si_stime = cputime_to_jiffies(cputime.stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) diff --git a/kernel/sys.c b/kernel/sys.c index 038a7bc0901d..d046a7a055c2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid) return old_fsgid; } +void do_sys_times(struct tms *tms) +{ 
+ struct task_cputime cputime; + cputime_t cutime, cstime; + + spin_lock_irq(&current->sighand->siglock); + thread_group_cputime(current, &cputime); + cutime = current->signal->cutime; + cstime = current->signal->cstime; + spin_unlock_irq(&current->sighand->siglock); + tms->tms_utime = cputime_to_clock_t(cputime.utime); + tms->tms_stime = cputime_to_clock_t(cputime.stime); + tms->tms_cutime = cputime_to_clock_t(cutime); + tms->tms_cstime = cputime_to_clock_t(cstime); +} + asmlinkage long sys_times(struct tms __user * tbuf) { - /* - * In the SMP world we might just be unlucky and have one of - * the times increment as we use it. Since the value is an - * atomically safe type this is just fine. Conceptually its - * as if the syscall took an instant longer to occur. - */ if (tbuf) { struct tms tmp; - struct task_struct *tsk = current; - struct task_struct *t; - cputime_t utime, stime, cutime, cstime; - - spin_lock_irq(&tsk->sighand->siglock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - - tmp.tms_utime = cputime_to_clock_t(utime); - tmp.tms_stime = cputime_to_clock_t(stime); - tmp.tms_cutime = cputime_to_clock_t(cutime); - tmp.tms_cstime = cputime_to_clock_t(cstime); + + do_sys_times(&tmp); if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) return -EFAULT; } @@ -1445,7 +1435,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) { struct rlimit new_rlim, *old_rlim; - unsigned long it_prof_secs; int retval; if (resource >= RLIM_NLIMITS) @@ -1491,18 +1480,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) if (new_rlim.rlim_cur == RLIM_INFINITY) goto out; - it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); - if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { - unsigned long rlim_cur = new_rlim.rlim_cur; - cputime_t cputime; - - cputime = secs_to_cputime(rlim_cur); - read_lock(&tasklist_lock); - spin_lock_irq(&current->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); - spin_unlock_irq(&current->sighand->siglock); - read_unlock(&tasklist_lock); - } + update_rlimit_cpu(new_rlim.rlim_cur); out: return 0; } @@ -1540,11 +1518,8 @@ out: * */ -static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, - cputime_t *utimep, cputime_t *stimep) +static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) { - *utimep = cputime_add(*utimep, t->utime); - *stimep = cputime_add(*stimep, t->stime); r->ru_nvcsw += t->nvcsw; r->ru_nivcsw += t->nivcsw; r->ru_minflt += t->min_flt; @@ -1558,12 +1533,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; cputime_t utime, stime; + struct task_cputime cputime; memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; if (who == RUSAGE_THREAD) { - accumulate_thread_rusage(p, r, &utime, &stime); + accumulate_thread_rusage(p, r); goto out; } @@ -1586,8 +1562,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) break; case RUSAGE_SELF: - utime = cputime_add(utime, p->signal->utime); - stime = cputime_add(stime, p->signal->stime); + thread_group_cputime(p, &cputime); + utime = cputime_add(utime, 
cputime.utime); + stime = cputime_add(stime, cputime.stime); r->ru_nvcsw += p->signal->nvcsw; r->ru_nivcsw += p->signal->nivcsw; r->ru_minflt += p->signal->min_flt; @@ -1596,7 +1573,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += p->signal->oublock; t = p; do { - accumulate_thread_rusage(t, r, &utime, &stime); + accumulate_thread_rusage(t, r); t = next_thread(t); } while (t != p); break; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 03fc6a81ae32..69649783c266 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -75,6 +75,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -2321,13 +2322,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) initrlim = init_task.signal->rlim+i; rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { - /* - * This will cause RLIMIT_CPU calculations - * to be refigured. - */ - current->it_prof_expires = jiffies_to_cputime(1); - } + update_rlimit_cpu(rlim->rlim_cur); } /* Wake up the parent if it is waiting so that it can -- cgit v1.2.3 From ea6b184f7d521a503ecab71feca6e4057562252b Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 22 Sep 2008 15:41:19 -0400 Subject: selinux: use default proc sid on symlinks As we are not concerned with fine-grained control over reading of symlinks in proc, always use the default proc SID for all proc symlinks. This should help avoid permission issues upon changes to the proc tree as in the /proc/net -> /proc/self/net example. This does not alter labeling of symlinks within /proc/pid directories. ls -Zd /proc/net output before and after the patch should show the difference. Signed-off-by: Stephen D. Smalley Signed-off-by: James Morris --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'security/selinux') diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 89f446d86054..4a7374c12d9c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1291,7 +1291,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent /* Default to the fs superblock SID. */ isec->sid = sbsec->sid; - if (sbsec->proc) { + if (sbsec->proc && !S_ISLNK(inode->i_mode)) { struct proc_inode *proci = PROC_I(inode); if (proci->pde) { isec->sclass = inode_mode_to_security_class(inode->i_mode); -- cgit v1.2.3 From 81990fbdd18b9cfdc93dc221ff3250f81468aed8 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 3 Oct 2008 10:51:15 -0400 Subject: selinux: Fix an uninitialized variable BUG/panic in selinux_secattr_to_sid() At some point during the 2.6.27 development cycle two new fields were added to the SELinux context structure, a string pointer and a length field. The code in selinux_secattr_to_sid() was not modified and as a result these two fields were left uninitialized which could result in erratic behavior, including kernel panics, when NetLabel is used. This patch fixes the problem by fully initializing the context in selinux_secattr_to_sid() before use and reducing the level of direct context manipulation done to help prevent future problems. Please apply this to the 2.6.27-rcX release stream. 
Signed-off-by: Paul Moore Signed-off-by: James Morris --- security/selinux/ss/services.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 876b815c1ba4..ab0cc0c7b944 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2907,6 +2907,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ctx == NULL) goto netlbl_secattr_to_sid_return; + context_init(&ctx_new); ctx_new.user = ctx->user; ctx_new.role = ctx->role; ctx_new.type = ctx->type; @@ -2915,13 +2916,9 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat, secattr->attr.mls.cat) != 0) goto netlbl_secattr_to_sid_return; - ctx_new.range.level[1].cat.highbit = - ctx_new.range.level[0].cat.highbit; - ctx_new.range.level[1].cat.node = - ctx_new.range.level[0].cat.node; - } else { - ebitmap_init(&ctx_new.range.level[0].cat); - ebitmap_init(&ctx_new.range.level[1].cat); + memcpy(&ctx_new.range.level[1].cat, + &ctx_new.range.level[0].cat, + sizeof(ctx_new.range.level[0].cat)); } if (mls_context_isvalid(&policydb, &ctx_new) != 1) goto netlbl_secattr_to_sid_return_cleanup; -- cgit v1.2.3 From 3040a6d5a2655c7967bd42b5fb4903d48daa747f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 3 Oct 2008 10:51:15 -0400 Subject: selinux: Fix an uninitialized variable BUG/panic in selinux_secattr_to_sid() At some point during the 2.6.27 development cycle two new fields were added to the SELinux context structure, a string pointer and a length field. The code in selinux_secattr_to_sid() was not modified and as a result these two fields were left uninitialized which could result in erratic behavior, including kernel panics, when NetLabel is used. This patch fixes the problem by fully initializing the context in selinux_secattr_to_sid() before use and reducing the level of direct context manipulation done to help prevent future problems. Please apply this to the 2.6.27-rcX release stream. 
Signed-off-by: Paul Moore Signed-off-by: James Morris --- security/selinux/ss/services.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index d11a8154500f..8551952ef329 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2737,6 +2737,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ctx == NULL) goto netlbl_secattr_to_sid_return; + context_init(&ctx_new); ctx_new.user = ctx->user; ctx_new.role = ctx->role; ctx_new.type = ctx->type; @@ -2745,13 +2746,9 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat, secattr->attr.mls.cat) != 0) goto netlbl_secattr_to_sid_return; - ctx_new.range.level[1].cat.highbit = - ctx_new.range.level[0].cat.highbit; - ctx_new.range.level[1].cat.node = - ctx_new.range.level[0].cat.node; - } else { - ebitmap_init(&ctx_new.range.level[0].cat); - ebitmap_init(&ctx_new.range.level[1].cat); + memcpy(&ctx_new.range.level[1].cat, + &ctx_new.range.level[0].cat, + sizeof(ctx_new.range.level[0].cat)); } if (mls_context_isvalid(&policydb, &ctx_new) != 1) goto netlbl_secattr_to_sid_return_cleanup; -- cgit v1.2.3 From accc609322ef5ed44cba6d2d70c741afc76385fb Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:29 -0400 Subject: selinux: Cleanup the NetLabel glue code We were doing a lot of extra work in selinux_netlbl_sock_graft() what wasn't necessary so this patch removes that code. It also removes the redundant second argument to selinux_netlbl_sock_setsid() which allows us to simplify a few other functions. Signed-off-by: Paul Moore Acked-by: James Morris --- security/selinux/netlabel.c | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 89b418392f11..b9ce5fcf3432 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -66,22 +66,24 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb, /** * selinux_netlbl_sock_setsid - Label a socket using the NetLabel mechanism * @sk: the socket to label - * @sid: the SID to use * * Description: - * Attempt to label a socket using the NetLabel mechanism using the given - * SID. Returns zero values on success, negative values on failure. + * Attempt to label a socket using the NetLabel mechanism. Returns zero values + * on success, negative values on failure. 
* */ -static int selinux_netlbl_sock_setsid(struct sock *sk, u32 sid) +static int selinux_netlbl_sock_setsid(struct sock *sk) { int rc; struct sk_security_struct *sksec = sk->sk_security; struct netlbl_lsm_secattr secattr; + if (sksec->nlbl_state != NLBL_REQUIRE) + return 0; + netlbl_secattr_init(&secattr); - rc = security_netlbl_sid_to_secattr(sid, &secattr); + rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr); if (rc != 0) goto sock_setsid_return; rc = netlbl_sock_setattr(sk, &secattr); @@ -174,24 +176,10 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, */ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) { - struct sk_security_struct *sksec = sk->sk_security; - struct netlbl_lsm_secattr secattr; - u32 nlbl_peer_sid; - - if (sksec->nlbl_state != NLBL_REQUIRE) - return; - - netlbl_secattr_init(&secattr); - if (netlbl_sock_getattr(sk, &secattr) == 0 && - secattr.flags != NETLBL_SECATTR_NONE && - security_netlbl_secattr_to_sid(&secattr, &nlbl_peer_sid) == 0) - sksec->peer_sid = nlbl_peer_sid; - netlbl_secattr_destroy(&secattr); - /* Try to set the NetLabel on the socket to save time later, if we fail * here we will pick up the pieces in later calls to * selinux_netlbl_inode_permission(). */ - selinux_netlbl_sock_setsid(sk, sksec->sid); + selinux_netlbl_sock_setsid(sk); } /** @@ -205,13 +193,7 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) */ int selinux_netlbl_socket_post_create(struct socket *sock) { - struct sock *sk = sock->sk; - struct sk_security_struct *sksec = sk->sk_security; - - if (sksec->nlbl_state != NLBL_REQUIRE) - return 0; - - return selinux_netlbl_sock_setsid(sk, sksec->sid); + return selinux_netlbl_sock_setsid(sock->sk); } /** @@ -246,7 +228,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask) local_bh_disable(); bh_lock_sock_nested(sk); if (likely(sksec->nlbl_state == NLBL_REQUIRE)) - rc = selinux_netlbl_sock_setsid(sk, sksec->sid); + rc = selinux_netlbl_sock_setsid(sk); else rc = 0; bh_unlock_sock(sk); -- cgit v1.2.3 From aa86290089a1e57b4bdbbb4720072233f66bd5b2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:29 -0400 Subject: selinux: Correctly handle IPv4 packets on IPv6 sockets in all cases We did the right thing in a few cases but there were several areas where we determined a packet's address family based on the socket's address family, which is not the right thing to do since we can get IPv4 packets on IPv6 sockets. This patch fixes these problems by taking the address family directly from the packet. 
Signed-off-by: Paul Moore Acked-by: James Morris --- security/selinux/hooks.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 03fc6a81ae32..223f474bee86 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4207,10 +4207,12 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff * u32 peer_secid = SECSID_NULL; u16 family; - if (sock) + if (skb && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; + else if (skb && skb->protocol == htons(ETH_P_IPV6)) + family = PF_INET6; + else if (sock) family = sock->sk->sk_family; - else if (skb && skb->sk) - family = skb->sk->sk_family; else goto out; @@ -4277,10 +4279,15 @@ static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, { struct sk_security_struct *sksec = sk->sk_security; int err; + u16 family = sk->sk_family; u32 newsid; u32 peersid; - err = selinux_skb_peerlbl_sid(skb, sk->sk_family, &peersid); + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; + + err = selinux_skb_peerlbl_sid(skb, family, &peersid); if (err) return err; if (peersid == SECSID_NULL) { @@ -4318,9 +4325,14 @@ static void selinux_inet_csk_clone(struct sock *newsk, static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) { + u16 family = sk->sk_family; struct sk_security_struct *sksec = sk->sk_security; - selinux_skb_peerlbl_sid(skb, sk->sk_family, &sksec->peer_sid); + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; + + selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); } static void selinux_req_classify_flow(const struct request_sock *req, -- cgit v1.2.3 From d8395c876bb8a560c8a032887e191b95499a25d6 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:30 -0400 Subject: selinux: Better local/forward check in selinux_ip_postroute() It turns out that checking to see if skb->sk is NULL is not a very good indicator of a forwarded packet as some locally generated packets also have skb->sk set to NULL. Fix this by not only checking the skb->sk field but also the IP[6]CB(skb)->flags field for the IP[6]SKB_FORWARDED flag. While we are at it, we are calling selinux_parse_skb() much earlier than we really should resulting in potentially wasted cycles parsing packets for information we might no use; so shuffle the code around a bit to fix this. 
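A rough sketch of the classification this gives us, reduced to a boolean check (in the real hook the "forwarded" bit comes from IPCB(skb)->flags & IPSKB_FORWARDED or the IPv6 equivalent; here it is just a parameter, and the enum names are illustrative):

#include <stdbool.h>
#include <stdio.h>

enum packet_perm { PACKET_SEND, PACKET_FORWARD_OUT };

static enum packet_perm classify_outbound(bool has_socket, bool cb_forwarded)
{
	if (has_socket)
		return PACKET_SEND;	/* locally generated by an application */
	/* no socket: either forwarded traffic or kernel-generated traffic */
	return cb_forwarded ? PACKET_FORWARD_OUT : PACKET_SEND;
}

int main(void)
{
	/* kernel-generated packet: no socket, not forwarded -> send */
	printf("%d\n", classify_outbound(false, false) == PACKET_SEND);
	/* forwarded packet: no socket, forwarded flag set -> forward_out */
	printf("%d\n", classify_outbound(false, true) == PACKET_FORWARD_OUT);
	return 0;
}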
Signed-off-by: Paul Moore Acked-by: James Morris --- security/selinux/hooks.c | 126 ++++++++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 45 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 223f474bee86..b520667a24be 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4070,20 +4070,28 @@ static int selinux_sock_rcv_skb_iptables_compat(struct sock *sk, } static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, - struct avc_audit_data *ad, - u16 family, char *addrp) + u16 family) { int err; struct sk_security_struct *sksec = sk->sk_security; u32 peer_sid; u32 sk_sid = sksec->sid; + struct avc_audit_data ad; + char *addrp; + + AVC_AUDIT_DATA_INIT(&ad, NET); + ad.u.net.netif = skb->iif; + ad.u.net.family = family; + err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); + if (err) + return err; if (selinux_compat_net) - err = selinux_sock_rcv_skb_iptables_compat(sk, skb, ad, + err = selinux_sock_rcv_skb_iptables_compat(sk, skb, &ad, family, addrp); else err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, - PACKET__RECV, ad); + PACKET__RECV, &ad); if (err) return err; @@ -4092,12 +4100,12 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, if (err) return err; err = avc_has_perm(sk_sid, peer_sid, - SECCLASS_PEER, PEER__RECV, ad); + SECCLASS_PEER, PEER__RECV, &ad); } else { - err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, ad); + err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad); if (err) return err; - err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, ad); + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); } return err; @@ -4111,6 +4119,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) u32 sk_sid = sksec->sid; struct avc_audit_data ad; char *addrp; + u8 secmark_active; + u8 peerlbl_active; if (family != PF_INET && family != PF_INET6) return 0; @@ -4119,6 +4129,18 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) family = PF_INET; + /* If any sort of compatibility mode is enabled then handoff processing + * to the selinux_sock_rcv_skb_compat() function to deal with the + * special handling. We do this in an attempt to keep this function + * as fast and as clean as possible. */ + if (selinux_compat_net || !selinux_policycap_netpeer) + return selinux_sock_rcv_skb_compat(sk, skb, family); + + secmark_active = selinux_secmark_enabled(); + peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + if (!secmark_active && !peerlbl_active) + return 0; + AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->iif; ad.u.net.family = family; @@ -4126,15 +4148,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) return err; - /* If any sort of compatibility mode is enabled then handoff processing - * to the selinux_sock_rcv_skb_compat() function to deal with the - * special handling. We do this in an attempt to keep this function - * as fast and as clean as possible. 
*/ - if (selinux_compat_net || !selinux_policycap_netpeer) - return selinux_sock_rcv_skb_compat(sk, skb, &ad, - family, addrp); - - if (netlbl_enabled() || selinux_xfrm_enabled()) { + if (peerlbl_active) { u32 peer_sid; err = selinux_skb_peerlbl_sid(skb, family, &peer_sid); @@ -4148,7 +4162,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) PEER__RECV, &ad); } - if (selinux_secmark_enabled()) { + if (secmark_active) { err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) @@ -4396,15 +4410,15 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, if (!secmark_active && !peerlbl_active) return NF_ACCEPT; + if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) + return NF_DROP; + AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) return NF_DROP; - if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) - return NF_DROP; - if (peerlbl_active) if (selinux_inet_sys_rcv_skb(ifindex, addrp, family, peer_sid, &ad) != 0) @@ -4505,30 +4519,36 @@ static int selinux_ip_postroute_iptables_compat(struct sock *sk, static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, int ifindex, - struct avc_audit_data *ad, - u16 family, - char *addrp, - u8 proto) + u16 family) { struct sock *sk = skb->sk; struct sk_security_struct *sksec; + struct avc_audit_data ad; + char *addrp; + u8 proto; if (sk == NULL) return NF_ACCEPT; sksec = sk->sk_security; + AVC_AUDIT_DATA_INIT(&ad, NET); + ad.u.net.netif = ifindex; + ad.u.net.family = family; + if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) + return NF_DROP; + if (selinux_compat_net) { if (selinux_ip_postroute_iptables_compat(skb->sk, ifindex, - ad, family, addrp)) + &ad, family, addrp)) return NF_DROP; } else { if (avc_has_perm(sksec->sid, skb->secmark, - SECCLASS_PACKET, PACKET__SEND, ad)) + SECCLASS_PACKET, PACKET__SEND, &ad)) return NF_DROP; } if (selinux_policycap_netpeer) - if (selinux_xfrm_postroute_last(sksec->sid, skb, ad, proto)) + if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) return NF_DROP; return NF_ACCEPT; @@ -4542,23 +4562,15 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, struct sock *sk; struct avc_audit_data ad; char *addrp; - u8 proto; u8 secmark_active; u8 peerlbl_active; - AVC_AUDIT_DATA_INIT(&ad, NET); - ad.u.net.netif = ifindex; - ad.u.net.family = family; - if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) - return NF_DROP; - /* If any sort of compatibility mode is enabled then handoff processing * to the selinux_ip_postroute_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. 
*/ if (selinux_compat_net || !selinux_policycap_netpeer) - return selinux_ip_postroute_compat(skb, ifindex, &ad, - family, addrp, proto); + return selinux_ip_postroute_compat(skb, ifindex, family); /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec * packet transformation so allow the packet to pass without any checks @@ -4574,21 +4586,45 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, if (!secmark_active && !peerlbl_active) return NF_ACCEPT; - /* if the packet is locally generated (skb->sk != NULL) then use the - * socket's label as the peer label, otherwise the packet is being - * forwarded through this system and we need to fetch the peer label - * directly from the packet */ + /* if the packet is being forwarded then get the peer label from the + * packet itself; otherwise check to see if it is from a local + * application or the kernel, if from an application get the peer label + * from the sending socket, otherwise use the kernel's sid */ sk = skb->sk; - if (sk) { + if (sk == NULL) { + switch (family) { + case PF_INET: + if (IPCB(skb)->flags & IPSKB_FORWARDED) + secmark_perm = PACKET__FORWARD_OUT; + else + secmark_perm = PACKET__SEND; + break; + case PF_INET6: + if (IP6CB(skb)->flags & IP6SKB_FORWARDED) + secmark_perm = PACKET__FORWARD_OUT; + else + secmark_perm = PACKET__SEND; + break; + default: + return NF_DROP; + } + if (secmark_perm == PACKET__FORWARD_OUT) { + if (selinux_skb_peerlbl_sid(skb, family, &peer_sid)) + return NF_DROP; + } else + peer_sid = SECINITSID_KERNEL; + } else { struct sk_security_struct *sksec = sk->sk_security; peer_sid = sksec->sid; secmark_perm = PACKET__SEND; - } else { - if (selinux_skb_peerlbl_sid(skb, family, &peer_sid)) - return NF_DROP; - secmark_perm = PACKET__FORWARD_OUT; } + AVC_AUDIT_DATA_INIT(&ad, NET); + ad.u.net.netif = ifindex; + ad.u.net.family = family; + if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) + return NF_DROP; + if (secmark_active) if (avc_has_perm(peer_sid, skb->secmark, SECCLASS_PACKET, secmark_perm, &ad)) -- cgit v1.2.3 From 99d854d231ce141850b988bdc7e2e7c78f49b03a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:30 -0400 Subject: selinux: Fix a problem in security_netlbl_sid_to_secattr() Currently when SELinux fails to allocate memory in security_netlbl_sid_to_secattr() the NetLabel LSM domain field is set to NULL which triggers the default NetLabel LSM domain mapping which may not always be the desired mapping. This patch fixes this by returning an error when the kernel is unable to allocate memory. This could result in more failures on a system with heavy memory pressure but it is the "correct" thing to do. 
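The pattern is simply "check the allocation and fail loudly instead of falling back"; a standalone sketch with hypothetical names (strdup() standing in for kstrdup()):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct secattr_sketch {
	char *domain;
};

static int set_domain(struct secattr_sketch *attr, const char *type_name)
{
	attr->domain = strdup(type_name);
	if (attr->domain == NULL)
		return -ENOMEM;	/* report the failure, don't leave domain NULL */
	return 0;
}

int main(void)
{
	struct secattr_sketch attr = { NULL };

	if (set_domain(&attr, "httpd_t") != 0) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}
	printf("domain = %s\n", attr.domain);
	free(attr.domain);
	return 0;
}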
Signed-off-by: Paul Moore Acked-by: James Morris --- security/selinux/ss/services.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8551952ef329..c8f688a10041 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2785,7 +2785,7 @@ netlbl_secattr_to_sid_return_cleanup: */ int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr) { - int rc = -ENOENT; + int rc; struct context *ctx; if (!ss_initialized) @@ -2793,10 +2793,16 @@ int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr) read_lock(&policy_rwlock); ctx = sidtab_search(&sidtab, sid); - if (ctx == NULL) + if (ctx == NULL) { + rc = -ENOENT; goto netlbl_sid_to_secattr_failure; + } secattr->domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1], GFP_ATOMIC); + if (secattr->domain == NULL) { + rc = -ENOMEM; + goto netlbl_sid_to_secattr_failure; + } secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY; mls_export_netlbl_lvl(ctx, secattr); rc = mls_export_netlbl_cat(ctx, secattr); -- cgit v1.2.3 From dfaebe9825ff34983778f287101bc5f3bce00640 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:31 -0400 Subject: selinux: Fix missing calls to netlbl_skbuff_err() At some point I think I messed up and dropped the calls to netlbl_skbuff_err() which are necessary for CIPSO to send error notifications to remote systems. This patch re-introduces the error handling calls into the SELinux code. Signed-off-by: Paul Moore Acked-by: James Morris --- include/net/netlabel.h | 6 ++++-- net/netlabel/netlabel_kapi.c | 5 +++-- security/selinux/hooks.c | 19 +++++++++++++++---- security/selinux/include/netlabel.h | 9 +++++++++ security/selinux/netlabel.c | 20 +++++++++++++++++++- 5 files changed, 50 insertions(+), 9 deletions(-) (limited to 'security/selinux') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 5303749b7093..e16db0961265 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -382,7 +382,7 @@ int netlbl_sock_getattr(struct sock *sk, int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); -void netlbl_skbuff_err(struct sk_buff *skb, int error); +void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway); /* * LSM label mapping cache operations @@ -454,7 +454,9 @@ static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, { return -ENOSYS; } -static inline void netlbl_skbuff_err(struct sk_buff *skb, int error) +static inline void netlbl_skbuff_err(struct sk_buff *skb, + int error, + int gateway) { return; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 6c211fe97782..22faba620e4b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -490,6 +490,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * netlbl_skbuff_err - Handle a LSM error on a sk_buff * @skb: the packet * @error: the error code + * @gateway: true if host is acting as a gateway, false otherwise * * Description: * Deal with a LSM problem when handling the packet in @skb, typically this is @@ -497,10 +498,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * according to the packet's labeling protocol. 
* */ -void netlbl_skbuff_err(struct sk_buff *skb, int error) +void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { if (CIPSO_V4_OPTEXIST(skb)) - cipso_v4_error(skb, error, 0); + cipso_v4_error(skb, error, gateway); } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b520667a24be..a91146a6b37d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4101,6 +4101,8 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, return err; err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); + if (err) + selinux_netlbl_err(skb, err, 0); } else { err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad); if (err) @@ -4156,10 +4158,14 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) return err; err = selinux_inet_sys_rcv_skb(skb->iif, addrp, family, peer_sid, &ad); - if (err) + if (err) { + selinux_netlbl_err(skb, err, 0); return err; + } err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); + if (err) + selinux_netlbl_err(skb, err, 0); } if (secmark_active) { @@ -4396,6 +4402,7 @@ out: static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, u16 family) { + int err; char *addrp; u32 peer_sid; struct avc_audit_data ad; @@ -4419,10 +4426,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) return NF_DROP; - if (peerlbl_active) - if (selinux_inet_sys_rcv_skb(ifindex, addrp, family, - peer_sid, &ad) != 0) + if (peerlbl_active) { + err = selinux_inet_sys_rcv_skb(ifindex, addrp, family, + peer_sid, &ad); + if (err) { + selinux_netlbl_err(skb, err, 1); return NF_DROP; + } + } if (secmark_active) if (avc_has_perm(peer_sid, skb->secmark, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index 487a7d81fe20..d4e3ac8a7fbf 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -39,6 +39,8 @@ #ifdef CONFIG_NETLABEL void selinux_netlbl_cache_invalidate(void); +void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway); + void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec, int family); @@ -63,6 +65,13 @@ static inline void selinux_netlbl_cache_invalidate(void) return; } +static inline void selinux_netlbl_err(struct sk_buff *skb, + int error, + int gateway) +{ + return; +} + static inline void selinux_netlbl_sk_security_reset( struct sk_security_struct *ssec, int family) diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index b9ce5fcf3432..4053f7fc95fb 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -107,6 +107,24 @@ void selinux_netlbl_cache_invalidate(void) netlbl_cache_invalidate(); } +/** + * selinux_netlbl_err - Handle a NetLabel packet error + * @skb: the packet + * @error: the error code + * @gateway: true if host is acting as a gateway, false otherwise + * + * Description: + * When a packet is dropped due to a call to avc_has_perm() pass the error + * code to the NetLabel subsystem so any protocol specific processing can be + * done. This is safe to call even if you are unsure if NetLabel labeling is + * present on the packet, NetLabel is smart enough to only act when it should. 
+ * + */ +void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway) +{ + netlbl_skbuff_err(skb, error, gateway); +} + /** * selinux_netlbl_sk_security_reset - Reset the NetLabel fields * @ssec: the sk_security_struct @@ -289,7 +307,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, return 0; if (nlbl_sid != SECINITSID_UNLABELED) - netlbl_skbuff_err(skb, rc); + netlbl_skbuff_err(skb, rc, 0); return rc; } -- cgit v1.2.3 From 948bf85c1bc9a84754786a9d5dd99b7ecc46451e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:32 -0400 Subject: netlabel: Add functionality to set the security attributes of a packet This patch builds upon the new NetLabel address selector functionality by providing the NetLabel KAPI and CIPSO engine support needed to enable the new packet-based labeling. The only new addition to the NetLabel KAPI at this point is shown below: * int netlbl_skbuff_setattr(skb, family, secattr) ... and is designed to be called from a Netfilter hook after the packet's IP header has been populated such as in the FORWARD or LOCAL_OUT hooks. This patch also provides the necessary SELinux hooks to support this new functionality. Smack support is not currently included due to uncertainty regarding the permissions needed to expand the Smack network access controls. Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/cipso_ipv4.h | 16 +++ include/net/netlabel.h | 9 ++ net/ipv4/cipso_ipv4.c | 222 +++++++++++++++++++++++++++++------- net/netlabel/netlabel_kapi.c | 60 ++++++++++ security/selinux/hooks.c | 50 +++++++- security/selinux/include/netlabel.h | 9 ++ security/selinux/include/objsec.h | 1 + security/selinux/netlabel.c | 68 ++++++++++- 8 files changed, 393 insertions(+), 42 deletions(-) (limited to 'security/selinux') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 5fe6556fb3c5..2ce093ba553d 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -208,6 +208,10 @@ int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_delattr(struct sk_buff *skb); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr); int cipso_v4_validate(unsigned char **option); @@ -232,6 +236,18 @@ static inline int cipso_v4_sock_getattr(struct sock *sk, return -ENOSYS; } +static inline int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + return -ENOSYS; +} + static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 0729f8ce5042..3f67e6d49e40 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -382,6 +382,9 @@ int netlbl_sock_setattr(struct sock *sk, const struct netlbl_lsm_secattr *secattr); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); @@ -451,6 +454,12 
@@ static inline int netlbl_sock_getattr(struct sock *sk, { return -ENOSYS; } +static inline int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bf87eddfec30..e13d6dbb66ab 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -13,7 +13,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1665,48 +1665,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) } /** - * cipso_v4_sock_setattr - Add a CIPSO option to a socket - * @sk: the socket + * cipso_v4_genopt - Generate a CIPSO option + * @buf: the option buffer + * @buf_len: the size of opt_buf * @doi_def: the CIPSO DOI to use - * @secattr: the specific security attributes of the socket + * @secattr: the security attributes * * Description: - * Set the CIPSO option on the given socket using the DOI definition and - * security attributes passed to the function. This function requires - * exclusive access to @sk, which means it either needs to be in the - * process of being created or locked. Returns zero on success and negative - * values on failure. + * Generate a CIPSO option using the DOI definition and security attributes + * passed to the function. Returns the length of the option on success and + * negative values on failure. * */ -int cipso_v4_sock_setattr(struct sock *sk, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) +static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { - int ret_val = -EPERM; + int ret_val; u32 iter; - unsigned char *buf; - u32 buf_len = 0; - u32 opt_len; - struct ip_options *opt = NULL; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; - - /* In the case of sock_create_lite(), the sock->sk field is not - * defined yet but it is not a problem as the only users of these - * "lite" PF_INET sockets are functions which do an accept() call - * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) - return 0; - /* We allocate the maximum CIPSO option size here so we are probably - * being a little wasteful, but it makes our life _much_ easier later - * on and after all we are only talking about 40 bytes. 
*/ - buf_len = CIPSO_V4_OPT_LEN_MAX; - buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { - ret_val = -ENOMEM; - goto socket_setattr_failure; - } + if (buf_len <= CIPSO_V4_HDR_LEN) + return -ENOSPC; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC @@ -1734,8 +1713,7 @@ int cipso_v4_sock_setattr(struct sock *sk, buf_len - CIPSO_V4_HDR_LEN); break; default: - ret_val = -EPERM; - goto socket_setattr_failure; + return -EPERM; } iter++; @@ -1743,9 +1721,58 @@ int cipso_v4_sock_setattr(struct sock *sk, iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); if (ret_val < 0) - goto socket_setattr_failure; + return ret_val; cipso_v4_gentag_hdr(doi_def, buf, ret_val); - buf_len = CIPSO_V4_HDR_LEN + ret_val; + return CIPSO_V4_HDR_LEN + ret_val; +} + +/** + * cipso_v4_sock_setattr - Add a CIPSO option to a socket + * @sk: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. + * + */ +int cipso_v4_sock_setattr(struct sock *sk, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + if (sk == NULL) + return 0; + + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto socket_setattr_failure; + buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and @@ -1853,6 +1880,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) secattr); } +/** + * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Set the CIPSO option on the given packet based on the security attributes. + * Returns a pointer to the IP header on success and NULL on failure. 
+ * + */ +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; + u32 buf_len = CIPSO_V4_OPT_LEN_MAX; + u32 opt_len; + int len_delta; + + buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (buf_len < 0) + return buf_len; + opt_len = (buf_len + 3) & ~3; + + /* we overwrite any existing options to ensure that we have enough + * room for the CIPSO option, the reason is that we _need_ to guarantee + * that the security label is applied to the packet - we do the same + * thing when using the socket options and it hasn't caused a problem, + * if we need to we can always revisit this choice later */ + + len_delta = opt_len - opt->optlen; + /* if we don't ensure enough headroom we could panic on the skb_push() + * call below so make sure we have enough, we are also "mangling" the + * packet so we should probably do a copy-on-write call anyway */ + ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + if (ret_val < 0) + return ret_val; + + if (len_delta > 0) { + /* we assume that the header + opt->optlen have already been + * "pushed" in ip_options_build() or similar */ + iph = ip_hdr(skb); + skb_push(skb, len_delta); + memmove((char *)iph - len_delta, iph, iph->ihl << 2); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + } else if (len_delta < 0) { + iph = ip_hdr(skb); + memset(iph + 1, IPOPT_NOP, opt->optlen); + } else + iph = ip_hdr(skb); + + if (opt->optlen > 0) + memset(opt, 0, sizeof(*opt)); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + opt->is_changed = 1; + + /* we have to do the following because we are being called from a + * netfilter hook which means the packet already has had the header + * fields populated and the checksum calculated - yes this means we + * are doing more work than needed but we do it to keep the core + * stack clean and tidy */ + memcpy(iph + 1, buf, buf_len); + if (opt_len > buf_len) + memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); + if (len_delta != 0) { + iph->ihl = 5 + (opt_len >> 2); + iph->tot_len = htons(skb->len); + } + ip_send_check(iph); + + return 0; +} + +/** + * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet + * @skb: the packet + * + * Description: + * Removes any and all CIPSO options from the given packet. Returns zero on + * success, negative values on failure. 
+ * + */ +int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char *cipso_ptr; + + if (opt->cipso == 0) + return 0; + + /* since we are changing the packet we should make a copy */ + ret_val = skb_cow(skb, skb_headroom(skb)); + if (ret_val < 0) + return ret_val; + + /* the easiest thing to do is just replace the cipso option with noop + * options since we don't change the size of the packet, although we + * still need to recalculate the checksum */ + + iph = ip_hdr(skb); + cipso_ptr = (unsigned char *)iph + opt->cipso; + memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + opt->cipso = 0; + opt->is_changed = 1; + + ip_send_check(iph); + + return 0; +} + /** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 8b820dc98060..cc8047d1f505 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -472,6 +472,66 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) return cipso_v4_sock_getattr(sk, secattr); } +/** + * netlbl_skbuff_setattr - Label a packet using the correct protocol + * @skb: the packet + * @family: protocol family + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given packet using the security attributes + * specified in @secattr. Returns zero on success, negative values on failure. + * + */ +int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *hdr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (family) { + case AF_INET: + hdr4 = ip_hdr(skb); + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + hdr4->daddr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto skbuff_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_skbuff_setattr(skb, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + ret_val = cipso_v4_skbuff_delattr(skb); + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +skbuff_setattr_return: + rcu_read_unlock(); + return ret_val; +} + /** * netlbl_skbuff_getattr - Determine the security attributes of a packet * @skb: the packet diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a91146a6b37d..7432bdd5d367 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4407,13 +4407,15 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, u32 peer_sid; struct avc_audit_data ad; u8 secmark_active; + u8 netlbl_active; u8 peerlbl_active; if (!selinux_policycap_netpeer) return NF_ACCEPT; secmark_active = selinux_secmark_enabled(); - peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + netlbl_active = netlbl_enabled(); + peerlbl_active = netlbl_active || selinux_xfrm_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; @@ -4440,6 +4442,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, SECCLASS_PACKET, 
PACKET__FORWARD_IN, &ad)) return NF_DROP; + if (netlbl_active) + /* we do this in the FORWARD path and not the POST_ROUTING + * path because we want to make sure we apply the necessary + * labeling before IPsec is applied so we can leverage AH + * protection */ + if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0) + return NF_DROP; + return NF_ACCEPT; } @@ -4463,6 +4473,37 @@ static unsigned int selinux_ipv6_forward(unsigned int hooknum, } #endif /* IPV6 */ +static unsigned int selinux_ip_output(struct sk_buff *skb, + u16 family) +{ + u32 sid; + + if (!netlbl_enabled()) + return NF_ACCEPT; + + /* we do this in the LOCAL_OUT path and not the POST_ROUTING path + * because we want to make sure we apply the necessary labeling + * before IPsec is applied so we can leverage AH protection */ + if (skb->sk) { + struct sk_security_struct *sksec = skb->sk->sk_security; + sid = sksec->sid; + } else + sid = SECINITSID_KERNEL; + if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static unsigned int selinux_ipv4_output(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return selinux_ip_output(skb, PF_INET); +} + static int selinux_ip_postroute_iptables_compat(struct sock *sk, int ifindex, struct avc_audit_data *ad, @@ -5700,6 +5741,13 @@ static struct nf_hook_ops selinux_ipv4_ops[] = { .pf = PF_INET, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SELINUX_FIRST, + }, + { + .hook = selinux_ipv4_output, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SELINUX_FIRST, } }; diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index d4e3ac8a7fbf..b3e6ae071fc3 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -48,6 +48,9 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, u16 family, u32 *type, u32 *sid); +int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + u16 family, + u32 sid); void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); int selinux_netlbl_socket_post_create(struct socket *sock); @@ -88,6 +91,12 @@ static inline int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, *sid = SECSID_NULL; return 0; } +static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + u16 family, + u32 sid) +{ + return 0; +} static inline void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 91070ab874ce..f46dd1c3d01c 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -117,6 +117,7 @@ struct sk_security_struct { NLBL_UNSET = 0, NLBL_REQUIRE, NLBL_LABELED, + NLBL_REQSKB, } nlbl_state; #endif }; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 4053f7fc95fb..090404d6e512 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -9,7 +9,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2007 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2007, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include "objsec.h" #include "security.h" @@ -77,6 +79,8 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) int rc; struct 
sk_security_struct *sksec = sk->sk_security; struct netlbl_lsm_secattr secattr; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; if (sksec->nlbl_state != NLBL_REQUIRE) return 0; @@ -87,8 +91,29 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) if (rc != 0) goto sock_setsid_return; rc = netlbl_sock_setattr(sk, &secattr); - if (rc == 0) + switch (rc) { + case 0: sksec->nlbl_state = NLBL_LABELED; + break; + case -EDESTADDRREQ: + /* we are going to possibly end up labeling the individual + * packets later which is problematic for stream sockets + * because of the additional IP header size, our solution is to + * allow for the maximum IP header length (40 bytes for IPv4, + * we don't have to worry about IPv6 yet) just in case */ + sk_inet = inet_sk(sk); + if (sk_inet->is_icsk) { + sk_conn = inet_csk(sk); + if (sk_inet->opt) + sk_conn->icsk_ext_hdr_len -= + sk_inet->opt->optlen; + sk_conn->icsk_ext_hdr_len += 40; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } + sksec->nlbl_state = NLBL_REQSKB; + rc = 0; + break; + } sock_setsid_return: netlbl_secattr_destroy(&secattr); @@ -182,6 +207,45 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, return rc; } +/** + * selinux_netlbl_skbuff_setsid - Set the NetLabel on a packet given a sid + * @skb: the packet + * @family: protocol family + * @sid: the SID + * + * Description + * Call the NetLabel mechanism to set the label of a packet using @sid. + * Returns zero on auccess, negative values on failure. + * + */ +int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + u16 family, + u32 sid) +{ + int rc; + struct netlbl_lsm_secattr secattr; + struct sock *sk; + + /* if this is a locally generated packet check to see if it is already + * being labeled by it's parent socket, if it is just exit */ + sk = skb->sk; + if (sk != NULL) { + struct sk_security_struct *sksec = sk->sk_security; + if (sksec->nlbl_state != NLBL_REQSKB) + return 0; + } + + netlbl_secattr_init(&secattr); + rc = security_netlbl_sid_to_secattr(sid, &secattr); + if (rc != 0) + goto skbuff_setsid_return; + rc = netlbl_skbuff_setattr(skb, family, &secattr); + +skbuff_setsid_return: + netlbl_secattr_destroy(&secattr); + return rc; +} + /** * selinux_netlbl_sock_graft - Netlabel the new socket * @sk: the new connection -- cgit v1.2.3 From 014ab19a69c325f52d7bae54ceeda73d6307ae0c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:33 -0400 Subject: selinux: Set socket NetLabel based on connection endpoint Previous work enabled the use of address based NetLabel selectors, which while highly useful, brought the potential for additional per-packet overhead when used. This patch attempts to solve that by applying NetLabel socket labels when sockets are connect()'d. This should alleviate the per-packet NetLabel labeling for all connected sockets (yes, it even works for connected DGRAM sockets). 
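Conceptually the change keeps a per-socket labeling state and resolves the label once at connect() time, falling back to per-packet labeling only if that fails; a simplified sketch with hypothetical types and helpers (not the SELinux implementation):

#include <stdio.h>

enum nlbl_state_sketch { NLBL_PER_PACKET, NLBL_CONN_LABELED };

struct sock_sketch {
	enum nlbl_state_sketch nlbl_state;
	unsigned int peer_addr;		/* connection endpoint (IPv4) */
};

/* stand-in for applying the on-the-wire label for one endpoint */
static int label_for_endpoint(struct sock_sketch *sk, unsigned int addr)
{
	sk->peer_addr = addr;
	return 0;			/* pretend labeling succeeded */
}

static int on_connect(struct sock_sketch *sk, unsigned int addr)
{
	if (label_for_endpoint(sk, addr) == 0) {
		sk->nlbl_state = NLBL_CONN_LABELED;	/* no per-packet work needed */
		return 0;
	}
	sk->nlbl_state = NLBL_PER_PACKET;	/* label each packet instead */
	return 0;
}

int main(void)
{
	struct sock_sketch sk = { NLBL_PER_PACKET, 0 };

	on_connect(&sk, 0x0a000001);	/* 10.0.0.1 */
	printf("per-packet labeling needed: %s\n",
	       sk.nlbl_state == NLBL_CONN_LABELED ? "no" : "yes");
	return 0;
}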
Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/cipso_ipv4.h | 5 ++ include/net/netlabel.h | 13 ++++ net/ipv4/cipso_ipv4.c | 74 ++++++++++++++++++ net/netlabel/netlabel_kapi.c | 78 ++++++++++++++++++- security/selinux/hooks.c | 11 +-- security/selinux/include/netlabel.h | 19 ++++- security/selinux/include/objsec.h | 1 + security/selinux/netlabel.c | 147 +++++++++++++++++++++++++++++------- 8 files changed, 311 insertions(+), 37 deletions(-) (limited to 'security/selinux') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 2ce093ba553d..811febf97caf 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -207,6 +207,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); +void cipso_v4_sock_delattr(struct sock *sk); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_skbuff_setattr(struct sk_buff *skb, const struct cipso_v4_doi *doi_def, @@ -230,6 +231,10 @@ static inline int cipso_v4_sock_setattr(struct sock *sk, return -ENOSYS; } +static inline void cipso_v4_sock_delattr(struct sock *sk) +{ +} + static inline int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 3f67e6d49e40..074cad40ac66 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -380,8 +380,12 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, int netlbl_enabled(void); int netlbl_sock_setattr(struct sock *sk, const struct netlbl_lsm_secattr *secattr); +void netlbl_sock_delattr(struct sock *sk); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr); @@ -449,11 +453,20 @@ static inline int netlbl_sock_setattr(struct sock *sk, { return -ENOSYS; } +static inline void netlbl_sock_delattr(struct sock *sk) +{ +} static inline int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } +static inline int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} static inline int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e13d6dbb66ab..23768b9d6b64 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1809,6 +1809,80 @@ socket_setattr_failure: return ret_val; } +/** + * cipso_v4_sock_delattr - Delete the CIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CIPSO option from a socket, if present. 
+ * + */ +void cipso_v4_sock_delattr(struct sock *sk) +{ + u8 hdr_delta; + struct ip_options *opt; + struct inet_sock *sk_inet; + + sk_inet = inet_sk(sk); + opt = sk_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + u8 cipso_len; + u8 cipso_off; + unsigned char *cipso_ptr; + int iter; + int optlen_new; + + cipso_off = opt->cipso - sizeof(struct iphdr); + cipso_ptr = &opt->__data[cipso_off]; + cipso_len = cipso_ptr[1]; + + if (opt->srr > opt->cipso) + opt->srr -= cipso_len; + if (opt->rr > opt->cipso) + opt->rr -= cipso_len; + if (opt->ts > opt->cipso) + opt->ts -= cipso_len; + if (opt->router_alert > opt->cipso) + opt->router_alert -= cipso_len; + opt->cipso = 0; + + memmove(cipso_ptr, cipso_ptr + cipso_len, + opt->optlen - cipso_off - cipso_len); + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length */ + iter = 0; + optlen_new = 0; + while (iter < opt->optlen) + if (opt->__data[iter] != IPOPT_NOP) { + iter += opt->__data[iter + 1]; + optlen_new = iter; + } else + iter++; + hdr_delta = opt->optlen; + opt->optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->optlen; + } else { + /* only the cipso option was present on the socket so we can + * remove the entire option struct */ + sk_inet->opt = NULL; + hdr_delta = opt->optlen; + kfree(opt); + } + + if (sk_inet->is_icsk && hdr_delta > 0) { + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len -= hdr_delta; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } +} + /** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index cc8047d1f505..78fc557689b2 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -455,6 +455,20 @@ socket_setattr_return: return ret_val; } +/** + * netlbl_sock_delattr - Delete all the NetLabel labels on a socket + * @sk: the socket + * + * Description: + * Remove all the NetLabel labeling from @sk. The caller is responsible for + * ensuring that @sk is locked. + * + */ +void netlbl_sock_delattr(struct sock *sk) +{ + cipso_v4_sock_delattr(sk); +} + /** * netlbl_sock_getattr - Determine the security attributes of a sock * @sk: the sock @@ -472,6 +486,68 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) return cipso_v4_sock_getattr(sk, secattr); } +/** + * netlbl_conn_setattr - Label a connected socket using the correct protocol + * @sk: the socket to label + * @addr: the destination address + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given connected socket using the security + * attributes specified in @secattr. The caller is responsible for ensuring + * that @sk is locked. Returns zero on success, negative values on failure. 
+ * + */ +int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct sockaddr_in *addr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (addr->sa_family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto conn_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_sock_setattr(sk, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + cipso_v4_sock_delattr(sk); + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +conn_setattr_return: + rcu_read_unlock(); + return ret_val; +} + /** * netlbl_skbuff_setattr - Label a packet using the correct protocol * @skb: the packet diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7432bdd5d367..632ac3e80a61 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3794,6 +3794,7 @@ out: static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { + struct sock *sk = sock->sk; struct inode_security_struct *isec; int err; @@ -3807,7 +3808,6 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, isec = SOCK_INODE(sock)->i_security; if (isec->sclass == SECCLASS_TCP_SOCKET || isec->sclass == SECCLASS_DCCP_SOCKET) { - struct sock *sk = sock->sk; struct avc_audit_data ad; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; @@ -3841,6 +3841,8 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, goto out; } + err = selinux_netlbl_socket_connect(sk, address); + out: return err; } @@ -4290,8 +4292,6 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) sk->sk_family == PF_UNIX) isec->sid = sksec->sid; sksec->sclass = isec->sclass; - - selinux_netlbl_sock_graft(sk, parent); } static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, @@ -4342,8 +4342,7 @@ static void selinux_inet_csk_clone(struct sock *newsk, selinux_netlbl_sk_security_reset(newsksec, req->rsk_ops->family); } -static void selinux_inet_conn_established(struct sock *sk, - struct sk_buff *skb) +static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) { u16 family = sk->sk_family; struct sk_security_struct *sksec = sk->sk_security; @@ -4353,6 +4352,8 @@ static void selinux_inet_conn_established(struct sock *sk, family = PF_INET; selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); + + selinux_netlbl_inet_conn_established(sk, family); } static void selinux_req_classify_flow(const struct request_sock *req, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index b3e6ae071fc3..982bac0ac328 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -52,7 +52,7 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, u16 family, u32 sid); -void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); +void 
selinux_netlbl_inet_conn_established(struct sock *sk, u16 family); int selinux_netlbl_socket_post_create(struct socket *sock); int selinux_netlbl_inode_permission(struct inode *inode, int mask); int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, @@ -62,6 +62,8 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, int selinux_netlbl_socket_setsockopt(struct socket *sock, int level, int optname); +int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr); + #else static inline void selinux_netlbl_cache_invalidate(void) { @@ -98,8 +100,14 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, return 0; } -static inline void selinux_netlbl_sock_graft(struct sock *sk, - struct socket *sock) +static inline int selinux_netlbl_conn_setsid(struct sock *sk, + struct sockaddr *addr) +{ + return 0; +} + +static inline void selinux_netlbl_inet_conn_established(struct sock *sk, + u16 family) { return; } @@ -125,6 +133,11 @@ static inline int selinux_netlbl_socket_setsockopt(struct socket *sock, { return 0; } +static inline int selinux_netlbl_socket_connect(struct sock *sk, + struct sockaddr *addr) +{ + return 0; +} #endif /* CONFIG_NETLABEL */ #endif diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index f46dd1c3d01c..ad34787c6c02 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -118,6 +118,7 @@ struct sk_security_struct { NLBL_REQUIRE, NLBL_LABELED, NLBL_REQSKB, + NLBL_CONNLABELED, } nlbl_state; #endif }; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 090404d6e512..b22b7dafa0e3 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -29,10 +29,12 @@ #include #include +#include +#include #include #include -#include -#include +#include +#include #include "objsec.h" #include "security.h" @@ -79,8 +81,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) int rc; struct sk_security_struct *sksec = sk->sk_security; struct netlbl_lsm_secattr secattr; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; if (sksec->nlbl_state != NLBL_REQUIRE) return 0; @@ -96,20 +96,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) sksec->nlbl_state = NLBL_LABELED; break; case -EDESTADDRREQ: - /* we are going to possibly end up labeling the individual - * packets later which is problematic for stream sockets - * because of the additional IP header size, our solution is to - * allow for the maximum IP header length (40 bytes for IPv4, - * we don't have to worry about IPv6 yet) just in case */ - sk_inet = inet_sk(sk); - if (sk_inet->is_icsk) { - sk_conn = inet_csk(sk); - if (sk_inet->opt) - sk_conn->icsk_ext_hdr_len -= - sk_inet->opt->optlen; - sk_conn->icsk_ext_hdr_len += 40; - sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); - } sksec->nlbl_state = NLBL_REQSKB; rc = 0; break; @@ -247,21 +233,77 @@ skbuff_setsid_return: } /** - * selinux_netlbl_sock_graft - Netlabel the new socket + * selinux_netlbl_inet_conn_established - Netlabel the newly accepted connection * @sk: the new connection - * @sock: the new socket * * Description: - * The connection represented by @sk is being grafted onto @sock so set the - * socket's NetLabel to match the SID of @sk. + * A new connection has been established on @sk so make sure it is labeled + * correctly with the NetLabel susbsystem. 
* */ -void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) +void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family) { - /* Try to set the NetLabel on the socket to save time later, if we fail - * here we will pick up the pieces in later calls to - * selinux_netlbl_inode_permission(). */ - selinux_netlbl_sock_setsid(sk); + int rc; + struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + struct inet_sock *sk_inet = inet_sk(sk); + struct sockaddr_in addr; + + if (sksec->nlbl_state != NLBL_REQUIRE) + return; + + netlbl_secattr_init(&secattr); + if (security_netlbl_sid_to_secattr(sksec->sid, &secattr) != 0) + goto inet_conn_established_return; + + rc = netlbl_sock_setattr(sk, &secattr); + switch (rc) { + case 0: + sksec->nlbl_state = NLBL_LABELED; + break; + case -EDESTADDRREQ: + /* no PF_INET6 support yet because we don't support any IPv6 + * labeling protocols */ + if (family != PF_INET) { + sksec->nlbl_state = NLBL_UNSET; + goto inet_conn_established_return; + } + + addr.sin_family = family; + addr.sin_addr.s_addr = sk_inet->daddr; + if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr, + &secattr) != 0) { + /* we failed to label the connected socket (could be + * for a variety of reasons, the actual "why" isn't + * important here) so we have to go to our backup plan, + * labeling the packets individually in the netfilter + * local output hook. this is okay but we need to + * adjust the MSS of the connection to take into + * account any labeling overhead, since we don't know + * the exact overhead at this point we'll use the worst + * case value which is 40 bytes for IPv4 */ + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len += 40 - + (sk_inet->opt ? sk_inet->opt->optlen : 0); + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + + sksec->nlbl_state = NLBL_REQSKB; + } else + sksec->nlbl_state = NLBL_CONNLABELED; + break; + default: + /* note that we are failing to label the socket which could be + * a bad thing since it means traffic could leave the system + * without the desired labeling, however, all is not lost as + * we have a check in selinux_netlbl_inode_permission() to + * pick up the pieces that we might drop here because we can't + * return an error code */ + break; + } + +inet_conn_established_return: + netlbl_secattr_destroy(&secattr); + return; } /** @@ -398,7 +440,8 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock, struct netlbl_lsm_secattr secattr; if (level == IPPROTO_IP && optname == IP_OPTIONS && - sksec->nlbl_state == NLBL_LABELED) { + (sksec->nlbl_state == NLBL_LABELED || + sksec->nlbl_state == NLBL_CONNLABELED)) { netlbl_secattr_init(&secattr); lock_sock(sk); rc = netlbl_sock_getattr(sk, &secattr); @@ -410,3 +453,51 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock, return rc; } + +/** + * selinux_netlbl_socket_connect - Label a client-side socket on connect + * @sk: the socket to label + * @addr: the destination address + * + * Description: + * Attempt to label a connected socket with NetLabel using the given address. + * Returns zero values on success, negative values on failure. 
+ * + */ +int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) +{ + int rc; + struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + + if (sksec->nlbl_state != NLBL_REQSKB && + sksec->nlbl_state != NLBL_CONNLABELED) + return 0; + + netlbl_secattr_init(&secattr); + local_bh_disable(); + bh_lock_sock_nested(sk); + + /* connected sockets are allowed to disconnect when the address family + * is set to AF_UNSPEC, if that is what is happening we want to reset + * the socket */ + if (addr->sa_family == AF_UNSPEC) { + netlbl_sock_delattr(sk); + sksec->nlbl_state = NLBL_REQSKB; + rc = 0; + goto socket_connect_return; + } + rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr); + if (rc != 0) + goto socket_connect_return; + rc = netlbl_conn_setattr(sk, addr, &secattr); + if (rc != 0) + goto socket_connect_return; + sksec->nlbl_state = NLBL_CONNLABELED; + +socket_connect_return: + bh_unlock_sock(sk); + local_bh_enable(); + netlbl_secattr_destroy(&secattr); + return rc; +} -- cgit v1.2.3 From 6c5b3fc0147f79d714d2fe748b5869d7892ef2e7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:33 -0400 Subject: selinux: Cache NetLabel secattrs in the socket's security struct Previous work enabled the use of address based NetLabel selectors, which while highly useful, brought the potential for additional per-packet overhead when used. This patch attempts to mitigate some of that overhead by caching the NetLabel security attribute struct within the SELinux socket security structure. This should help eliminate the need to recreate the NetLabel secattr structure for each packet resulting in less overhead. Signed-off-by: Paul Moore Acked-by: James Morris --- security/selinux/hooks.c | 1 + security/selinux/include/netlabel.h | 7 +++ security/selinux/include/objsec.h | 7 ++- security/selinux/netlabel.c | 115 +++++++++++++++++++++++++----------- 4 files changed, 91 insertions(+), 39 deletions(-) (limited to 'security/selinux') diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 632ac3e80a61..3aa811eba25b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -291,6 +291,7 @@ static void sk_free_security(struct sock *sk) struct sk_security_struct *ssec = sk->sk_security; sk->sk_security = NULL; + selinux_netlbl_sk_security_free(ssec); kfree(ssec); } diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index 982bac0ac328..b913c8d06038 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -41,6 +41,7 @@ void selinux_netlbl_cache_invalidate(void); void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway); +void selinux_netlbl_sk_security_free(struct sk_security_struct *ssec); void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec, int family); @@ -77,6 +78,12 @@ static inline void selinux_netlbl_err(struct sk_buff *skb, return; } +static inline void selinux_netlbl_sk_security_free( + struct sk_security_struct *ssec) +{ + return; +} + static inline void selinux_netlbl_sk_security_reset( struct sk_security_struct *ssec, int family) diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index ad34787c6c02..f8be8d7fa26d 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -109,9 +109,6 @@ struct netport_security_struct { }; struct sk_security_struct { - u32 sid; /* SID of this object */ - u32 peer_sid; /* SID of peer */ - u16 sclass; /* sock security 
class */ #ifdef CONFIG_NETLABEL enum { /* NetLabel state */ NLBL_UNSET = 0, @@ -120,7 +117,11 @@ struct sk_security_struct { NLBL_REQSKB, NLBL_CONNLABELED, } nlbl_state; + struct netlbl_lsm_secattr *nlbl_secattr; /* NetLabel sec attributes */ #endif + u32 sid; /* SID of this object */ + u32 peer_sid; /* SID of peer */ + u16 sclass; /* sock security class */ }; struct key_security_struct { diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index b22b7dafa0e3..f58701a7b728 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -67,6 +67,38 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb, return rc; } +/** + * selinux_netlbl_sock_genattr - Generate the NetLabel socket secattr + * @sk: the socket + * + * Description: + * Generate the NetLabel security attributes for a socket, making full use of + * the socket's attribute cache. Returns a pointer to the security attributes + * on success, NULL on failure. + * + */ +static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk) +{ + int rc; + struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr *secattr; + + if (sksec->nlbl_secattr != NULL) + return sksec->nlbl_secattr; + + secattr = netlbl_secattr_alloc(GFP_ATOMIC); + if (secattr == NULL) + return NULL; + rc = security_netlbl_sid_to_secattr(sksec->sid, secattr); + if (rc != 0) { + netlbl_secattr_free(secattr); + return NULL; + } + sksec->nlbl_secattr = secattr; + + return secattr; +} + /** * selinux_netlbl_sock_setsid - Label a socket using the NetLabel mechanism * @sk: the socket to label @@ -80,17 +112,15 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) { int rc; struct sk_security_struct *sksec = sk->sk_security; - struct netlbl_lsm_secattr secattr; + struct netlbl_lsm_secattr *secattr; if (sksec->nlbl_state != NLBL_REQUIRE) return 0; - netlbl_secattr_init(&secattr); - - rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr); - if (rc != 0) - goto sock_setsid_return; - rc = netlbl_sock_setattr(sk, &secattr); + secattr = selinux_netlbl_sock_genattr(sk); + if (secattr == NULL) + return -ENOMEM; + rc = netlbl_sock_setattr(sk, secattr); switch (rc) { case 0: sksec->nlbl_state = NLBL_LABELED; @@ -101,8 +131,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) break; } -sock_setsid_return: - netlbl_secattr_destroy(&secattr); return rc; } @@ -136,6 +164,20 @@ void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway) netlbl_skbuff_err(skb, error, gateway); } +/** + * selinux_netlbl_sk_security_free - Free the NetLabel fields + * @sssec: the sk_security_struct + * + * Description: + * Free all of the memory in the NetLabel fields of a sk_security_struct. 
+ * + */ +void selinux_netlbl_sk_security_free(struct sk_security_struct *ssec) +{ + if (ssec->nlbl_secattr != NULL) + netlbl_secattr_free(ssec->nlbl_secattr); +} + /** * selinux_netlbl_sk_security_reset - Reset the NetLabel fields * @ssec: the sk_security_struct @@ -209,7 +251,8 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, u32 sid) { int rc; - struct netlbl_lsm_secattr secattr; + struct netlbl_lsm_secattr secattr_storage; + struct netlbl_lsm_secattr *secattr = NULL; struct sock *sk; /* if this is a locally generated packet check to see if it is already @@ -219,16 +262,21 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, struct sk_security_struct *sksec = sk->sk_security; if (sksec->nlbl_state != NLBL_REQSKB) return 0; + secattr = sksec->nlbl_secattr; + } + if (secattr == NULL) { + secattr = &secattr_storage; + netlbl_secattr_init(secattr); + rc = security_netlbl_sid_to_secattr(sid, secattr); + if (rc != 0) + goto skbuff_setsid_return; } - netlbl_secattr_init(&secattr); - rc = security_netlbl_sid_to_secattr(sid, &secattr); - if (rc != 0) - goto skbuff_setsid_return; - rc = netlbl_skbuff_setattr(skb, family, &secattr); + rc = netlbl_skbuff_setattr(skb, family, secattr); skbuff_setsid_return: - netlbl_secattr_destroy(&secattr); + if (secattr == &secattr_storage) + netlbl_secattr_destroy(secattr); return rc; } @@ -245,18 +293,18 @@ void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family) { int rc; struct sk_security_struct *sksec = sk->sk_security; - struct netlbl_lsm_secattr secattr; + struct netlbl_lsm_secattr *secattr; struct inet_sock *sk_inet = inet_sk(sk); struct sockaddr_in addr; if (sksec->nlbl_state != NLBL_REQUIRE) return; - netlbl_secattr_init(&secattr); - if (security_netlbl_sid_to_secattr(sksec->sid, &secattr) != 0) - goto inet_conn_established_return; + secattr = selinux_netlbl_sock_genattr(sk); + if (secattr == NULL) + return; - rc = netlbl_sock_setattr(sk, &secattr); + rc = netlbl_sock_setattr(sk, secattr); switch (rc) { case 0: sksec->nlbl_state = NLBL_LABELED; @@ -266,13 +314,13 @@ void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family) * labeling protocols */ if (family != PF_INET) { sksec->nlbl_state = NLBL_UNSET; - goto inet_conn_established_return; + return; } addr.sin_family = family; addr.sin_addr.s_addr = sk_inet->daddr; if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr, - &secattr) != 0) { + secattr) != 0) { /* we failed to label the connected socket (could be * for a variety of reasons, the actual "why" isn't * important here) so we have to go to our backup plan, @@ -300,10 +348,6 @@ void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family) * return an error code */ break; } - -inet_conn_established_return: - netlbl_secattr_destroy(&secattr); - return; } /** @@ -468,13 +512,12 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) { int rc; struct sk_security_struct *sksec = sk->sk_security; - struct netlbl_lsm_secattr secattr; + struct netlbl_lsm_secattr *secattr; if (sksec->nlbl_state != NLBL_REQSKB && sksec->nlbl_state != NLBL_CONNLABELED) return 0; - netlbl_secattr_init(&secattr); local_bh_disable(); bh_lock_sock_nested(sk); @@ -487,17 +530,17 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) rc = 0; goto socket_connect_return; } - rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr); - if (rc != 0) + secattr = selinux_netlbl_sock_genattr(sk); + if (secattr == NULL) { + rc = -ENOMEM; goto socket_connect_return; - rc = netlbl_conn_setattr(sk, 
addr, &secattr); - if (rc != 0) - goto socket_connect_return; - sksec->nlbl_state = NLBL_CONNLABELED; + } + rc = netlbl_conn_setattr(sk, addr, secattr); + if (rc == 0) + sksec->nlbl_state = NLBL_CONNLABELED; socket_connect_return: bh_unlock_sock(sk); local_bh_enable(); - netlbl_secattr_destroy(&secattr); return rc; } -- cgit v1.2.3 From 8d75899d033617316e06296b7c0729612f56aba0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:33 -0400 Subject: netlabel: Changes to the NetLabel security attributes to allow LSMs to pass full contexts This patch provides support for including the LSM's secid in addition to the LSM's MLS information in the NetLabel security attributes structure. Signed-off-by: Paul Moore Acked-by: James Morris --- include/net/netlabel.h | 2 +- security/selinux/ss/services.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'security/selinux') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 074cad40ac66..d56517ac3bae 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -203,7 +203,7 @@ struct netlbl_lsm_secattr { u32 type; char *domain; struct netlbl_lsm_cache *cache; - union { + struct { struct { struct netlbl_lsm_secattr_catmap *cat; u32 lvl; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index c8f688a10041..ed0ca649d7db 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2803,7 +2803,8 @@ int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr) rc = -ENOMEM; goto netlbl_sid_to_secattr_failure; } - secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY; + secattr->attr.secid = sid; + secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY | NETLBL_SECATTR_SECID; mls_export_netlbl_lvl(ctx, secattr); rc = mls_export_netlbl_cat(ctx, secattr); if (rc != 0) -- cgit v1.2.3 From 452a00d2ee288f2cbc36f676edd06cb14d2878c1 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 13 Oct 2008 10:39:13 +0100 Subject: tty: Make get_current_tty use a kref We now return a kref covered tty reference. That ensures the tty structure doesn't go away when you have a return from get_current_tty. This is not enough to protect you from most of the resources being freed behind your back - yet. 
[Updated to include fixes for SELinux problems found by Andrew Morton and an s390 leak found while debugging the former] Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 10 ++++++---- drivers/s390/char/fs3270.c | 3 ++- fs/dquot.c | 6 +++--- security/selinux/hooks.c | 3 ++- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'security/selinux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 9a76db3cda1c..4c0e4ed31a48 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -786,12 +786,12 @@ void disassociate_ctty(int on_exit) tty = get_current_tty(); if (tty) { tty_pgrp = get_pid(tty->pgrp); - lock_kernel(); mutex_unlock(&tty_mutex); - /* XXX: here we race, there is nothing protecting tty */ + lock_kernel(); if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) tty_vhangup(tty); unlock_kernel(); + tty_kref_put(tty); } else if (on_exit) { struct pid *old_pgrp; spin_lock_irq(&current->sighand->siglock); @@ -819,7 +819,6 @@ void disassociate_ctty(int on_exit) spin_unlock_irq(&current->sighand->siglock); mutex_lock(&tty_mutex); - /* It is possible that do_tty_hangup has free'd this tty */ tty = get_current_tty(); if (tty) { unsigned long flags; @@ -829,6 +828,7 @@ void disassociate_ctty(int on_exit) tty->session = NULL; tty->pgrp = NULL; spin_unlock_irqrestore(&tty->ctrl_lock, flags); + tty_kref_put(tty); } else { #ifdef TTY_DEBUG_HANGUP printk(KERN_DEBUG "error attempted to write to tty [0x%p]" @@ -1806,6 +1806,8 @@ retry_open: index = tty->index; filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ /* noctty = 1; */ + /* FIXME: Should we take a driver reference ? */ + tty_kref_put(tty); goto got_driver; } #ifdef CONFIG_VT @@ -3135,7 +3137,7 @@ struct tty_struct *get_current_tty(void) { struct tty_struct *tty; WARN_ON_ONCE(!mutex_is_locked(&tty_mutex)); - tty = current->signal->tty; + tty = tty_kref_get(current->signal->tty); /* * session->tty can be changed/cleared from under us, make sure we * issue the load. The obtained pointer, when not NULL, is valid as diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index d18e6d2e0b49..3ef5425d0eb8 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -430,11 +430,12 @@ fs3270_open(struct inode *inode, struct file *filp) mutex_lock(&tty_mutex); tty = get_current_tty(); if (!tty || tty->driver->major != IBM_TTY3270_MAJOR) { - mutex_unlock(&tty_mutex); + tty_kref_put(tty); rc = -ENODEV; goto out; } minor = tty->index + RAW3270_FIRSTMINOR; + tty_kref_put(tty); mutex_unlock(&tty_mutex); } /* Check if some other program is already using fullscreen mode.
*/ diff --git a/fs/dquot.c b/fs/dquot.c index 8ec4d6cc7633..7417a6ca3129 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -897,8 +897,9 @@ static void print_warning(struct dquot *dquot, const int warntype) mutex_lock(&tty_mutex); tty = get_current_tty(); + mutex_unlock(&tty_mutex); if (!tty) - goto out_lock; + return; tty_write_message(tty, dquot->dq_sb->s_id); if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) tty_write_message(tty, ": warning, "); @@ -926,8 +927,7 @@ static void print_warning(struct dquot *dquot, const int warntype) break; } tty_write_message(tty, msg); -out_lock: - mutex_unlock(&tty_mutex); + tty_kref_put(tty); } #endif diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4a7374c12d9c..089d61a23952 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2123,6 +2123,7 @@ static inline void flush_unauthorized_files(struct files_struct *files) mutex_lock(&tty_mutex); tty = get_current_tty(); + mutex_unlock(&tty_mutex); if (tty) { file_list_lock(); file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); @@ -2139,8 +2140,8 @@ static inline void flush_unauthorized_files(struct files_struct *files) } } file_list_unlock(); + tty_kref_put(tty); } - mutex_unlock(&tty_mutex); /* Reset controlling tty. */ if (drop_tty) no_tty(); -- cgit v1.2.3 From 934e6ebf96e8c1a0f299e64129fdaebc1132a427 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 13 Oct 2008 10:40:43 +0100 Subject: tty: Redo current tty locking Currently it is sometimes locked by the tty mutex and sometimes by the sighand lock. The latter is in fact correct and now we can hand back referenced objects we can fix this up without problems around sleeping functions. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 18 ++++-------------- drivers/s390/char/fs3270.c | 1 + fs/dquot.c | 2 -- security/selinux/hooks.c | 2 -- 4 files changed, 5 insertions(+), 18 deletions(-) (limited to 'security/selinux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index b5f57d0b30ee..f40298e9873a 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -739,13 +739,11 @@ void tty_vhangup_self(void) { struct tty_struct *tty; - mutex_lock(&tty_mutex); tty = get_current_tty(); if (tty) { tty_vhangup(tty); tty_kref_put(tty); } - mutex_unlock(&tty_mutex); } /** @@ -801,11 +799,9 @@ void disassociate_ctty(int on_exit) struct pid *tty_pgrp = NULL; - mutex_lock(&tty_mutex); tty = get_current_tty(); if (tty) { tty_pgrp = get_pid(tty->pgrp); - mutex_unlock(&tty_mutex); lock_kernel(); if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) tty_vhangup(tty); @@ -822,7 +818,6 @@ void disassociate_ctty(int on_exit) kill_pgrp(old_pgrp, SIGCONT, on_exit); put_pid(old_pgrp); } - mutex_unlock(&tty_mutex); return; } if (tty_pgrp) { @@ -837,7 +832,6 @@ void disassociate_ctty(int on_exit) current->signal->tty_old_pgrp = NULL; spin_unlock_irq(&current->sighand->siglock); - mutex_lock(&tty_mutex); tty = get_current_tty(); if (tty) { unsigned long flags; @@ -854,7 +848,6 @@ void disassociate_ctty(int on_exit) " = NULL", tty); #endif } - mutex_unlock(&tty_mutex); /* Now clear signal->tty under the lock */ read_lock(&tasklist_lock); @@ -3180,14 +3173,11 @@ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty) struct tty_struct *get_current_tty(void) { struct tty_struct *tty; - WARN_ON_ONCE(!mutex_is_locked(&tty_mutex)); + unsigned long flags; + + spin_lock_irqsave(&current->sighand->siglock, flags); tty = tty_kref_get(current->signal->tty); - /* - *
session->tty can be changed/cleared from under us, make sure we - * issue the load. The obtained pointer, when not NULL, is valid as - * long as we hold tty_mutex. - */ - barrier(); + spin_unlock_irqrestore(&current->sighand->siglock, flags); return tty; } EXPORT_SYMBOL_GPL(get_current_tty); diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 3ef5425d0eb8..84fbc90480dc 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -431,6 +431,7 @@ fs3270_open(struct inode *inode, struct file *filp) tty = get_current_tty(); if (!tty || tty->driver->major != IBM_TTY3270_MAJOR) { tty_kref_put(tty); + mutex_unlock(&tty_mutex); rc = -ENODEV; goto out; } diff --git a/fs/dquot.c b/fs/dquot.c index 7417a6ca3129..ad7e59003e04 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -895,9 +895,7 @@ static void print_warning(struct dquot *dquot, const int warntype) warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) return; - mutex_lock(&tty_mutex); tty = get_current_tty(); - mutex_unlock(&tty_mutex); if (!tty) return; tty_write_message(tty, dquot->dq_sb->s_id); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 089d61a23952..48881394fbd4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2121,9 +2121,7 @@ static inline void flush_unauthorized_files(struct files_struct *files) long j = -1; int drop_tty = 0; - mutex_lock(&tty_mutex); tty = get_current_tty(); - mutex_unlock(&tty_mutex); if (tty) { file_list_lock(); file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); -- cgit v1.2.3 From a447c0932445f92ce6f4c1bd020f62c5097a7842 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Oct 2008 10:46:57 +0100 Subject: vfs: Use const for kernel parser table This is a much better version of a previous patch to make the parser tables constant. Rather than changing the typedef, we put the "const" in all the various places where it's required, allowing the __initconst exception for nfsroot which was the cause of the previous trouble. This was posted for review some time ago and I believe it's been in -mm since then.
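For readers less familiar with the parser API this patch touches, here is a minimal sketch of how such a const table is typically consumed. The option name, the enum values and the demo_parse_options() helper are illustrative only and are not taken from any of the files changed by this patch:

#include <linux/parser.h>
#include <linux/string.h>
#include <linux/errno.h>

enum { Opt_uid, Opt_err };

/* Only ever read by match_token(), so with this patch it can be const. */
static const match_table_t demo_tokens = {
        {Opt_uid, "uid=%u"},
        {Opt_err, NULL}
};

static int demo_parse_options(char *options, unsigned int *uid)
{
        substring_t args[MAX_OPT_ARGS];
        char *p;
        int option;

        while ((p = strsep(&options, ",")) != NULL) {
                if (!*p)
                        continue;
                switch (match_token(p, demo_tokens, args)) {
                case Opt_uid:
                        if (match_int(&args[0], &option))
                                return -EINVAL;
                        *uid = option;
                        break;
                default:
                        return -EINVAL;
                }
        }
        return 0;
}

Because the table is never written after initialisation, the signature change to "const match_table_t table" in include/linux/parser.h is enough to let every caller mark its table const, as the per-filesystem hunks below show.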
Signed-off-by: Steven Whitehouse Cc: Alexander Viro Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/s390/hypfs/inode.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/usb/core/inode.c | 2 +- fs/9p/v9fs.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/super.c | 2 +- fs/afs/super.c | 2 +- fs/autofs/inode.c | 2 +- fs/autofs4/inode.c | 2 +- fs/befs/linuxvfs.c | 2 +- fs/devpts/inode.c | 2 +- fs/ecryptfs/main.c | 2 +- fs/ext2/super.c | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/fat/inode.c | 6 +++--- fs/fuse/inode.c | 2 +- fs/gfs2/mount.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/options.c | 2 +- fs/hpfs/super.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/isofs/inode.c | 2 +- fs/jfs/super.c | 2 +- fs/nfs/nfsroot.c | 2 +- fs/nfs/super.c | 6 +++--- fs/ocfs2/super.c | 2 +- fs/omfs/inode.c | 2 +- fs/ubifs/super.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 4 ++-- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/parser.h | 2 +- lib/parser.c | 2 +- net/9p/client.c | 2 +- net/9p/trans_fd.c | 2 +- security/selinux/hooks.c | 2 +- 38 files changed, 43 insertions(+), 43 deletions(-) (limited to 'security/selinux') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 690ca7b0dcf6..2c8b8091250f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -659,7 +659,7 @@ enum { Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err, }; -static match_table_t spufs_tokens = { +static const match_table_t spufs_tokens = { { Opt_uid, "uid=%d" }, { Opt_gid, "gid=%d" }, { Opt_mode, "mode=%o" }, diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 7383781f3e6a..36313801cd5c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -219,7 +219,7 @@ static int hypfs_release(struct inode *inode, struct file *filp) enum { opt_uid, opt_gid, opt_err }; -static match_table_t hypfs_tokens = { +static const match_table_t hypfs_tokens = { {opt_uid, "uid=%u"}, {opt_gid, "gid=%u"}, {opt_err, NULL} diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ed7c5f72cb8b..5b8b533f2908 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1683,7 +1683,7 @@ enum { SRP_OPT_SERVICE_ID), }; -static match_table_t srp_opt_tokens = { +static const match_table_t srp_opt_tokens = { { SRP_OPT_ID_EXT, "id_ext=%s" }, { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, { SRP_OPT_DGID, "dgid=%s" }, diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index db410e92c80d..77fa7a080801 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -97,7 +97,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_devuid, "devuid=%u"}, {Opt_devgid, "devgid=%u"}, {Opt_devmode, "devmode=%o"}, diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 047c791427aa..c061c3f18e7c 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -55,7 +55,7 @@ enum { Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_debug, "debug=%x"}, {Opt_dfltuid, "dfltuid=%u"}, {Opt_dfltgid, "dfltgid=%u"}, diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 26f3b43726bb..7f83a46f2b7e 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_uid, "uid=%u"}, 
{Opt_gid, "gid=%u"}, {Opt_ownmask, "ownmask=%o"}, diff --git a/fs/affs/super.c b/fs/affs/super.c index 3a89094f93d0..8989c93193ed 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -135,7 +135,7 @@ enum { Opt_verbose, Opt_volume, Opt_ignore, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bs, "bs=%u"}, {Opt_mode, "mode=%o"}, {Opt_mufs, "mufs"}, diff --git a/fs/afs/super.c b/fs/afs/super.c index 250d8c4d66e4..aee239a048cb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -64,7 +64,7 @@ enum { afs_opt_vol, }; -static match_table_t afs_options_list = { +static const match_table_t afs_options_list = { { afs_opt_cell, "cell=%s" }, { afs_opt_rwpath, "rwpath" }, { afs_opt_vol, "vol=%s" }, diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index dda510d31f84..b70eea1e8c59 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = { enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; -static match_table_t autofs_tokens = { +static const match_table_t autofs_tokens = { {Opt_fd, "fd=%u"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7bb3e5ba0537..45d55819203d 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -213,7 +213,7 @@ static const struct super_operations autofs4_sops = { enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, Opt_indirect, Opt_direct, Opt_offset}; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_fd, "fd=%u"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 740f53672a8a..9286b2af893a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -650,7 +650,7 @@ enum { Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, }; -static match_table_t befs_tokens = { +static const match_table_t befs_tokens = { {Opt_uid, "uid=%d"}, {Opt_gid, "gid=%d"}, {Opt_charset, "iocharset=%s"}, diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index a70d5d0890c7..4a714f6c1bed 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -49,7 +49,7 @@ enum { Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 448dfd597b5f..8ebe9a5d1d99 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -211,7 +211,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {ecryptfs_opt_sig, "sig=%s"}, {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, {ecryptfs_opt_cipher, "cipher=%s"}, diff --git a/fs/ext2/super.c b/fs/ext2/super.c index fd88c7b43e66..647cd888ac87 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -393,7 +393,7 @@ enum { Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f38a5afc39a1..399a96a6c556 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -760,7 +760,7 @@ enum { Opt_grpquota }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, diff --git a/fs/ext4/super.c 
b/fs/ext4/super.c index fb940c22ab0d..dea8f13c2fd9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -919,7 +919,7 @@ enum { Opt_inode_readahead_blks }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 80ff3381fa21..d12cdf2a0406 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -855,7 +855,7 @@ enum { Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, }; -static match_table_t fat_tokens = { +static const match_table_t fat_tokens = { {Opt_check_r, "check=relaxed"}, {Opt_check_s, "check=strict"}, {Opt_check_n, "check=normal"}, @@ -890,14 +890,14 @@ static match_table_t fat_tokens = { {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; -static match_table_t msdos_tokens = { +static const match_table_t msdos_tokens = { {Opt_nodots, "nodots"}, {Opt_nodots, "dotsOK=no"}, {Opt_dots, "dots"}, {Opt_dots, "dotsOK=yes"}, {Opt_err, NULL} }; -static match_table_t vfat_tokens = { +static const match_table_t vfat_tokens = { {Opt_charset, "iocharset=%s"}, {Opt_shortname_lower, "shortname=lower"}, {Opt_shortname_win95, "shortname=win95"}, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d2249f174e20..6a84388cacff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -354,7 +354,7 @@ enum { OPT_ERR }; -static match_table_t tokens = { +static const match_table_t tokens = { {OPT_FD, "fd=%u"}, {OPT_ROOTMODE, "rootmode=%o"}, {OPT_USER_ID, "user_id=%u"}, diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index df48333e6f01..f96eb90a2cfa 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -46,7 +46,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_lockproto, "lockproto=%s"}, {Opt_locktable, "locktable=%s"}, {Opt_hostdata, "hostdata=%s"}, diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4abb1047c689..3c7c7637719c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -173,7 +173,7 @@ enum { opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { { opt_uid, "uid=%u" }, { opt_gid, "gid=%u" }, { opt_umask, "umask=%o" }, diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 9997cbf8beb5..9699c56d323f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -25,7 +25,7 @@ enum { opt_force, opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { { opt_creator, "creator=%s" }, { opt_type, "type=%s" }, { opt_umask, "umask=%o" }, diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b8ae9c90ada0..29ad461d568f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -215,7 +215,7 @@ enum { Opt_timeshift, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_help, "help"}, {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3f58923fb39b..61edc701b0e6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -57,7 +57,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_size, "size=%s"}, {Opt_nr_inodes, "nr_inodes=%s"}, {Opt_mode, "mode=%o"}, diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 26948a6033b6..3f8af0f1505b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -310,7 +310,7 @@ enum { Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_norock, "norock"}, {Opt_nojoliet, "nojoliet"}, {Opt_unhide, "unhide"}, diff --git a/fs/jfs/super.c 
b/fs/jfs/super.c index 3630718be395..0dae345e481b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -199,7 +199,7 @@ enum { Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_integrity, "integrity"}, {Opt_nointegrity, "nointegrity"}, {Opt_iocharset, "iocharset=%s"}, diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 46763d1cd397..8478fc25daee 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initdata tokens = { +static match_table_t __initconst tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e9b20173fef3..ffb697416cb1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -98,7 +98,7 @@ enum { Opt_err }; -static match_table_t nfs_mount_option_tokens = { +static const match_table_t nfs_mount_option_tokens = { { Opt_userspace, "bg" }, { Opt_userspace, "fg" }, { Opt_userspace, "retry=%s" }, @@ -163,7 +163,7 @@ enum { Opt_xprt_err }; -static match_table_t nfs_xprt_protocol_tokens = { +static const match_table_t nfs_xprt_protocol_tokens = { { Opt_xprt_udp, "udp" }, { Opt_xprt_tcp, "tcp" }, { Opt_xprt_rdma, "rdma" }, @@ -180,7 +180,7 @@ enum { Opt_sec_err }; -static match_table_t nfs_secflavor_tokens = { +static const match_table_t nfs_secflavor_tokens = { { Opt_sec_none, "none" }, { Opt_sec_none, "null" }, { Opt_sec_sys, "sys" }, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 88255d3f52b4..70334d85aff1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -157,7 +157,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_barrier, "barrier=%u"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d29047b1b9b0..cbf047a847c5 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -346,7 +346,7 @@ enum { Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_umask, "umask=%o"}, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3f4902060c7a..9a9220333b3b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -848,7 +848,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_fast_unmount, "fast_unmount"}, {Opt_norm_unmount, "norm_unmount"}, {Opt_err, NULL}, diff --git a/fs/udf/super.c b/fs/udf/super.c index 5698bbf83bbf..e25e7010627b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -369,7 +369,7 @@ enum { Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_novrs, "novrs"}, {Opt_nostrict, "nostrict"}, {Opt_bs, "bs=%u"}, diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3141969b456d..e65212dfb60e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -309,7 +309,7 @@ enum { Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_type_old, "ufstype=old"}, {Opt_type_sunx86, "ufstype=sunx86"}, {Opt_type_sun, "ufstype=sun"}, @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - struct match_token *tp = tokens; + const struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; diff 
--git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 18d3c8487835..7227b2efef22 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -158,7 +158,7 @@ enum { Opt_barrier, Opt_nobarrier, Opt_err }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_err, NULL} diff --git a/include/linux/parser.h b/include/linux/parser.h index 7dcd05075756..ea2281e726f6 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -25,7 +25,7 @@ typedef struct { char *to; } substring_t; -int match_token(char *, match_table_t table, substring_t args[]); +int match_token(char *, const match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); diff --git a/lib/parser.c b/lib/parser.c index 4f0cbc03e0e8..b00d02059a5f 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -100,7 +100,7 @@ static int match_one(char *s, const char *p, substring_t args[]) * format identifiers which will be taken into account when matching the * tokens, and whose locations will be returned in the @args array. */ -int match_token(char *s, match_table_t table, substring_t args[]) +int match_token(char *s, const match_table_t table, substring_t args[]) { const struct match_token *p; diff --git a/net/9p/client.c b/net/9p/client.c index 10e320307ec0..e053e06028a5 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -52,7 +52,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index d652baf5ff91..6dabbdb66651 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -86,7 +86,7 @@ enum { Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 88f19536efad..576e51199079 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -325,7 +325,7 @@ enum { Opt_rootcontext = 4, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_context, CONTEXT_STR "%s"}, {Opt_fscontext, FSCONTEXT_STR "%s"}, {Opt_defcontext, DEFCONTEXT_STR "%s"}, -- cgit v1.2.3
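As a closing aside on the secattr-caching commit earlier in this series: the pattern it introduces is a lazily allocated, per-socket cache that is built on first use and released together with its owning security structure. A stripped-down sketch of that shape in plain C follows; the names here are hypothetical stand-ins, not the kernel code itself:

#include <stdlib.h>

struct secattr { int label; };          /* stand-in for netlbl_lsm_secattr */

struct sock_sec {
        struct secattr *nlbl_cache;     /* NULL until first use */
};

/* Build the attribute once, then hand back the cached copy on every call. */
static struct secattr *sock_sec_genattr(struct sock_sec *sec)
{
        struct secattr *attr;

        if (sec->nlbl_cache)
                return sec->nlbl_cache;

        attr = calloc(1, sizeof(*attr));
        if (!attr)
                return NULL;
        attr->label = 42;               /* the expensive conversion would go here */
        sec->nlbl_cache = attr;
        return attr;
}

/* Released exactly once, when the owning security struct goes away. */
static void sock_sec_free(struct sock_sec *sec)
{
        free(sec->nlbl_cache);
}

In the patch itself this role is played by selinux_netlbl_sock_genattr() and selinux_netlbl_sk_security_free(), which avoid rebuilding the netlbl_lsm_secattr for every outgoing packet.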