diff options
Diffstat (limited to 'Documentation')
26 files changed, 1065 insertions, 343 deletions
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index 7c717699032c..63392c9132b4 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt @@ -698,12 +698,12 @@ these interfaces. Remember that, as defined, consistent mappings are always going to be SAC addressable. The first thing your driver needs to do is query the PCI platform -layer with your devices DAC addressing capabilities: +layer if it is capable of handling your devices DAC addressing +capabilities: - int pci_dac_set_dma_mask(struct pci_dev *pdev, u64 mask); + int pci_dac_dma_supported(struct pci_dev *hwdev, u64 mask); -This routine behaves identically to pci_set_dma_mask. You may not -use the following interfaces if this routine fails. +You may not use the following interfaces if this routine fails. Next, DMA addresses using this API are kept track of using the dma64_addr_t type. It is guaranteed to be big enough to hold any diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 1ae4dc0fd856..f8fe882e33dc 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -59,6 +59,9 @@ !Iinclude/linux/hrtimer.h !Ekernel/hrtimer.c </sect1> + <sect1><title>Workqueues and Kevents</title> +!Ekernel/workqueue.c + </sect1> <sect1><title>Internal Functions</title> !Ikernel/exit.c !Ikernel/signal.c @@ -300,7 +303,7 @@ X!Ekernel/module.c </sect1> <sect1><title>Resources Management</title> -!Ekernel/resource.c +!Ikernel/resource.c </sect1> <sect1><title>MTRR Handling</title> @@ -312,9 +315,7 @@ X!Ekernel/module.c !Edrivers/pci/pci-driver.c !Edrivers/pci/remove.c !Edrivers/pci/pci-acpi.c -<!-- kerneldoc does not understand __devinit -X!Edrivers/pci/search.c - --> +!Edrivers/pci/search.c !Edrivers/pci/msi.c !Edrivers/pci/bus.c <!-- FIXME: Removed for now since no structured comments in source diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 4f41a60e5111..318df44259b3 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -687,8 +687,9 @@ diff shows how closely related RCU and reader-writer locking can be. + spin_lock(&listmutex); list_for_each_entry(p, head, lp) { if (p->key == key) { - list_del(&p->list); + - list_del(&p->list); - write_unlock(&listmutex); + + list_del_rcu(&p->list); + spin_unlock(&listmutex); + synchronize_rcu(); kfree(p); @@ -736,7 +737,7 @@ Or, for those who prefer a side-by-side listing: 5 write_lock(&listmutex); 5 spin_lock(&listmutex); 6 list_for_each_entry(p, head, lp) { 6 list_for_each_entry(p, head, lp) { 7 if (p->key == key) { 7 if (p->key == key) { - 8 list_del(&p->list); 8 list_del(&p->list); + 8 list_del(&p->list); 8 list_del_rcu(&p->list); 9 write_unlock(&listmutex); 9 spin_unlock(&listmutex); 10 synchronize_rcu(); 10 kfree(p); 11 kfree(p); diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index 8230098da529..a10bfb6ecd9f 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -1,57 +1,63 @@ Linux Kernel patch sumbittal checklist ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here are some basic things that developers should do if they -want to see their kernel patch submittals accepted quicker. +Here are some basic things that developers should do if they want to see their +kernel patch submissions accepted more quickly. -These are all above and beyond the documentation that is provided -in Documentation/SubmittingPatches and elsewhere about submitting -Linux kernel patches. +These are all above and beyond the documentation that is provided in +Documentation/SubmittingPatches and elsewhere regarding submitting Linux +kernel patches. -- Builds cleanly with applicable or modified CONFIG options =y, =m, and =n. - No gcc warnings/errors, no linker warnings/errors. +1: Builds cleanly with applicable or modified CONFIG options =y, =m, and + =n. No gcc warnings/errors, no linker warnings/errors. -- Passes allnoconfig, allmodconfig +2: Passes allnoconfig, allmodconfig -- Builds on multiple CPU arch-es by using local cross-compile tools - or something like PLM at OSDL. +3: Builds on multiple CPU architectures by using local cross-compile tools + or something like PLM at OSDL. -- ppc64 is a good architecture for cross-compilation checking because it - tends to use `unsigned long' for 64-bit quantities. +4: ppc64 is a good architecture for cross-compilation checking because it + tends to use `unsigned long' for 64-bit quantities. -- Matches kernel coding style(!) +5: Matches kernel coding style(!) -- Any new or modified CONFIG options don't muck up the config menu. +6: Any new or modified CONFIG options don't muck up the config menu. -- All new Kconfig options have help text. +7: All new Kconfig options have help text. -- Has been carefully reviewed with respect to relevant Kconfig - combinations. This is very hard to get right with testing -- - brainpower pays off here. +8: Has been carefully reviewed with respect to relevant Kconfig + combinations. This is very hard to get right with testing -- brainpower + pays off here. -- Check cleanly with sparse. +9: Check cleanly with sparse. -- Use 'make checkstack' and 'make namespacecheck' and fix any - problems that they find. Note: checkstack does not point out - problems explicitly, but any one function that uses more than - 512 bytes on the stack is a candidate for change. +10: Use 'make checkstack' and 'make namespacecheck' and fix any problems + that they find. Note: checkstack does not point out problems explicitly, + but any one function that uses more than 512 bytes on the stack is a + candidate for change. -- Include kernel-doc to document global kernel APIs. (Not required - for static functions, but OK there also.) Use 'make htmldocs' - or 'make mandocs' to check the kernel-doc and fix any issues. +11: Include kernel-doc to document global kernel APIs. (Not required for + static functions, but OK there also.) Use 'make htmldocs' or 'make + mandocs' to check the kernel-doc and fix any issues. -- Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, - CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, - CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously - enabled. +12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, + CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, + CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously + enabled. -- Has been build- and runtime tested with and without CONFIG_SMP and - CONFIG_PREEMPT. +13: Has been build- and runtime tested with and without CONFIG_SMP and + CONFIG_PREEMPT. -- If the patch affects IO/Disk, etc: has been tested with and without - CONFIG_LBD. +14: If the patch affects IO/Disk, etc: has been tested with and without + CONFIG_LBD. +15: All codepaths have been exercised with all lockdep features enabled. -2006-APR-27 +16: All new /proc entries are documented under Documentation/ + +17: All new kernel boot parameters are documented in + Documentation/kernel-parameters.txt. + +18: All new module parameters are documented with MODULE_PARM_DESC() diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index c2c85bcb3d43..2cd7f02ffd0b 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -10,7 +10,9 @@ kernel, the process can sometimes be daunting if you're not familiar with "the system." This text is a collection of suggestions which can greatly increase the chances of your change being accepted. -If you are submitting a driver, also read Documentation/SubmittingDrivers. +Read Documentation/SubmitChecklist for a list of items to check +before submitting code. If you are submitting a driver, also read +Documentation/SubmittingDrivers. @@ -74,9 +76,6 @@ There are a number of scripts which can aid in this: Quilt: http://savannah.nongnu.org/projects/quilt -Randy Dunlap's patch scripts: -http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz - Andrew Morton's patch scripts: http://www.zip.com.au/~akpm/linux/patches/ Instead of these scripts, quilt is the recommended patch management @@ -484,7 +483,7 @@ Greg Kroah-Hartman "How to piss off a kernel subsystem maintainer". <http://www.kroah.com/log/2005/10/19/> <http://www.kroah.com/log/2006/01/11/> -NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!. +NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2> Kernel Documentation/CodingStyle @@ -493,4 +492,3 @@ Kernel Documentation/CodingStyle Linus Torvald's mail on the canonical patch format: <http://lkml.org/lkml/2005/4/7/183> -- -Last updated on 17 Nov 2005. diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt new file mode 100644 index 000000000000..1443cd71d263 --- /dev/null +++ b/Documentation/accounting/delay-accounting.txt @@ -0,0 +1,112 @@ +Delay accounting +---------------- + +Tasks encounter delays in execution when they wait +for some kernel resource to become available e.g. a +runnable task may wait for a free CPU to run on. + +The per-task delay accounting functionality measures +the delays experienced by a task while + +a) waiting for a CPU (while being runnable) +b) completion of synchronous block I/O initiated by the task +c) swapping in pages + +and makes these statistics available to userspace through +the taskstats interface. + +Such delays provide feedback for setting a task's cpu priority, +io priority and rss limit values appropriately. Long delays for +important tasks could be a trigger for raising its corresponding priority. + +The functionality, through its use of the taskstats interface, also provides +delay statistics aggregated for all tasks (or threads) belonging to a +thread group (corresponding to a traditional Unix process). This is a commonly +needed aggregation that is more efficiently done by the kernel. + +Userspace utilities, particularly resource management applications, can also +aggregate delay statistics into arbitrary groups. To enable this, delay +statistics of a task are available both during its lifetime as well as on its +exit, ensuring continuous and complete monitoring can be done. + + +Interface +--------- + +Delay accounting uses the taskstats interface which is described +in detail in a separate document in this directory. Taskstats returns a +generic data structure to userspace corresponding to per-pid and per-tgid +statistics. The delay accounting functionality populates specific fields of +this structure. See + include/linux/taskstats.h +for a description of the fields pertaining to delay accounting. +It will generally be in the form of counters returning the cumulative +delay seen for cpu, sync block I/O, swapin etc. + +Taking the difference of two successive readings of a given +counter (say cpu_delay_total) for a task will give the delay +experienced by the task waiting for the corresponding resource +in that interval. + +When a task exits, records containing the per-task statistics +are sent to userspace without requiring a command. If it is the last exiting +task of a thread group, the per-tgid statistics are also sent. More details +are given in the taskstats interface description. + +The getdelays.c userspace utility in this directory allows simple commands to +be run and the corresponding delay statistics to be displayed. It also serves +as an example of using the taskstats interface. + +Usage +----- + +Compile the kernel with + CONFIG_TASK_DELAY_ACCT=y + CONFIG_TASKSTATS=y + +Delay accounting is enabled by default at boot up. +To disable, add + nodelayacct +to the kernel boot options. The rest of the instructions +below assume this has not been done. + +After the system has booted up, use a utility +similar to getdelays.c to access the delays +seen by a given task or a task group (tgid). +The utility also allows a given command to be +executed and the corresponding delays to be +seen. + +General format of the getdelays command + +getdelays [-t tgid] [-p pid] [-c cmd...] + + +Get delays, since system boot, for pid 10 +# ./getdelays -p 10 +(output similar to next case) + +Get sum of delays, since system boot, for all pids with tgid 5 +# ./getdelays -t 5 + + +CPU count real total virtual total delay total + 7876 92005750 100000000 24001500 +IO count delay total + 0 0 +MEM count delay total + 0 0 + +Get delays seen in executing a given simple command +# ./getdelays -c ls / + +bin data1 data3 data5 dev home media opt root srv sys usr +boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var + + +CPU count real total virtual total delay total + 6 4000250 4000000 0 +IO count delay total + 0 0 +MEM count delay total + 0 0 diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c new file mode 100644 index 000000000000..795ca3911cc5 --- /dev/null +++ b/Documentation/accounting/getdelays.c @@ -0,0 +1,396 @@ +/* getdelays.c + * + * Utility to get per-pid and per-tgid delay accounting statistics + * Also illustrates usage of the taskstats interface + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2005 + * Copyright (C) Balbir Singh, IBM Corp. 2006 + * Copyright (c) Jay Lan, SGI. 2006 + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <poll.h> +#include <string.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <signal.h> + +#include <linux/genetlink.h> +#include <linux/taskstats.h> + +/* + * Generic macros for dealing with netlink sockets. Might be duplicated + * elsewhere. It is recommended that commercial grade applications use + * libnl or libnetlink and use the interfaces provided by the library + */ +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) (len - NLA_HDRLEN) + +#define err(code, fmt, arg...) do { printf(fmt, ##arg); exit(code); } while (0) +int done = 0; +int rcvbufsz=0; + + char name[100]; +int dbg=0, print_delays=0; +__u64 stime, utime; +#define PRINTF(fmt, arg...) { \ + if (dbg) { \ + printf(fmt, ##arg); \ + } \ + } + +/* Maximum size of response requested or message sent */ +#define MAX_MSG_SIZE 256 +/* Maximum number of cpus expected to be specified in a cpumask */ +#define MAX_CPUS 32 +/* Maximum length of pathname to log file */ +#define MAX_FILENAME 256 + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; +}; + +char cpumask[100+6*MAX_CPUS]; + +/* + * Create a raw netlink socket and bind + */ +static int create_nl_socket(int protocol) +{ + int fd; + struct sockaddr_nl local; + + fd = socket(AF_NETLINK, SOCK_RAW, protocol); + if (fd < 0) + return -1; + + if (rcvbufsz) + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &rcvbufsz, sizeof(rcvbufsz)) < 0) { + printf("Unable to set socket rcv buf size to %d\n", + rcvbufsz); + return -1; + } + + memset(&local, 0, sizeof(local)); + local.nl_family = AF_NETLINK; + + if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) + goto error; + + return fd; +error: + close(fd); + return -1; +} + + +int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 0x1; + na = (struct nlattr *) GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *) &msg; + buflen = msg.n.nlmsg_len ; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) + return -1; + } + return 0; +} + + +/* + * Probe the controller in genetlink to find the family id + * for the TASKSTATS family + */ +int get_family_id(int sd) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[256]; + } ans; + + int id, rc; + struct nlattr *na; + int rep_len; + + strcpy(name, TASKSTATS_GENL_NAME); + rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)name, + strlen(TASKSTATS_GENL_NAME)+1); + + rep_len = recv(sd, &ans, sizeof(ans), 0); + if (ans.n.nlmsg_type == NLMSG_ERROR || + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) + return 0; + + na = (struct nlattr *) GENLMSG_DATA(&ans); + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) { + id = *(__u16 *) NLA_DATA(na); + } + return id; +} + +void print_delayacct(struct taskstats *t) +{ + printf("\n\nCPU %15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu\n" + "IO %15s%15s\n" + " %15llu%15llu\n" + "MEM %15s%15s\n" + " %15llu%15llu\n\n", + "count", "real total", "virtual total", "delay total", + t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, + t->cpu_delay_total, + "count", "delay total", + t->blkio_count, t->blkio_delay_total, + "count", "delay total", t->swapin_count, t->swapin_delay_total); +} + +int main(int argc, char *argv[]) +{ + int c, rc, rep_len, aggr_len, len2, cmd_type; + __u16 id; + __u32 mypid; + + struct nlattr *na; + int nl_sd = -1; + int len = 0; + pid_t tid = 0; + pid_t rtid = 0; + + int fd = 0; + int count = 0; + int write_file = 0; + int maskset = 0; + char logfile[128]; + int loop = 0; + + struct msgtemplate msg; + + while (1) { + c = getopt(argc, argv, "dw:r:m:t:p:v:l"); + if (c < 0) + break; + + switch (c) { + case 'd': + printf("print delayacct stats ON\n"); + print_delays = 1; + break; + case 'w': + strncpy(logfile, optarg, MAX_FILENAME); + printf("write to file %s\n", logfile); + write_file = 1; + break; + case 'r': + rcvbufsz = atoi(optarg); + printf("receive buf size %d\n", rcvbufsz); + if (rcvbufsz < 0) + err(1, "Invalid rcv buf size\n"); + break; + case 'm': + strncpy(cpumask, optarg, sizeof(cpumask)); + maskset = 1; + printf("cpumask %s maskset %d\n", cpumask, maskset); + break; + case 't': + tid = atoi(optarg); + if (!tid) + err(1, "Invalid tgid\n"); + cmd_type = TASKSTATS_CMD_ATTR_TGID; + print_delays = 1; + break; + case 'p': + tid = atoi(optarg); + if (!tid) + err(1, "Invalid pid\n"); + cmd_type = TASKSTATS_CMD_ATTR_PID; + print_delays = 1; + break; + case 'v': + printf("debug on\n"); + dbg = 1; + break; + case 'l': + printf("listen forever\n"); + loop = 1; + break; + default: + printf("Unknown option %d\n", c); + exit(-1); + } + } + + if (write_file) { + fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd == -1) { + perror("Cannot open output file\n"); + exit(1); + } + } + + if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0) + err(1, "error creating Netlink socket\n"); + + + mypid = getpid(); + id = get_family_id(nl_sd); + if (!id) { + printf("Error getting family id, errno %d", errno); + goto err; + } + PRINTF("family id %d\n", id); + + if (maskset) { + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + &cpumask, sizeof(cpumask)); + PRINTF("Sent register cpumask, retval %d\n", rc); + if (rc < 0) { + printf("error sending register cpumask\n"); + goto err; + } + } + + if (tid) { + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, + cmd_type, &tid, sizeof(__u32)); + PRINTF("Sent pid/tgid, retval %d\n", rc); + if (rc < 0) { + printf("error sending tid/tgid cmd\n"); + goto done; + } + } + + do { + int i; + + rep_len = recv(nl_sd, &msg, sizeof(msg), 0); + PRINTF("received %d bytes\n", rep_len); + + if (rep_len < 0) { + printf("nonfatal reply error: errno %d\n", errno); + continue; + } + if (msg.n.nlmsg_type == NLMSG_ERROR || + !NLMSG_OK((&msg.n), rep_len)) { + printf("fatal reply error, errno %d\n", errno); + goto done; + } + + PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n", + sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); + + + rep_len = GENLMSG_PAYLOAD(&msg.n); + + na = (struct nlattr *) GENLMSG_DATA(&msg); + len = 0; + i = 0; + while (len < rep_len) { + len += NLA_ALIGN(na->nla_len); + switch (na->nla_type) { + case TASKSTATS_TYPE_AGGR_TGID: + /* Fall through */ + case TASKSTATS_TYPE_AGGR_PID: + aggr_len = NLA_PAYLOAD(na->nla_len); + len2 = 0; + /* For nested attributes, na follows */ + na = (struct nlattr *) NLA_DATA(na); + done = 0; + while (len2 < aggr_len) { + switch (na->nla_type) { + case TASKSTATS_TYPE_PID: + rtid = *(int *) NLA_DATA(na); + if (print_delays) + printf("PID\t%d\n", rtid); + break; + case TASKSTATS_TYPE_TGID: + rtid = *(int *) NLA_DATA(na); + if (print_delays) + printf("TGID\t%d\n", rtid); + break; + case TASKSTATS_TYPE_STATS: + count++; + if (print_delays) + print_delayacct((struct taskstats *) NLA_DATA(na)); + if (fd) { + if (write(fd, NLA_DATA(na), na->nla_len) < 0) { + err(1,"write error\n"); + } + } + if (!loop) + goto done; + break; + default: + printf("Unknown nested nla_type %d\n", na->nla_type); + break; + } + len2 += NLA_ALIGN(na->nla_len); + na = (struct nlattr *) ((char *) na + len2); + } + break; + + default: + printf("Unknown nla_type %d\n", na->nla_type); + break; + } + na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); + } + } while (loop); +done: + if (maskset) { + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, + &cpumask, sizeof(cpumask)); + printf("Sent deregister mask, retval %d\n", rc); + if (rc < 0) + err(rc, "error sending deregister cpumask\n"); + } +err: + close(nl_sd); + if (fd) + close(fd); + return 0; +} diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt new file mode 100644 index 000000000000..92ebf29e9041 --- /dev/null +++ b/Documentation/accounting/taskstats.txt @@ -0,0 +1,181 @@ +Per-task statistics interface +----------------------------- + + +Taskstats is a netlink-based interface for sending per-task and +per-process statistics from the kernel to userspace. + +Taskstats was designed for the following benefits: + +- efficiently provide statistics during lifetime of a task and on its exit +- unified interface for multiple accounting subsystems +- extensibility for use by future accounting patches + +Terminology +----------- + +"pid", "tid" and "task" are used interchangeably and refer to the standard +Linux task defined by struct task_struct. per-pid stats are the same as +per-task stats. + +"tgid", "process" and "thread group" are used interchangeably and refer to the +tasks that share an mm_struct i.e. the traditional Unix process. Despite the +use of tgid, there is no special treatment for the task that is thread group +leader - a process is deemed alive as long as it has any task belonging to it. + +Usage +----- + +To get statistics during a task's lifetime, userspace opens a unicast netlink +socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid. +The response contains statistics for a task (if pid is specified) or the sum of +statistics for all tasks of the process (if tgid is specified). + +To obtain statistics for tasks which are exiting, the userspace listener +sends a register command and specifies a cpumask. Whenever a task exits on +one of the cpus in the cpumask, its per-pid statistics are sent to the +registered listener. Using cpumasks allows the data received by one listener +to be limited and assists in flow control over the netlink interface and is +explained in more detail below. + +If the exiting task is the last thread exiting its thread group, +an additional record containing the per-tgid stats is also sent to userspace. +The latter contains the sum of per-pid stats for all threads in the thread +group, both past and present. + +getdelays.c is a simple utility demonstrating usage of the taskstats interface +for reporting delay accounting statistics. Users can register cpumasks, +send commands and process responses, listen for per-tid/tgid exit data, +write the data received to a file and do basic flow control by increasing +receive buffer sizes. + +Interface +--------- + +The user-kernel interface is encapsulated in include/linux/taskstats.h + +To avoid this documentation becoming obsolete as the interface evolves, only +an outline of the current version is given. taskstats.h always overrides the +description here. + +struct taskstats is the common accounting structure for both per-pid and +per-tgid data. It is versioned and can be extended by each accounting subsystem +that is added to the kernel. The fields and their semantics are defined in the +taskstats.h file. + +The data exchanged between user and kernel space is a netlink message belonging +to the NETLINK_GENERIC family and using the netlink attributes interface. +The messages are in the format + + +----------+- - -+-------------+-------------------+ + | nlmsghdr | Pad | genlmsghdr | taskstats payload | + +----------+- - -+-------------+-------------------+ + + +The taskstats payload is one of the following three kinds: + +1. Commands: Sent from user to kernel. Commands to get data on +a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID, +containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes +the task/process for which userspace wants statistics. + +Commands to register/deregister interest in exit data from a set of cpus +consist of one attribute, of type +TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the +attribute payload. The cpumask is specified as an ascii string of +comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8 +the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest +in cpus before closing the listening socket, the kernel cleans up its interest +set over time. However, for the sake of efficiency, an explicit deregistration +is advisable. + +2. Response for a command: sent from the kernel in response to a userspace +command. The payload is a series of three attributes of type: + +a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates +a pid/tgid will be followed by some stats. + +b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats +is being returned. + +c) TASKSTATS_TYPE_STATS: attribute with a struct taskstsats as payload. The +same structure is used for both per-pid and per-tgid stats. + +3. New message sent by kernel whenever a task exits. The payload consists of a + series of attributes of the following type: + +a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats +b) TASKSTATS_TYPE_PID: contains exiting task's pid +c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats +d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats +e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs +f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process + + +per-tgid stats +-------------- + +Taskstats provides per-process stats, in addition to per-task stats, since +resource management is often done at a process granularity and aggregating task +stats in userspace alone is inefficient and potentially inaccurate (due to lack +of atomicity). + +However, maintaining per-process, in addition to per-task stats, within the +kernel has space and time overheads. To address this, the taskstats code +accumalates each exiting task's statistics into a process-wide data structure. +When the last task of a process exits, the process level data accumalated also +gets sent to userspace (along with the per-task data). + +When a user queries to get per-tgid data, the sum of all other live threads in +the group is added up and added to the accumalated total for previously exited +threads of the same thread group. + +Extending taskstats +------------------- + +There are two ways to extend the taskstats interface to export more +per-task/process stats as patches to collect them get added to the kernel +in future: + +1. Adding more fields to the end of the existing struct taskstats. Backward + compatibility is ensured by the version number within the + structure. Userspace will use only the fields of the struct that correspond + to the version its using. + +2. Defining separate statistic structs and using the netlink attributes + interface to return them. Since userspace processes each netlink attribute + independently, it can always ignore attributes whose type it does not + understand (because it is using an older version of the interface). + + +Choosing between 1. and 2. is a matter of trading off flexibility and +overhead. If only a few fields need to be added, then 1. is the preferable +path since the kernel and userspace don't need to incur the overhead of +processing new netlink attributes. But if the new fields expand the existing +struct too much, requiring disparate userspace accounting utilities to +unnecessarily receive large structures whose fields are of no interest, then +extending the attributes structure would be worthwhile. + +Flow control for taskstats +-------------------------- + +When the rate of task exits becomes large, a listener may not be able to keep +up with the kernel's rate of sending per-tid/tgid exit data leading to data +loss. This possibility gets compounded when the taskstats structure gets +extended and the number of cpus grows large. + +To avoid losing statistics, userspace should do one or more of the following: + +- increase the receive buffer sizes for the netlink sockets opened by +listeners to receive exit data. + +- create more listeners and reduce the number of cpus being listened to by +each listener. In the extreme case, there could be one listener for each cpu. +Users may also consider setting the cpu affinity of the listener to the subset +of cpus to which it listens, especially if they are listening to just one cpu. + +Despite these measures, if the userspace receives ENOBUFS error messages +indicated overflow of receive buffers, it should take measures to handle the +loss of data. + +---- diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 1bcf69996c9d..bc107cb157a8 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -251,16 +251,24 @@ A: This is what you would need in your kernel code to receive notifications. return NOTIFY_OK; } - static struct notifier_block foobar_cpu_notifer = + static struct notifier_block __cpuinitdata foobar_cpu_notifer = { .notifier_call = foobar_cpu_callback, }; +You need to call register_cpu_notifier() from your init function. +Init functions could be of two types: +1. early init (init function called when only the boot processor is online). +2. late init (init function called _after_ all the CPUs are online). -In your init function, +For the first case, you should add the following to your init function register_cpu_notifier(&foobar_cpu_notifier); +For the second case, you should add the following to your init function + + register_hotcpu_notifier(&foobar_cpu_notifier); + You can fail PREPARE notifiers if something doesn't work to prepare resources. This will stop the activity and send a following CANCELED event back. diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 4aaf68fafebe..66c725f530f3 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -2565,10 +2565,10 @@ Your cooperation is appreciated. 243 = /dev/usb/dabusb3 Fourth dabusb device 180 block USB block devices - 0 = /dev/uba First USB block device - 8 = /dev/ubb Second USB block device - 16 = /dev/ubc Thrid USB block device - ... + 0 = /dev/uba First USB block device + 8 = /dev/ubb Second USB block device + 16 = /dev/ubc Third USB block device + ... 181 char Conrad Electronic parallel port radio clocks 0 = /dev/pcfclock0 First Conrad radio clock diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/drivers/edac/edac.txt index 70d96a62e5e1..7b3d969d2964 100644 --- a/Documentation/drivers/edac/edac.txt +++ b/Documentation/drivers/edac/edac.txt @@ -35,15 +35,14 @@ the vendor should tie the parity status bits to 0 if they do not intend to generate parity. Some vendors do not do this, and thus the parity bit can "float" giving false positives. -The PCI Parity EDAC device has the ability to "skip" known flaky -cards during the parity scan. These are set by the parity "blacklist" -interface in the sysfs for PCI Parity. (See the PCI section in the sysfs -section below.) There is also a parity "whitelist" which is used as -an explicit list of devices to scan, while the blacklist is a list -of devices to skip. +[There are patches in the kernel queue which will allow for storage of +quirks of PCI devices reporting false parity positives. The 2.6.18 +kernel should have those patches included. When that becomes available, +then EDAC will be patched to utilize that information to "skip" such +devices.] -EDAC will have future error detectors that will be added or integrated -into EDAC in the following list: +EDAC will have future error detectors that will be integrated with +EDAC or added to it, in the following list: MCE Machine Check Exception MCA Machine Check Architecture @@ -93,22 +92,24 @@ EDAC lives in the /sys/devices/system/edac directory. Within this directory there currently reside 2 'edac' components: mc memory controller(s) system - pci PCI status system + pci PCI control and status system ============================================================================ Memory Controller (mc) Model First a background on the memory controller's model abstracted in EDAC. -Each mc device controls a set of DIMM memory modules. These modules are +Each 'mc' device controls a set of DIMM memory modules. These modules are laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can -be multiple csrows and two channels. +be multiple csrows and multiple channels. Memory controllers allow for several csrows, with 8 csrows being a typical value. Yet, the actual number of csrows depends on the electrical "loading" of a given motherboard, memory controller and DIMM characteristics. Dual channels allows for 128 bit data transfers to the CPU from memory. +Some newer chipsets allow for more than 2 channels, like Fully Buffered DIMMs +(FB-DIMMs). The following example will assume 2 channels: Channel 0 Channel 1 @@ -234,23 +235,15 @@ Polling period control file: The time period, in milliseconds, for polling for error information. Too small a value wastes resources. Too large a value might delay necessary handling of errors and might loose valuable information for - locating the error. 1000 milliseconds (once each second) is about - right for most uses. + locating the error. 1000 milliseconds (once each second) is the current + default. Systems which require all the bandwidth they can get, may + increase this. LOAD TIME: module/kernel parameter: poll_msec=[0|1] RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec -Module Version read-only attribute file: - - 'mc_version' - - The EDAC CORE module's version and compile date are shown here to - indicate what EDAC is running. - - - ============================================================================ 'mcX' DIRECTORIES @@ -284,35 +277,6 @@ Seconds since last counter reset control file: -DIMM capability attribute file: - - 'edac_capability' - - The EDAC (Error Detection and Correction) capabilities/modes of - the memory controller hardware. - - -DIMM Current Capability attribute file: - - 'edac_current_capability' - - The EDAC capabilities available with the hardware - configuration. This may not be the same as "EDAC capability" - if the correct memory is not used. If a memory controller is - capable of EDAC, but DIMMs without check bits are in use, then - Parity, SECDED, S4ECD4ED capabilities will not be available - even though the memory controller might be capable of those - modes with the proper memory loaded. - - -Memory Type supported on this controller attribute file: - - 'supported_mem_type' - - This attribute file displays the memory type, usually - buffered and unbuffered DIMMs. - - Memory Controller name attribute file: 'mc_name' @@ -321,16 +285,6 @@ Memory Controller name attribute file: that is being utilized. -Memory Controller Module name attribute file: - - 'module_name' - - This attribute file displays the memory controller module name, - version and date built. The name of the memory controller - hardware - some drivers work with multiple controllers and - this field shows which hardware is present. - - Total memory managed by this memory controller attribute file: 'size_mb' @@ -432,6 +386,9 @@ Memory Type attribute file: This attribute file will display what type of memory is currently on this csrow. Normally, either buffered or unbuffered memory. + Examples: + Registered-DDR + Unbuffered-DDR EDAC Mode of operation attribute file: @@ -446,8 +403,13 @@ Device type attribute file: 'dev_type' - This attribute file will display what type of DIMM device is - being utilized. Example: x4 + This attribute file will display what type of DRAM device is + being utilized on this DIMM. + Examples: + x1 + x2 + x4 + x8 Channel 0 CE Count attribute file: @@ -522,10 +484,10 @@ SYSTEM LOGGING If logging for UEs and CEs are enabled then system logs will have error notices indicating errors that have been detected: -MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, +EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, channel 1 "DIMM_B1": amd76x_edac -MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, +EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, channel 1 "DIMM_B1": amd76x_edac @@ -610,64 +572,4 @@ Parity Count: -PCI Device Whitelist: - - 'pci_parity_whitelist' - - This control file allows for an explicit list of PCI devices to be - scanned for parity errors. Only devices found on this list will - be examined. The list is a line of hexadecimal VENDOR and DEVICE - ID tuples: - - 1022:7450,1434:16a6 - - One or more can be inserted, separated by a comma. - - To write the above list doing the following as one command line: - - echo "1022:7450,1434:16a6" - > /sys/devices/system/edac/pci/pci_parity_whitelist - - - - To display what the whitelist is, simply 'cat' the same file. - - -PCI Device Blacklist: - - 'pci_parity_blacklist' - - This control file allows for a list of PCI devices to be - skipped for scanning. - The list is a line of hexadecimal VENDOR and DEVICE ID tuples: - - 1022:7450,1434:16a6 - - One or more can be inserted, separated by a comma. - - To write the above list doing the following as one command line: - - echo "1022:7450,1434:16a6" - > /sys/devices/system/edac/pci/pci_parity_blacklist - - - To display what the whitelist currently contains, - simply 'cat' the same file. - ======================================================================= - -PCI Vendor and Devices IDs can be obtained with the lspci command. Using -the -n option lspci will display the vendor and device IDs. The system -administrator will have to determine which devices should be scanned or -skipped. - - - -The two lists (white and black) are prioritized. blacklist is the lower -priority and will NOT be utilized when a whitelist has been set. -Turn OFF a whitelist by an empty echo command: - - echo > /sys/devices/system/edac/pci/pci_parity_whitelist - -and any previous blacklist will be utilized. - diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 99f219a01e0e..87851efb0228 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -55,14 +55,6 @@ Who: Mauro Carvalho Chehab <mchehab@brturbo.com.br> --------------------------- -What: remove EXPORT_SYMBOL(insert_resource) -When: April 2006 -Files: kernel/resource.c -Why: No modular usage in the kernel. -Who: Adrian Bunk <bunk@stusta.de> - ---------------------------- - What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl]) When: November 2005 Files: drivers/pcmcia/: pcmcia_ioctl.c @@ -166,17 +158,6 @@ Who: Arjan van de Ven <arjan@linux.intel.com> --------------------------- -What: remove EXPORT_SYMBOL(tasklist_lock) -When: August 2006 -Files: kernel/fork.c -Why: tasklist_lock protects the kernel internal task list. Modules have - no business looking at it, and all instances in drivers have been due - to use of too-lowlevel APIs. Having this symbol exported prevents - moving to more scalable locking schemes for the task list. -Who: Christoph Hellwig <hch@lst.de> - ---------------------------- - What: mount/umount uevents When: February 2007 Why: These events are not correct, and do not properly let userspace know @@ -266,3 +247,30 @@ Why: The interrupt related SA_* flags are replaced by IRQF_* to move them Who: Thomas Gleixner <tglx@linutronix.de> --------------------------- + +What: i2c-ite and i2c-algo-ite drivers +When: September 2006 +Why: These drivers never compiled since they were added to the kernel + tree 5 years ago. This feature removal can be reevaluated if + someone shows interest in the drivers, fixes them and takes over + maintenance. + http://marc.theaimsgroup.com/?l=linux-mips&m=115040510817448 +Who: Jean Delvare <khali@linux-fr.org> + +--------------------------- + +What: Bridge netfilter deferred IPv4/IPv6 output hook calling +When: January 2007 +Why: The deferred output hooks are a layering violation causing unusual + and broken behaviour on bridge devices. Examples of things they + break include QoS classifation using the MARK or CLASSIFY targets, + the IPsec policy match and connection tracking with VLANs on a + bridge. Their only use is to enable bridge output port filtering + within iptables with the physdev match, which can also be done by + combining iptables and ebtables using netfilter marks. Until it + will get removed the hook deferral is disabled by default and is + only enabled when needed. + +Who: Patrick McHardy <kaber@trash.net> + +--------------------------- diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index d31efbbdfe50..247d7f619aa2 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -142,8 +142,8 @@ see also dquot_operations section. --------------------------- file_system_type --------------------------- prototypes: - struct int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); + int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); void (*kill_sb) (struct super_block *); locking rules: may block BKL diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 9d3aed628bc1..1cb7e8be927a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -113,8 +113,8 @@ members are defined: struct file_system_type { const char *name; int fs_flags; - struct int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); + int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru index 69cdb527d58f..b2c0d61b39a2 100644 --- a/Documentation/hwmon/abituguru +++ b/Documentation/hwmon/abituguru @@ -2,13 +2,36 @@ Kernel driver abituguru ======================= Supported chips: - * Abit uGuru (Hardware Monitor part only) + * Abit uGuru revision 1-3 (Hardware Monitor part only) Prefix: 'abituguru' Addresses scanned: ISA 0x0E0 Datasheet: Not available, this driver is based on reverse engineering. A "Datasheet" has been written based on the reverse engineering it should be available in the same dir as this file under the name abituguru-datasheet. + Note: + The uGuru is a microcontroller with onboard firmware which programs + it to behave as a hwmon IC. There are many different revisions of the + firmware and thus effectivly many different revisions of the uGuru. + Below is an incomplete list with which revisions are used for which + Motherboards: + uGuru 1.00 ~ 1.24 (AI7, KV8-MAX3, AN7) (1) + uGuru 2.0.0.0 ~ 2.0.4.2 (KV8-PRO) + uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8) + uGuru 2.2.0.0 ~ 2.2.0.6 (AA8 Fatal1ty) + uGuru 2.3.0.0 ~ 2.3.0.9 (AN8) + uGuru 3.0.0.0 ~ 3.0.1.2 (AW8, AL8, NI8) + uGuru 4.xxxxx? (AT8 32X) (2) + 1) For revisions 2 and 3 uGuru's the driver can autodetect the + sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's + this doesnot always work. For these uGuru's the autodection can + be overriden with the bank1_types module param. For all 3 known + revison 1 motherboards the correct use of this param is: + bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1 + You may also need to specify the fan_sensors option for these boards + fan_sensors=5 + 2) The current version of the abituguru driver is known to NOT work + on these Motherboards Authors: Hans de Goede <j.w.r.degoede@hhs.nl>, @@ -22,6 +45,11 @@ Module Parameters * force: bool Force detection. Note this parameter only causes the detection to be skipped, if the uGuru can't be read the module initialization (insmod) will still fail. +* bank1_types: int[] Bank1 sensortype autodetection override: + -1 autodetect (default) + 0 volt sensor + 1 temp sensor + 2 not connected * fan_sensors: int Tell the driver how many fan speed sensors there are on your motherboard. Default: 0 (autodetect). * pwms: int Tell the driver how many fan speed controls (fan @@ -29,7 +57,7 @@ Module Parameters * verbose: int How verbose should the driver be? (0-3): 0 normal output 1 + verbose error reporting - 2 + sensors type probing info\n" + 2 + sensors type probing info (default) 3 + retryable error reporting Default: 2 (the driver is still in the testing phase) diff --git a/Documentation/i2c/busses/i2c-sis96x b/Documentation/i2c/busses/i2c-sis96x index 00a009b977e9..08d7b2dac69a 100644 --- a/Documentation/i2c/busses/i2c-sis96x +++ b/Documentation/i2c/busses/i2c-sis96x @@ -42,8 +42,8 @@ I suspect that this driver could be made to work for the following SiS chipsets as well: 635, and 635T. If anyone owns a board with those chips AND is willing to risk crashing & burning an otherwise well-behaved kernel in the name of progress... please contact me at <mhoffman@lightlink.com> or -via the project's mailing list: <lm-sensors@lm-sensors.org>. Please -send bug reports and/or success stories as well. +via the project's mailing list: <i2c@lm-sensors.org>. Please send bug +reports and/or success stories as well. TO DOs diff --git a/Documentation/initrd.txt b/Documentation/initrd.txt index b1b6440237a6..15f1b35deb34 100644 --- a/Documentation/initrd.txt +++ b/Documentation/initrd.txt @@ -72,6 +72,22 @@ initrd adds the following new options: initrd is mounted as root, and the normal boot procedure is followed, with the RAM disk still mounted as root. +Compressed cpio images +---------------------- + +Recent kernels have support for populating a ramdisk from a compressed cpio +archive, on such systems, the creation of a ramdisk image doesn't need to +involve special block devices or loopbacks, you merely create a directory on +disk with the desired initrd content, cd to that directory, and run (as an +example): + +find . | cpio --quiet -c -o | gzip -9 -n > /boot/imagefile.img + +Examining the contents of an existing image file is just as simple: + +mkdir /tmp/imagefile +cd /tmp/imagefile +gzip -cd /boot/imagefile.img | cpio -imd --quiet Installation ------------ diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 14ef3868a328..0706699c9da9 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -407,6 +407,20 @@ more details, with real examples. The second argument is optional, and if supplied will be used if first argument is not supported. + ld-option + ld-option is used to check if $(CC) when used to link object files + supports the given option. An optional second option may be + specified if first option are not supported. + + Example: + #arch/i386/kernel/Makefile + vsyscall-flags += $(call ld-option, -Wl$(comma)--hash-style=sysv) + + In the above example vsyscall-flags will be assigned the option + -Wl$(comma)--hash-style=sysv if it is supported by $(CC). + The second argument is optional, and if supplied will be used + if first argument is not supported. + cc-option cc-option is used to check if $(CC) support a given option, and not supported to use an optional second option. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 149f62ba14a5..b50595a0550f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1029,6 +1029,8 @@ running once the system is up. nocache [ARM] + nodelayacct [KNL] Disable per-task delay accounting + nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. noexec [IA-64] diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 28d1bc3edb1c..46b9b389df35 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1015,10 +1015,9 @@ CPU from reordering them. There are some more advanced barrier functions: (*) set_mb(var, value) - (*) set_wmb(var, value) - These assign the value to the variable and then insert at least a write - barrier after it, depending on the function. They aren't guaranteed to + This assigns the value to the variable and then inserts at least a write + barrier after it, depending on the function. It isn't guaranteed to insert anything more than a compiler barrier in a UP compilation. diff --git a/Documentation/mips/time.README b/Documentation/mips/time.README index 70bc0dd43d6d..69ddc5c14b79 100644 --- a/Documentation/mips/time.README +++ b/Documentation/mips/time.README @@ -65,7 +65,7 @@ the following functions or values: 1. (optional) set up RTC routines 2. (optional) calibrate and set the mips_counter_frequency - b) board_timer_setup - a function pointer. Invoked at the end of time_init() + b) plat_timer_setup - a function pointer. Invoked at the end of time_init() 1. (optional) over-ride any decisions made in time_init() 2. set up the irqaction for timer interrupt. 3. enable the timer interrupt @@ -116,19 +116,17 @@ Step 2: the machine setup() function If you supply board_time_init(), set the function poointer. - Set the function pointer board_timer_setup() (mandatory) - -Step 3: implement rtc routines, board_time_init() and board_timer_setup() +Step 3: implement rtc routines, board_time_init() and plat_timer_setup() if needed. - board_time_init() - + board_time_init() - a) (optional) set up RTC routines, b) (optional) calibrate and set the mips_counter_frequency (only needed if you intended to use fixed_rate_gettimeoffset or use cpu counter as timer interrupt source) - board_timer_setup() - + plat_timer_setup() - a) (optional) over-write any choices made above by time_init(). b) machine specific code should setup the timer irqaction. c) enable the timer interrupt diff --git a/Documentation/nfsroot.txt b/Documentation/nfsroot.txt index d56dc71d9430..3cc953cb288f 100644 --- a/Documentation/nfsroot.txt +++ b/Documentation/nfsroot.txt @@ -4,15 +4,16 @@ Mounting the root filesystem via NFS (nfsroot) Written 1996 by Gero Kuhlmann <gero@gkminix.han.de> Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz> Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org> +Updated 2006 by Horms <horms@verge.net.au> -If you want to use a diskless system, as an X-terminal or printer -server for example, you have to put your root filesystem onto a -non-disk device. This can either be a ramdisk (see initrd.txt in -this directory for further information) or a filesystem mounted -via NFS. The following text describes on how to use NFS for the -root filesystem. For the rest of this text 'client' means the +In order to use a diskless system, such as an X-terminal or printer server +for example, it is necessary for the root filesystem to be present on a +non-disk device. This may be an initramfs (see Documentation/filesystems/ +ramfs-rootfs-initramfs.txt), a ramdisk (see Documenation/initrd.txt) or a +filesystem mounted via NFS. The following text describes on how to use NFS +for the root filesystem. For the rest of this text 'client' means the diskless system, and 'server' means the NFS server. @@ -21,11 +22,13 @@ diskless system, and 'server' means the NFS server. 1.) Enabling nfsroot capabilities ----------------------------- -In order to use nfsroot you have to select support for NFS during -kernel configuration. Note that NFS cannot be loaded as a module -in this case. The configuration script will then ask you whether -you want to use nfsroot, and if yes what kind of auto configuration -system you want to use. Selecting both BOOTP and RARP is safe. +In order to use nfsroot, NFS client support needs to be selected as +built-in during configuration. Once this has been selected, the nfsroot +option will become available, which should also be selected. + +In the networking options, kernel level autoconfiguration can be selected, +along with the types of autoconfiguration to support. Selecting all of +DHCP, BOOTP and RARP is safe. @@ -33,11 +36,10 @@ system you want to use. Selecting both BOOTP and RARP is safe. 2.) Kernel command line ------------------- -When the kernel has been loaded by a boot loader (either by loadlin, -LILO or a network boot program) it has to be told what root fs device -to use, and where to find the server and the name of the directory -on the server to mount as root. This can be established by a couple -of kernel command line parameters: +When the kernel has been loaded by a boot loader (see below) it needs to be +told what root fs device to use. And in the case of nfsroot, where to find +both the server and the name of the directory on the server to mount as root. +This can be established using the following kernel command line parameters: root=/dev/nfs @@ -49,23 +51,21 @@ root=/dev/nfs nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] - If the `nfsroot' parameter is NOT given on the command line, the default - "/tftpboot/%s" will be used. + If the `nfsroot' parameter is NOT given on the command line, + the default "/tftpboot/%s" will be used. - <server-ip> Specifies the IP address of the NFS server. If this field - is not given, the default address as determined by the - `ip' variable (see below) is used. One use of this - parameter is for example to allow using different servers - for RARP and NFS. Usually you can leave this blank. + <server-ip> Specifies the IP address of the NFS server. + The default address is determined by the `ip' parameter + (see below). This parameter allows the use of different + servers for IP autoconfiguration and NFS. - <root-dir> Name of the directory on the server to mount as root. If - there is a "%s" token in the string, the token will be - replaced by the ASCII-representation of the client's IP - address. + <root-dir> Name of the directory on the server to mount as root. + If there is a "%s" token in the string, it will be + replaced by the ASCII-representation of the client's + IP address. <nfs-options> Standard NFS options. All options are separated by commas. - If the options field is not given, the following defaults - will be used: + The following defaults are used: port = as given by server portmap daemon rsize = 1024 wsize = 1024 @@ -81,129 +81,174 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> This parameter tells the kernel how to configure IP addresses of devices - and also how to set up the IP routing table. It was originally called `nfsaddrs', - but now the boot-time IP configuration works independently of NFS, so it - was renamed to `ip' and the old name remained as an alias for compatibility - reasons. + and also how to set up the IP routing table. It was originally called + `nfsaddrs', but now the boot-time IP configuration works independently of + NFS, so it was renamed to `ip' and the old name remained as an alias for + compatibility reasons. If this parameter is missing from the kernel command line, all fields are assumed to be empty, and the defaults mentioned below apply. In general - this means that the kernel tries to configure everything using both - RARP and BOOTP (depending on what has been enabled during kernel confi- - guration, and if both what protocol answer got in first). + this means that the kernel tries to configure everything using + autoconfiguration. + + The <autoconf> parameter can appear alone as the value to the `ip' + parameter (without all the ':' characters before) in which case auto- + configuration is used. + + <client-ip> IP address of the client. - <client-ip> IP address of the client. If empty, the address will either - be determined by RARP or BOOTP. What protocol is used de- - pends on what has been enabled during kernel configuration - and on the <autoconf> parameter. If this parameter is not - empty, neither RARP nor BOOTP will be used. + Default: Determined using autoconfiguration. <server-ip> IP address of the NFS server. If RARP is used to determine the client address and this parameter is NOT empty only - replies from the specified server are accepted. To use - different RARP and NFS server, specify your RARP server - here (or leave it blank), and specify your NFS server in - the `nfsroot' parameter (see above). If this entry is blank - the address of the server is used which answered the RARP - or BOOTP request. - - <gw-ip> IP address of a gateway if the server is on a different - subnet. If this entry is empty no gateway is used and the - server is assumed to be on the local network, unless a - value has been received by BOOTP. - - <netmask> Netmask for local network interface. If this is empty, + replies from the specified server are accepted. + + Only required for for NFS root. That is autoconfiguration + will not be triggered if it is missing and NFS root is not + in operation. + + Default: Determined using autoconfiguration. + The address of the autoconfiguration server is used. + + <gw-ip> IP address of a gateway if the server is on a different subnet. + + Default: Determined using autoconfiguration. + + <netmask> Netmask for local network interface. If unspecified the netmask is derived from the client IP address assuming - classful addressing, unless overridden in BOOTP reply. + classful addressing. - <hostname> Name of the client. If empty, the client IP address is - used in ASCII notation, or the value received by BOOTP. + Default: Determined using autoconfiguration. - <device> Name of network device to use. If this is empty, all - devices are used for RARP and BOOTP requests, and the - first one we receive a reply on is configured. If you have - only one device, you can safely leave this blank. + <hostname> Name of the client. May be supplied by autoconfiguration, + but its absence will not trigger autoconfiguration. - <autoconf> Method to use for autoconfiguration. If this is either - 'rarp' or 'bootp', the specified protocol is used. - If the value is 'both' or empty, both protocols are used - so far as they have been enabled during kernel configura- - tion. 'off' means no autoconfiguration. + Default: Client IP address is used in ASCII notation. - The <autoconf> parameter can appear alone as the value to the `ip' - parameter (without all the ':' characters before) in which case auto- - configuration is used. + <device> Name of network device to use. + + Default: If the host only has one device, it is used. + Otherwise the device is determined using + autoconfiguration. This is done by sending + autoconfiguration requests out of all devices, + and using the device that received the first reply. + <autoconf> Method to use for autoconfiguration. In the case of options + which specify multiple autoconfiguration protocols, + requests are sent using all protocols, and the first one + to reply is used. + Only autoconfiguration protocols that have been compiled + into the kernel will be used, regardless of the value of + this option. + off or none: don't use autoconfiguration (default) + on or any: use any protocol available in the kernel + dhcp: use DHCP + bootp: use BOOTP + rarp: use RARP + both: use both BOOTP and RARP but not DHCP + (old option kept for backwards compatibility) -3.) Kernel loader - ------------- + Default: any -To get the kernel into memory different approaches can be used. They -depend on what facilities are available: -3.1) Writing the kernel onto a floppy using dd: - As always you can just write the kernel onto a floppy using dd, - but then it's not possible to use kernel command lines at all. - To substitute the 'root=' parameter, create a dummy device on any - linux system with major number 0 and minor number 255 using mknod: - mknod /dev/boot255 c 0 255 +3.) Boot Loader + ---------- - Then copy the kernel zImage file onto a floppy using dd: +To get the kernel into memory different approaches can be used. +They depend on various facilities being available: - dd if=/usr/src/linux/arch/i386/boot/zImage of=/dev/fd0 - And finally use rdev to set the root device: +3.1) Booting from a floppy using syslinux - rdev /dev/fd0 /dev/boot255 + When building kernels, an easy way to create a boot floppy that uses + syslinux is to use the zdisk or bzdisk make targets which use + and bzimage images respectively. Both targets accept the + FDARGS parameter which can be used to set the kernel command line. - You can then remove the dummy device /dev/boot255 again. There - is no real device available for it. - The other two kernel command line parameters cannot be substi- - tuted with rdev. Therefore, using this method the kernel will - by default use RARP and/or BOOTP, and if it gets an answer via - RARP will mount the directory /tftpboot/<client-ip>/ as its - root. If it got a BOOTP answer the directory name in that answer - is used. + e.g. + make bzdisk FDARGS="root=/dev/nfs" + + Note that the user running this command will need to have + access to the floppy drive device, /dev/fd0 + + For more information on syslinux, including how to create bootdisks + for prebuilt kernels, see http://syslinux.zytor.com/ + + N.B: Previously it was possible to write a kernel directly to + a floppy using dd, configure the boot device using rdev, and + boot using the resulting floppy. Linux no longer supports this + method of booting. + +3.2) Booting from a cdrom using isolinux + + When building kernels, an easy way to create a bootable cdrom that + uses isolinux is to use the isoimage target which uses a bzimage + image. Like zdisk and bzdisk, this target accepts the FDARGS + parameter which can be used to set the kernel command line. + + e.g. + make isoimage FDARGS="root=/dev/nfs" + + The resulting iso image will be arch/<ARCH>/boot/image.iso + This can be written to a cdrom using a variety of tools including + cdrecord. + + e.g. + cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso + + For more information on isolinux, including how to create bootdisks + for prebuilt kernels, see http://syslinux.zytor.com/ 3.2) Using LILO - When using LILO you can specify all necessary command line - parameters with the 'append=' command in the LILO configuration - file. However, to use the 'root=' command you also need to - set up a dummy device as described in 3.1 above. For how to use - LILO and its 'append=' command please refer to the LILO - documentation. + When using LILO all the necessary command line parameters may be + specified using the 'append=' directive in the LILO configuration + file. + + However, to use the 'root=' directive you also need to create + a dummy root device, which may be removed after LILO is run. + + mknod /dev/boot255 c 0 255 + + For information on configuring LILO, please refer to its documentation. 3.3) Using GRUB - When you use GRUB, you simply append the parameters after the kernel - specification: "kernel <kernel> <parameters>" (without the quotes). + When using GRUB, kernel parameter are simply appended after the kernel + specification: kernel <kernel> <parameters> 3.4) Using loadlin - When you want to boot Linux from a DOS command prompt without - having a local hard disk to mount as root, you can use loadlin. - I was told that it works, but haven't used it myself yet. In - general you should be able to create a kernel command line simi- - lar to how LILO is doing it. Please refer to the loadlin docu- - mentation for further information. + loadlin may be used to boot Linux from a DOS command prompt without + requiring a local hard disk to mount as root. This has not been + thoroughly tested by the authors of this document, but in general + it should be possible configure the kernel command line similarly + to the configuration of LILO. + + Please refer to the loadlin documentation for further information. 3.5) Using a boot ROM - This is probably the most elegant way of booting a diskless - client. With a boot ROM the kernel gets loaded using the TFTP - protocol. As far as I know, no commercial boot ROMs yet - support booting Linux over the network, but there are two - free implementations of a boot ROM available on sunsite.unc.edu - and its mirrors. They are called 'netboot-nfs' and 'etherboot'. - Both contain everything you need to boot a diskless Linux client. + This is probably the most elegant way of booting a diskless client. + With a boot ROM the kernel is loaded using the TFTP protocol. The + authors of this document are not aware of any no commercial boot + ROMs that support booting Linux over the network. However, there + are two free implementations of a boot ROM, netboot-nfs and + etherboot, both of which are available on sunsite.unc.edu, and both + of which contain everything you need to boot a diskless Linux client. 3.6) Using pxelinux - Using pxelinux you specify the kernel you built with + Pxelinux may be used to boot linux using the PXE boot loader + which is present on many modern network cards. + + When using pxelinux, the kernel image is specified using "kernel <relative-path-below /tftpboot>". The nfsroot parameters are passed to the kernel by adding them to the "append" line. - You may perhaps also want to fine tune the console output, - see Documentation/serial-console.txt for serial console help. + It is common to use serial console in conjunction with pxeliunx, + see Documentation/serial-console.txt for more information. + + For more information on isolinux, including how to create bootdisks + for prebuilt kernels, see http://syslinux.zytor.com/ diff --git a/Documentation/ramdisk.txt b/Documentation/ramdisk.txt index 7c25584e082c..52f75b7d51c2 100644 --- a/Documentation/ramdisk.txt +++ b/Documentation/ramdisk.txt @@ -6,7 +6,7 @@ Contents: 1) Overview 2) Kernel Command Line Parameters 3) Using "rdev -r" - 4) An Example of Creating a Compressed RAM Disk + 4) An Example of Creating a Compressed RAM Disk 1) Overview @@ -34,7 +34,7 @@ make it clearer. The original "ramdisk=<ram_size>" has been kept around for compatibility reasons, but it may be removed in the future. The new RAM disk also has the ability to load compressed RAM disk images, -allowing one to squeeze more programs onto an average installation or +allowing one to squeeze more programs onto an average installation or rescue floppy disk. @@ -51,7 +51,7 @@ default is 4096 (4 MB) (8192 (8 MB) on S390). =================== This parameter tells the RAM disk driver how many bytes to use per block. The -default is 512. +default is 1024 (BLOCK_SIZE). 3) Using "rdev -r" @@ -70,7 +70,7 @@ These numbers are no magical secrets, as seen below: ./arch/i386/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 ./arch/i386/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 -Consider a typical two floppy disk setup, where you will have the +Consider a typical two floppy disk setup, where you will have the kernel on disk one, and have already put a RAM disk image onto disk #2. Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk @@ -97,12 +97,12 @@ Since the default start = 0 and the default prompt = 1, you could use: append = "load_ramdisk=1" -4) An Example of Creating a Compressed RAM Disk +4) An Example of Creating a Compressed RAM Disk ---------------------------------------------- To create a RAM disk image, you will need a spare block device to construct it on. This can be the RAM disk device itself, or an -unused disk partition (such as an unmounted swap partition). For this +unused disk partition (such as an unmounted swap partition). For this example, we will use the RAM disk device, "/dev/ram0". Note: This technique should not be done on a machine with less than 8 MB diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index 69866d5997a4..b8dc51ca776c 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl @@ -1172,7 +1172,7 @@ } /* PCI IDs */ - static struct pci_device_id snd_mychip_ids[] __devinitdata = { + static struct pci_device_id snd_mychip_ids[] = { { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, .... @@ -1565,7 +1565,7 @@ <informalexample> <programlisting> <![CDATA[ - static struct pci_device_id snd_mychip_ids[] __devinitdata = { + static struct pci_device_id snd_mychip_ids[] = { { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, .... diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt index f001cd93b79b..02b0f7beb6d1 100644 --- a/Documentation/usb/usb-serial.txt +++ b/Documentation/usb/usb-serial.txt @@ -399,10 +399,10 @@ REINER SCT cyberJack pinpad/e-com USB chipcard reader Prolific PL2303 Driver - This driver support any device that has the PL2303 chip from Prolific + This driver supports any device that has the PL2303 chip from Prolific in it. This includes a number of single port USB to serial converters and USB GPS devices. Devices from Aten (the UC-232) and - IO-Data work with this driver. + IO-Data work with this driver, as does the DCU-11 mobile-phone cable. For any questions or problems with this driver, please contact Greg Kroah-Hartman at greg@kroah.com diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 6887d44d2661..6da24e7a56cb 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -238,6 +238,13 @@ Debugging pagefaulttrace Dump all page faults. Only useful for extreme debugging and will create a lot of output. + call_trace=[old|both|newfallback|new] + old: use old inexact backtracer + new: use new exact dwarf2 unwinder + both: print entries from both + newfallback: use new unwinder but fall back to old if it gets + stuck (default) + Misc noreplacement Don't replace instructions with more appropriate ones |