From a7aea373b4ca428f1be2c1fedd2f26c8e3f2864d Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Thu, 23 Apr 2009 16:17:54 -0700 Subject: fsldma: use PCI Read Multiple command By default, the Freescale 83xx DMA controller uses the PCI Read Line command when reading data over the PCI bus. Setting the controller to use the PCI Read Multiple command instead allows the controller to read much larger bursts of data, which provides a drastic speed increase. The slowdown due to using PCI Read Line was only observed when a PCI-to-PCI bridge was between the devices trying to communicate. A simple test driver showed an increase from 4MB/sec to 116MB/sec when performing DMA over the PCI bus. Using DMA to transfer between blocks of local SDRAM showed no change in performance with this patch. The dmatest driver was also used to verify the correctness of the transfers, and showed no errors. Signed-off-by: Ira W. Snyder Acked-by: Timur Tabi Acked-by: Kumar Gala Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 10 ++++++++-- drivers/dma/fsldma.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index f18d1bde0439..a1cb25e277b5 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -12,6 +12,11 @@ * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. * The support for MPC8349 DMA contorller is also added. * + * This driver instructs the DMA controller to issue the PCI Read Multiple + * command for PCI read operations, instead of using the default PCI Read Line + * command. Please be aware that this setting may result in read pre-fetching + * on some platforms. + * * This is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -49,9 +54,10 @@ static void dma_init(struct fsl_dma_chan *fsl_chan) case FSL_DMA_IP_83XX: /* Set the channel to below modes: * EOTIE - End-of-transfer interrupt enable + * PRC_RM - PCI read multiple */ - DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE, - 32); + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE + | FSL_DMA_MR_PRC_RM, 32); break; } diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 4f21a512d848..dc7f26865797 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -38,6 +38,7 @@ /* Special MR definition for MPC8349 */ #define FSL_DMA_MR_EOTIE 0x00000080 +#define FSL_DMA_MR_PRC_RM 0x00000800 #define FSL_DMA_SR_CH 0x00000020 #define FSL_DMA_SR_PE 0x00000010 -- cgit v1.2.3 From be30b226f2ae618cd719e40267d9923db1db9001 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 28 May 2009 09:20:42 +0000 Subject: fsldma: enable external start for the 83xx controller The 83xx controller has external start capability, but lacks external pause capability. Hook up the external start function pointer for the 83xx controller. Signed-off-by: Ira W. 
Snyder Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index a1cb25e277b5..10bcf0cb0efc 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -877,9 +877,9 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) { case FSL_DMA_IP_85XX: - new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; case FSL_DMA_IP_83XX: + new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; } -- cgit v1.2.3 From 43a1a3ed6bf5a1b9ae197b4f5f20033baf19db61 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 28 May 2009 09:26:40 +0000 Subject: fsldma: do not clear bandwidth control bits on the 83xx controller The 83xx controller does not support the external pause feature. The bit in the mode register that controls external pause on the 85xx controller happens to be part of the bandwidth control settings for the 83xx controller. This patch fixes the driver so that it only clears the external pause bit if the hardware is the 85xx controller. When driving the 83xx controller, the bit is left untouched. This follows the existing convention that mode registers settings are not touched unless necessary. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 10bcf0cb0efc..6e60c77a145c 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -147,10 +147,11 @@ static void dma_start(struct fsl_dma_chan *fsl_chan) if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); mr_set |= FSL_DMA_MR_EMP_EN; - } else + } else if ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & ~FSL_DMA_MR_EMP_EN, 32); + } if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT) mr_set |= FSL_DMA_MR_EMS_EN; -- cgit v1.2.3 From 04a820ead0838c76e9c1242feb5e71048bf3e9dc Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:14:00 -0400 Subject: olpc_battery: Fix up eeprom read function The eeprom read function was placing values into the wrong place in 'buf'; we were starting from buf[off], rather than buf[0]. Also, the for loop that we were using was much uglier than it needed to be. This cleans it up a bit. Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- drivers/power/olpc_battery.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 5fbca2681baa..9c216dd41550 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. 
*/ +#include #include #include #include @@ -334,21 +335,21 @@ static ssize_t olpc_bat_eeprom_read(struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { uint8_t ec_byte; - int ret, end; + int ret; + int i; if (off >= EEPROM_SIZE) return 0; if (off + count > EEPROM_SIZE) count = EEPROM_SIZE - off; - end = EEPROM_START + off + count; - for (ec_byte = EEPROM_START + off; ec_byte < end; ec_byte++) { - ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, - &buf[ec_byte - EEPROM_START], 1); + for (i = 0; i < count; i++) { + ec_byte = EEPROM_START + off + i; + ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &buf[i], 1); if (ret) { - printk(KERN_ERR "olpc-battery: EC command " - "EC_BAT_EEPROM @ 0x%x failed -" - " %d!\n", ec_byte, ret); + pr_err("olpc-battery: " + "EC_BAT_EEPROM cmd @ 0x%x failed - %d!\n", + ec_byte, ret); return -EIO; } } -- cgit v1.2.3 From 8f7e57985fa794ab6afdcd3642581d9e1fe6de31 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:16:17 -0400 Subject: olpc_battery: Ensure that the TRICKLE bit is checked There are times when the battery is present but trickle charging, and the EC sets only the TRICKLE bit. So we must check for the bit when we're checking the charging/present status. Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- drivers/power/olpc_battery.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 9c216dd41550..58e419299cd6 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -36,6 +36,7 @@ #define BAT_STAT_AC 0x10 #define BAT_STAT_CHARGING 0x20 #define BAT_STAT_DISCHARGING 0x40 +#define BAT_STAT_TRICKLE 0x80 #define BAT_ERR_INFOFAIL 0x02 #define BAT_ERR_OVERVOLTAGE 0x04 @@ -90,7 +91,7 @@ static char bat_serial[17]; /* Ick */ static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte) { if (olpc_platform_info.ecver > 0x44) { - if (ec_byte & BAT_STAT_CHARGING) + if (ec_byte & (BAT_STAT_CHARGING | BAT_STAT_TRICKLE)) val->intval = POWER_SUPPLY_STATUS_CHARGING; else if (ec_byte & BAT_STAT_DISCHARGING) val->intval = POWER_SUPPLY_STATUS_DISCHARGING; @@ -220,7 +221,8 @@ static int olpc_bat_get_property(struct power_supply *psy, It doesn't matter though -- the EC will return the last-known information, and it's as if we just ran that _little_ bit faster and managed to read it out before the battery went away. */ - if (!(ec_byte & BAT_STAT_PRESENT) && psp != POWER_SUPPLY_PROP_PRESENT) + if (!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)) && + psp != POWER_SUPPLY_PROP_PRESENT) return -ENODEV; switch (psp) { @@ -230,7 +232,8 @@ static int olpc_bat_get_property(struct power_supply *psy, return ret; break; case POWER_SUPPLY_PROP_PRESENT: - val->intval = !!(ec_byte & BAT_STAT_PRESENT); + val->intval = !!(ec_byte & (BAT_STAT_PRESENT | + BAT_STAT_TRICKLE)); break; case POWER_SUPPLY_PROP_HEALTH: -- cgit v1.2.3 From bfdb46ce8494eae30dbaae65c81e684e6db6228b Mon Sep 17 00:00:00 2001 From: Ryan Mallon Date: Thu, 18 Jun 2009 11:26:26 +1200 Subject: Add ds2782 battery gas gauge driver This patch adds a driver for ds2782 battery devices. 
Signed-off-by: Ryan Mallon Signed-off-by: Anton Vorontsov --- drivers/power/Kconfig | 7 + drivers/power/Makefile | 1 + drivers/power/ds2782_battery.c | 330 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 338 insertions(+) create mode 100644 drivers/power/ds2782_battery.c (limited to 'drivers') diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 7eda34838bfe..bdbc4f73fcdc 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -43,6 +43,13 @@ config BATTERY_DS2760 help Say Y here to enable support for batteries with ds2760 chip. +config BATTERY_DS2782 + tristate "DS2782 standalone gas-gauge" + depends on I2C + help + Say Y here to enable support for the DS2782 standalone battery + gas-gauge. + config BATTERY_PMU tristate "Apple PMU battery" depends on PPC32 && ADB_PMU diff --git a/drivers/power/Makefile b/drivers/power/Makefile index daf3179689aa..380d17c9ae29 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_APM_POWER) += apm_power.o obj-$(CONFIG_WM8350_POWER) += wm8350_power.o obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o +obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c new file mode 100644 index 000000000000..da14f374cb60 --- /dev/null +++ b/drivers/power/ds2782_battery.c @@ -0,0 +1,330 @@ +/* + * I2C client/driver for the Maxim/Dallas DS2782 Stand-Alone Fuel Gauge IC + * + * Copyright (C) 2009 Bluewater Systems Ltd + * + * Author: Ryan Mallon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DS2782_REG_RARC 0x06 /* Remaining active relative capacity */ + +#define DS2782_REG_VOLT_MSB 0x0c +#define DS2782_REG_TEMP_MSB 0x0a +#define DS2782_REG_CURRENT_MSB 0x0e + +/* EEPROM Block */ +#define DS2782_REG_RSNSP 0x69 /* Sense resistor value */ + +/* Current unit measurement in uA for a 1 milli-ohm sense resistor */ +#define DS2782_CURRENT_UNITS 1563 + +#define to_ds2782_info(x) container_of(x, struct ds2782_info, battery) + +struct ds2782_info { + struct i2c_client *client; + struct power_supply battery; + int id; +}; + +static DEFINE_IDR(battery_id); +static DEFINE_MUTEX(battery_lock); + +static inline int ds2782_read_reg(struct ds2782_info *info, int reg, u8 *val) +{ + int ret; + + ret = i2c_smbus_read_byte_data(info->client, reg); + if (ret < 0) { + dev_err(&info->client->dev, "register read failed\n"); + return ret; + } + + *val = ret; + return 0; +} + +static inline int ds2782_read_reg16(struct ds2782_info *info, int reg_msb, + s16 *val) +{ + int ret; + + ret = swab16(i2c_smbus_read_word_data(info->client, reg_msb)); + if (ret < 0) { + dev_err(&info->client->dev, "register read failed\n"); + return ret; + } + + *val = ret; + return 0; +} + +static int ds2782_get_temp(struct ds2782_info *info, int *temp) +{ + s16 raw; + int err; + + /* + * Temperature is measured in units of 0.125 degrees celcius, the + * power_supply class measures temperature in tenths of degrees + * celsius. The temperature value is stored as a 10 bit number, plus + * sign in the upper bits of a 16 bit register. 
+ */ + err = ds2782_read_reg16(info, DS2782_REG_TEMP_MSB, &raw); + if (err) + return err; + *temp = ((raw / 32) * 125) / 100; + return 0; +} + +static int ds2782_get_current(struct ds2782_info *info, int *current_uA) +{ + int sense_res; + int err; + u8 sense_res_raw; + s16 raw; + + /* + * The units of measurement for current are dependent on the value of + * the sense resistor. + */ + err = ds2782_read_reg(info, DS2782_REG_RSNSP, &sense_res_raw); + if (err) + return err; + if (sense_res_raw == 0) { + dev_err(&info->client->dev, "sense resistor value is 0\n"); + return -ENXIO; + } + sense_res = 1000 / sense_res_raw; + + dev_dbg(&info->client->dev, "sense resistor = %d milli-ohms\n", + sense_res); + err = ds2782_read_reg16(info, DS2782_REG_CURRENT_MSB, &raw); + if (err) + return err; + *current_uA = raw * (DS2782_CURRENT_UNITS / sense_res); + return 0; +} + +static int ds2782_get_voltage(struct ds2782_info *info, int *voltage_uA) +{ + s16 raw; + int err; + + /* + * Voltage is measured in units of 4.88mV. The voltage is stored as + * a 10-bit number plus sign, in the upper bits of a 16-bit register + */ + err = ds2782_read_reg16(info, DS2782_REG_VOLT_MSB, &raw); + if (err) + return err; + *voltage_uA = (raw / 32) * 4800; + return 0; +} + +static int ds2782_get_capacity(struct ds2782_info *info, int *capacity) +{ + int err; + u8 raw; + + err = ds2782_read_reg(info, DS2782_REG_RARC, &raw); + if (err) + return err; + *capacity = raw; + return raw; +} + +static int ds2782_get_status(struct ds2782_info *info, int *status) +{ + int err; + int current_uA; + int capacity; + + err = ds2782_get_current(info, ¤t_uA); + if (err) + return err; + + err = ds2782_get_capacity(info, &capacity); + if (err) + return err; + + if (capacity == 100) + *status = POWER_SUPPLY_STATUS_FULL; + else if (current_uA == 0) + *status = POWER_SUPPLY_STATUS_NOT_CHARGING; + else if (current_uA < 0) + *status = POWER_SUPPLY_STATUS_DISCHARGING; + else + *status = POWER_SUPPLY_STATUS_CHARGING; + + return 0; +} + +static int ds2782_battery_get_property(struct power_supply *psy, + enum power_supply_property prop, + union power_supply_propval *val) +{ + struct ds2782_info *info = to_ds2782_info(psy); + int ret; + + switch (prop) { + case POWER_SUPPLY_PROP_STATUS: + ret = ds2782_get_status(info, &val->intval); + break; + + case POWER_SUPPLY_PROP_CAPACITY: + ret = ds2782_get_capacity(info, &val->intval); + break; + + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + ret = ds2782_get_voltage(info, &val->intval); + break; + + case POWER_SUPPLY_PROP_CURRENT_NOW: + ret = ds2782_get_current(info, &val->intval); + break; + + case POWER_SUPPLY_PROP_TEMP: + ret = ds2782_get_temp(info, &val->intval); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static enum power_supply_property ds2782_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_TEMP, +}; + +static void ds2782_power_supply_init(struct power_supply *battery) +{ + battery->type = POWER_SUPPLY_TYPE_BATTERY; + battery->properties = ds2782_battery_props; + battery->num_properties = ARRAY_SIZE(ds2782_battery_props); + battery->get_property = ds2782_battery_get_property; + battery->external_power_changed = NULL; +} + +static int ds2782_battery_remove(struct i2c_client *client) +{ + struct ds2782_info *info = i2c_get_clientdata(client); + + power_supply_unregister(&info->battery); + kfree(info->battery.name); + + mutex_lock(&battery_lock); + idr_remove(&battery_id, info->id); + 
mutex_unlock(&battery_lock); + + i2c_set_clientdata(client, info); + + kfree(info); + return 0; +} + +static int ds2782_battery_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ds2782_info *info; + int ret; + int num; + + /* Get an ID for this battery */ + ret = idr_pre_get(&battery_id, GFP_KERNEL); + if (ret == 0) { + ret = -ENOMEM; + goto fail_id; + } + + mutex_lock(&battery_lock); + ret = idr_get_new(&battery_id, client, &num); + mutex_unlock(&battery_lock); + if (ret < 0) + goto fail_id; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail_info; + } + + info->battery.name = kasprintf(GFP_KERNEL, "ds2782-%d", num); + if (!info->battery.name) { + ret = -ENOMEM; + goto fail_name; + } + + i2c_set_clientdata(client, info); + info->client = client; + ds2782_power_supply_init(&info->battery); + + ret = power_supply_register(&client->dev, &info->battery); + if (ret) { + dev_err(&client->dev, "failed to register battery\n"); + goto fail_register; + } + + return 0; + +fail_register: + kfree(info->battery.name); +fail_name: + i2c_set_clientdata(client, info); + kfree(info); +fail_info: + mutex_lock(&battery_lock); + idr_remove(&battery_id, num); + mutex_unlock(&battery_lock); +fail_id: + return ret; +} + +static const struct i2c_device_id ds2782_id[] = { + {"ds2782", 0}, + {}, +}; + +static struct i2c_driver ds2782_battery_driver = { + .driver = { + .name = "ds2782-battery", + }, + .probe = ds2782_battery_probe, + .remove = ds2782_battery_remove, + .id_table = ds2782_id, +}; + +static int __init ds2782_init(void) +{ + return i2c_add_driver(&ds2782_battery_driver); +} +module_init(ds2782_init); + +static void __exit ds2782_exit(void) +{ + i2c_del_driver(&ds2782_battery_driver); +} +module_exit(ds2782_exit); + +MODULE_AUTHOR("Ryan Mallon "); +MODULE_DESCRIPTION("Maxim/Dallas DS2782 Stand-Alone Fuel Gauage IC driver"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From daf4219dbcbb2efcd638fcd3c29a622e1c18cc38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 1 Jul 2009 16:12:53 -0700 Subject: dmaengine: move HIGHMEM64G restriction to ASYNC_TX_DMA On HIGHMEM64G systems dma_addr_t is known to be larger than (void *) which precludes async_xor from performing dma address conversions by reusing the input parameter address list. However, other parts of the dmaengine infrastructure do not suffer this constraint, so the HIGHMEM64G restriction can be down-levelled. Signed-off-by: Dan Williams --- crypto/async_tx/async_xor.c | 2 +- drivers/dma/Kconfig | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 95fe2c8d6c51..90dd3f8bd283 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -300,7 +300,7 @@ EXPORT_SYMBOL_GPL(async_xor_zero_sum); static int __init async_xor_init(void) { - #ifdef CONFIG_DMA_ENGINE + #ifdef CONFIG_ASYNC_TX_DMA /* To conserve stack space the input src_list (array of page pointers) * is reused to hold the array of dma addresses passed to the driver. * This conversion is only possible when dma_addr_t is less than the diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3b3c01b6f1ee..babf214a509b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -4,7 +4,7 @@ menuconfig DMADEVICES bool "DMA Engine support" - depends on !HIGHMEM64G && HAS_DMA + depends on HAS_DMA help DMA engines can do asynchronous data transfers without involving the host CPU. 
Currently, this framework can be @@ -100,7 +100,7 @@ config NET_DMA config ASYNC_TX_DMA bool "Async_tx: Offload support for the async_tx api" - depends on DMA_ENGINE + depends on DMA_ENGINE && !HIGHMEM64G help This allows the async_tx api to take advantage of offload engines for memcpy, memset, xor, and raid6 p+q operations. If your platform has -- cgit v1.2.3 From 832cc28d5bc676331e6376d940ae45d5937aa688 Mon Sep 17 00:00:00 2001 From: Florian Mickler Date: Mon, 13 Jul 2009 18:40:32 +0800 Subject: drm/i915: Set lvds dual channel according to register from vbios Vbios will set lvds register correctly based on current algorithm for lingle/dual Channel LVDS when system boot, so we can accept this configuration directly, regardless of LVDS enable status. It fixed freedesktop.org bug #22262 Signed-off-by: Florian Mickler Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 508838ee31e0..3371cb0ba81b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -645,7 +645,7 @@ intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int err = target; if (IS_I9XX(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) && - (I915_READ(LVDS) & LVDS_PORT_EN) != 0) { + (I915_READ(LVDS)) != 0) { /* * For LVDS, if the panel is on, just rely on its current * settings for dual-channel. We haven't figured out how to -- cgit v1.2.3 From 8a90523639f49dc4b4fa7ae47bb9c8ed73ea8577 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Sat, 11 Jul 2009 16:48:03 -0400 Subject: drm/i915: refactor error detection & collection This patch refactors the existing error detection and collection code, placing most of it in i915_handle_error(). Additionally, we introduce a work queue for scheduling post-crash tasks such as generating a uevent. Using the uevent facility, userspace should be able to capture a post-mortem dump for diagnostics. 
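
The mechanism this patch introduces is the usual split between interrupt and process context; a condensed sketch of the pattern, taken from the diff that follows with the surrounding driver details omitted:

/* The IRQ handler only records state and schedules work; the uevent is
 * generated later from process context, where it is safe to sleep.
 */
static void i915_error_work_func(struct work_struct *work)
{
	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
						    error_work);
	struct drm_device *dev = dev_priv->dev;
	char *envp[] = { "ERROR=1", NULL };

	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
}

/* at IRQ pre-install time */
INIT_WORK(&dev_priv->error_work, i915_error_work_func);

/* from the interrupt handler, once the error state has been captured */
schedule_work(&dev_priv->error_work);

Keeping the uevent out of the interrupt path matters because kobject_uevent_env() allocates memory and may sleep, which is not allowed in hard IRQ context.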
Signed-off-by: Jesse Barnes Signed-off-by: Ben Gamari Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_debugfs.c | 2 + drivers/gpu/drm/i915/i915_irq.c | 232 ++++++++++++++++++++++---------- 3 files changed, 161 insertions(+), 74 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d08752875885..b05b44dd3bf6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -229,6 +229,7 @@ typedef struct drm_i915_private { spinlock_t error_lock; struct drm_i915_error_state *first_error; + struct work_struct error_work; /* Register state */ u8 saveLBB; diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c index 9a44bfcb8139..cb3b97405fbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_debugfs.c +++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c @@ -343,6 +343,8 @@ static int i915_error_state(struct seq_file *m, void *unused) error = dev_priv->first_error; + seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, + error->time.tv_usec); seq_printf(m, "EIR: 0x%08x\n", error->eir); seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er); seq_printf(m, " INSTPM: 0x%08x\n", error->instpm); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7ba23a69a0c0..f340b3fd54e6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -290,6 +290,35 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) return ret; } +/** + * i915_error_work_func - do process context error handling work + * @work: work struct + * + * Fire an error uevent so userspace can see that a hang or error + * was detected. + */ +static void i915_error_work_func(struct work_struct *work) +{ + drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, + error_work); + struct drm_device *dev = dev_priv->dev; + char *event_string = "ERROR=1"; + char *envp[] = { event_string, NULL }; + + DRM_DEBUG("generating error event\n"); + + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp); +} + +/** + * i915_capture_error_state - capture an error record for later analysis + * @dev: drm device + * + * Should be called when an error is detected (either a hang or an error + * interrupt) to capture error state from the time of the error. Fills + * out a structure which becomes available in debugfs for user level tools + * to pick up. + */ static void i915_capture_error_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -325,12 +354,137 @@ static void i915_capture_error_state(struct drm_device *dev) error->acthd = I915_READ(ACTHD_I965); } + do_gettimeofday(&error->time); + dev_priv->first_error = error; out: spin_unlock_irqrestore(&dev_priv->error_lock, flags); } +/** + * i915_handle_error - handle an error interrupt + * @dev: drm device + * + * Do some basic checking of regsiter state at error interrupt time and + * dump it to the syslog. Also call i915_capture_error_state() to make + * sure we get a record and make it available in debugfs. Fire a uevent + * so userspace knows something bad happened (should trigger collection + * of a ring dump etc.). 
+ */ +static void i915_handle_error(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 eir = I915_READ(EIR); + u32 pipea_stats = I915_READ(PIPEASTAT); + u32 pipeb_stats = I915_READ(PIPEBSTAT); + + i915_capture_error_state(dev); + + printk(KERN_ERR "render error detected, EIR: 0x%08x\n", + eir); + + if (IS_G4X(dev)) { + if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) { + u32 ipeir = I915_READ(IPEIR_I965); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printk(KERN_ERR " INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printk(KERN_ERR " INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + if (eir & GM45_ERROR_PAGE_TABLE) { + u32 pgtbl_err = I915_READ(PGTBL_ER); + printk(KERN_ERR "page table error\n"); + printk(KERN_ERR " PGTBL_ER: 0x%08x\n", + pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + } + } + + if (IS_I9XX(dev)) { + if (eir & I915_ERROR_PAGE_TABLE) { + u32 pgtbl_err = I915_READ(PGTBL_ER); + printk(KERN_ERR "page table error\n"); + printk(KERN_ERR " PGTBL_ER: 0x%08x\n", + pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + } + } + + if (eir & I915_ERROR_MEMORY_REFRESH) { + printk(KERN_ERR "memory refresh error\n"); + printk(KERN_ERR "PIPEASTAT: 0x%08x\n", + pipea_stats); + printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", + pipeb_stats); + /* pipestat has already been acked */ + } + if (eir & I915_ERROR_INSTRUCTION) { + printk(KERN_ERR "instruction error\n"); + printk(KERN_ERR " INSTPM: 0x%08x\n", + I915_READ(INSTPM)); + if (!IS_I965G(dev)) { + u32 ipeir = I915_READ(IPEIR); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD)); + I915_WRITE(IPEIR, ipeir); + (void)I915_READ(IPEIR); + } else { + u32 ipeir = I915_READ(IPEIR_I965); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printk(KERN_ERR " INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printk(KERN_ERR " INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + } + + I915_WRITE(EIR, eir); + (void)I915_READ(EIR); + eir = I915_READ(EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. 
+ */ + DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + } + + schedule_work(&dev_priv->error_work); +} + irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -372,6 +526,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) pipea_stats = I915_READ(PIPEASTAT); pipeb_stats = I915_READ(PIPEBSTAT); + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + i915_handle_error(dev); + /* * Clear the PIPE(A|B)STAT regs before the IIR */ @@ -409,80 +566,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) I915_READ(PORT_HOTPLUG_STAT); } - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) { - u32 eir = I915_READ(EIR); - - i915_capture_error_state(dev); - - printk(KERN_ERR "render error detected, EIR: 0x%08x\n", - eir); - if (eir & I915_ERROR_PAGE_TABLE) { - u32 pgtbl_err = I915_READ(PGTBL_ER); - printk(KERN_ERR "page table error\n"); - printk(KERN_ERR " PGTBL_ER: 0x%08x\n", - pgtbl_err); - I915_WRITE(PGTBL_ER, pgtbl_err); - (void)I915_READ(PGTBL_ER); - } - if (eir & I915_ERROR_MEMORY_REFRESH) { - printk(KERN_ERR "memory refresh error\n"); - printk(KERN_ERR "PIPEASTAT: 0x%08x\n", - pipea_stats); - printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", - pipeb_stats); - /* pipestat has already been acked */ - } - if (eir & I915_ERROR_INSTRUCTION) { - printk(KERN_ERR "instruction error\n"); - printk(KERN_ERR " INSTPM: 0x%08x\n", - I915_READ(INSTPM)); - if (!IS_I965G(dev)) { - u32 ipeir = I915_READ(IPEIR); - - printk(KERN_ERR " IPEIR: 0x%08x\n", - I915_READ(IPEIR)); - printk(KERN_ERR " IPEHR: 0x%08x\n", - I915_READ(IPEHR)); - printk(KERN_ERR " INSTDONE: 0x%08x\n", - I915_READ(INSTDONE)); - printk(KERN_ERR " ACTHD: 0x%08x\n", - I915_READ(ACTHD)); - I915_WRITE(IPEIR, ipeir); - (void)I915_READ(IPEIR); - } else { - u32 ipeir = I915_READ(IPEIR_I965); - - printk(KERN_ERR " IPEIR: 0x%08x\n", - I915_READ(IPEIR_I965)); - printk(KERN_ERR " IPEHR: 0x%08x\n", - I915_READ(IPEHR_I965)); - printk(KERN_ERR " INSTDONE: 0x%08x\n", - I915_READ(INSTDONE_I965)); - printk(KERN_ERR " INSTPS: 0x%08x\n", - I915_READ(INSTPS)); - printk(KERN_ERR " INSTDONE1: 0x%08x\n", - I915_READ(INSTDONE1)); - printk(KERN_ERR " ACTHD: 0x%08x\n", - I915_READ(ACTHD_I965)); - I915_WRITE(IPEIR_I965, ipeir); - (void)I915_READ(IPEIR_I965); - } - } - - I915_WRITE(EIR, eir); - (void)I915_READ(EIR); - eir = I915_READ(EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); - I915_WRITE(EMR, I915_READ(EMR) | eir); - I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); - } - } - I915_WRITE(IIR, iir); new_iir = I915_READ(IIR); /* Flush posted writes */ @@ -830,6 +913,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev) atomic_set(&dev_priv->irq_received, 0); INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); + INIT_WORK(&dev_priv->error_work, i915_error_work_func); if (IS_IGDNG(dev)) { igdng_irq_preinstall(dev); -- cgit v1.2.3 From 5e4d6fa72619aeea271d2ad704757717b06e291a Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 12 Jul 2009 23:53:17 -0700 Subject: drm/i915: Allow frame buffers up to 4096x4096 on 915/945 class hardware The 915 and 945 scanout engines can handle frame buffers up to 4096 pixels wide. Pre-9xx hardware has an 8192 byte stride limit, and so we leave the existing 2048 max in place. 
I'm not sure why we limit the height to the same value; there's no intrinsic hardware limit in the scanout engine. Signed-off-by: Keith Packard Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3371cb0ba81b..984645e26a2d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3148,6 +3148,9 @@ void intel_modeset_init(struct drm_device *dev) if (IS_I965G(dev)) { dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; + } else if (IS_I9XX(dev)) { + dev->mode_config.max_width = 4096; + dev->mode_config.max_height = 4096; } else { dev->mode_config.max_width = 2048; dev->mode_config.max_height = 2048; -- cgit v1.2.3 From ed8c754b292f02d0550596481527b7bf2b52d024 Mon Sep 17 00:00:00 2001 From: Tormod Volden Date: Mon, 13 Jul 2009 22:26:48 +0200 Subject: drm/i915: ignore lvds on AOpen Mini PC MP-915 This motherboard thinks it has an LVDS connected, so without this patch the screen goes blank on the connected VGA monitor. More information (for the non-KMS case) in fd.o bug #18004. Signed-off-by: Tormod Volden Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ab38efffecf..43c7d9aa59ce 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -778,6 +778,14 @@ static const struct dmi_system_id intel_no_lvds[] = { DMI_MATCH(DMI_PRODUCT_NAME, "i965GMx-IF"), }, }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "AOpen Mini PC MP915", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AOpen"), + DMI_MATCH(DMI_BOARD_NAME, "i915GMx-F"), + }, + }, { .callback = intel_no_lvds_dmi_callback, .ident = "Aopen i945GTt-VFA", -- cgit v1.2.3 From dff33cfcefa31c30b72c57f44586754ea9e8f3e2 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 14 Jul 2009 10:15:56 -0700 Subject: drm/i915: FIFO watermark calculation fixes I discovered several bugs in the FIFO code that was recently applied. Some of them fell into the "how did this ever work" category, since in some cases we were using the wrong FIFO size values, and the calculations ended up being way off. This patch fixes all the bugs I found, and works well on my GM45, 915GM and 855GM test machines; but as usual with these sorts of patches broader testing is definitely requested (in particular this patch affects 830, 845 and 865 for which I don't have test hardware). Overall, the patch clarifies the watermark calculation function by adding some comments and debug info, and making the variable names a bit clearer. The "get FIFO size" portion of the code has also been corrected, so we should be able to properly detect the FIFO allocations for each pipe, for use in the watermark calculation. 
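
The watermark math that these per-plane FIFO sizes feed into (intel_calculate_wm() in the diff below) reduces to a few integer steps; a simplified standalone sketch, where the function name and the numbers in the comment are only illustrative:

/* Model of the FIFO watermark calculation: count how many FIFO entries
 * drain during the memory latency window, then the watermark is whatever
 * remains of the per-plane FIFO after those entries plus a guard margin.
 */
static long calc_watermark(unsigned long clock_khz, int pixel_size,
			   unsigned long latency_ns, int fifo_size,
			   int cacheline_size, int guard_size)
{
	long entries = (clock_khz * pixel_size * latency_ns) / 1000000;

	entries /= cacheline_size;	/* the FIFO is managed in cachelines */
	return fifo_size - (entries + guard_size);
}

/* Example values: a 1280x1024@60 mode (~108000 kHz), 4 bytes per pixel,
 * 3000 ns latency, a 95-entry FIFO, 64-byte cachelines and a 2-entry
 * guard give 95 - (20 + 2) = 73 as the watermark level.
 */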
Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 186 +++++++++++++++++++++-------------- 2 files changed, 113 insertions(+), 75 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6c0858484094..897a116cad14 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1618,7 +1618,7 @@ #define I830_FIFO_LINE_SIZE 32 #define I945_FIFO_SIZE 127 /* 945 & 965 */ #define I915_FIFO_SIZE 95 -#define I855GM_FIFO_SIZE 255 +#define I855GM_FIFO_SIZE 127 /* In cachelines */ #define I830_FIFO_SIZE 95 #define I915_MAX_WM 0x3f diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 984645e26a2d..3fa0d63c83b9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1623,44 +1623,67 @@ static struct intel_watermark_params igd_cursor_hplloff_wm = { IGD_FIFO_LINE_SIZE }; static struct intel_watermark_params i945_wm_info = { - I915_FIFO_LINE_SIZE, + I945_FIFO_SIZE, I915_MAX_WM, 1, - 0, - IGD_FIFO_LINE_SIZE + 2, + I915_FIFO_LINE_SIZE }; static struct intel_watermark_params i915_wm_info = { - I945_FIFO_SIZE, + I915_FIFO_SIZE, I915_MAX_WM, 1, - 0, + 2, I915_FIFO_LINE_SIZE }; static struct intel_watermark_params i855_wm_info = { I855GM_FIFO_SIZE, I915_MAX_WM, 1, - 0, + 2, I830_FIFO_LINE_SIZE }; static struct intel_watermark_params i830_wm_info = { I830_FIFO_SIZE, I915_MAX_WM, 1, - 0, + 2, I830_FIFO_LINE_SIZE }; +/** + * intel_calculate_wm - calculate watermark level + * @clock_in_khz: pixel clock + * @wm: chip FIFO params + * @pixel_size: display pixel size + * @latency_ns: memory latency for the platform + * + * Calculate the watermark level (the level at which the display plane will + * start fetching from memory again). Each chip has a different display + * FIFO size and allocation, so the caller needs to figure that out and pass + * in the correct intel_watermark_params structure. + * + * As the pixel clock runs, the FIFO will be drained at a rate that depends + * on the pixel size. When it reaches the watermark level, it'll start + * fetching FIFO line sized based chunks from memory until the FIFO fills + * past the watermark point. If the FIFO drains completely, a FIFO underrun + * will occur, and a display engine hang could result. 
+ */ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, struct intel_watermark_params *wm, int pixel_size, unsigned long latency_ns) { - unsigned long bytes_required, wm_size; + unsigned long entries_required, wm_size; + + entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; + entries_required /= wm->cacheline_size; + + DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required); - bytes_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; - bytes_required /= wm->cacheline_size; - wm_size = wm->fifo_size - bytes_required - wm->guard_size; + wm_size = wm->fifo_size - (entries_required + wm->guard_size); + + DRM_DEBUG("FIFO watermark level: %d\n", wm_size); if (wm_size > wm->max_wm) wm_size = wm->max_wm; @@ -1799,8 +1822,37 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock, return; } -const static int latency_ns = 5000; /* default for non-igd platforms */ +const static int latency_ns = 3000; /* default for non-igd platforms */ + +static int intel_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + if (IS_I9XX(dev)) { + if (plane == 0) + size = dsparb & 0x7f; + else + size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - + (dsparb & 0x7f); + } else if (IS_I85X(dev)) { + if (plane == 0) + size = dsparb & 0x1ff; + else + size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - + (dsparb & 0x1ff); + size >>= 1; /* Convert to cachelines */ + } else { + size = dsparb & 0x7f; + size >>= 1; /* Convert to cachelines */ + } + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + return size; +} static void i965_update_wm(struct drm_device *dev) { @@ -1817,101 +1869,87 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock, int planeb_clock, int sr_hdisplay, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t fwater_lo = I915_READ(FW_BLC) & MM_FIFO_WATERMARK; - uint32_t fwater_hi = I915_READ(FW_BLC2) & LM_FIFO_WATERMARK; - int bsize, asize, cwm, bwm = 1, awm = 1, srwm = 1; - uint32_t dsparb = I915_READ(DSPARB); - int planea_entries, planeb_entries; - struct intel_watermark_params *wm_params; + uint32_t fwater_lo; + uint32_t fwater_hi; + int total_size, cacheline_size, cwm, srwm = 1; + int planea_wm, planeb_wm; + struct intel_watermark_params planea_params, planeb_params; unsigned long line_time_us; int sr_clock, sr_entries = 0; + /* Create copies of the base settings for each pipe */ if (IS_I965GM(dev) || IS_I945GM(dev)) - wm_params = &i945_wm_info; + planea_params = planeb_params = i945_wm_info; else if (IS_I9XX(dev)) - wm_params = &i915_wm_info; + planea_params = planeb_params = i915_wm_info; else - wm_params = &i855_wm_info; - - planea_entries = intel_calculate_wm(planea_clock, wm_params, - pixel_size, latency_ns); - planeb_entries = intel_calculate_wm(planeb_clock, wm_params, - pixel_size, latency_ns); + planea_params = planeb_params = i855_wm_info; - DRM_DEBUG("FIFO entries - A: %d, B: %d\n", planea_entries, - planeb_entries); + /* Grab a couple of global values before we overwrite them */ + total_size = planea_params.fifo_size; + cacheline_size = planea_params.cacheline_size; - if (IS_I9XX(dev)) { - asize = dsparb & 0x7f; - bsize = (dsparb >> DSPARB_CSTART_SHIFT) & 0x7f; - } else { - asize = dsparb & 0x1ff; - bsize = (dsparb >> DSPARB_BEND_SHIFT) & 0x1ff; - } - DRM_DEBUG("FIFO size - A: %d, B: %d\n", asize, bsize); + /* Update per-plane FIFO sizes 
*/ + planea_params.fifo_size = intel_get_fifo_size(dev, 0); + planeb_params.fifo_size = intel_get_fifo_size(dev, 1); - /* Two extra entries for padding */ - awm = asize - (planea_entries + 2); - bwm = bsize - (planeb_entries + 2); - - /* Sanity check against potentially bad FIFO allocations */ - if (awm <= 0) { - /* pipe is on but has too few FIFO entries */ - if (planea_entries != 0) - DRM_DEBUG("plane A needs more FIFO entries\n"); - awm = 1; - } - if (bwm <= 0) { - if (planeb_entries != 0) - DRM_DEBUG("plane B needs more FIFO entries\n"); - bwm = 1; - } + planea_wm = intel_calculate_wm(planea_clock, &planea_params, + pixel_size, latency_ns); + planeb_wm = intel_calculate_wm(planeb_clock, &planeb_params, + pixel_size, latency_ns); + DRM_DEBUG("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); /* * Overlay gets an aggressive default since video jitter is bad. */ cwm = 2; - /* Calc sr entries for one pipe configs */ + /* Calc sr entries for one plane configs */ if (!planea_clock || !planeb_clock) { + /* self-refresh has much higher latency */ + const static int sr_latency_ns = 6000; + sr_clock = planea_clock ? planea_clock : planeb_clock; - line_time_us = (sr_hdisplay * 1000) / sr_clock; - sr_entries = (((latency_ns / line_time_us) + 1) * pixel_size * - sr_hdisplay) / 1000; - sr_entries = roundup(sr_entries / wm_params->cacheline_size, 1); - if (sr_entries < wm_params->fifo_size) - srwm = wm_params->fifo_size - sr_entries; + line_time_us = ((sr_hdisplay * 1000) / sr_clock); + + /* Use ns/us then divide to preserve precision */ + sr_entries = (((sr_latency_ns / line_time_us) + 1) * + pixel_size * sr_hdisplay) / 1000; + sr_entries = roundup(sr_entries / cacheline_size, 1); + DRM_DEBUG("self-refresh entries: %d\n", sr_entries); + srwm = total_size - sr_entries; + if (srwm < 0) + srwm = 1; } DRM_DEBUG("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", - awm, bwm, cwm, srwm); + planea_wm, planeb_wm, cwm, srwm); - fwater_lo = fwater_lo | ((bwm & 0x3f) << 16) | (awm & 0x3f); - fwater_hi = fwater_hi | (cwm & 0x1f); + fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); + fwater_hi = (cwm & 0x1f); + + /* Set request length to 8 cachelines per fetch */ + fwater_lo = fwater_lo | (1 << 24) | (1 << 8); + fwater_hi = fwater_hi | (1 << 8); I915_WRITE(FW_BLC, fwater_lo); I915_WRITE(FW_BLC2, fwater_hi); if (IS_I9XX(dev)) - I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN | (srwm & 0x3f)); + I915_WRITE(FW_BLC_SELF, (srwm & 0x3f)); } static void i830_update_wm(struct drm_device *dev, int planea_clock, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dsparb = I915_READ(DSPARB); uint32_t fwater_lo = I915_READ(FW_BLC) & MM_FIFO_WATERMARK; - unsigned int asize, awm; - int planea_entries; - - planea_entries = intel_calculate_wm(planea_clock, &i830_wm_info, - pixel_size, latency_ns); - - asize = dsparb & 0x7f; + int planea_wm; - awm = asize - planea_entries; + i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0); - fwater_lo = fwater_lo | awm; + planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info, + pixel_size, latency_ns); + fwater_lo = fwater_lo | planea_wm; I915_WRITE(FW_BLC, fwater_lo); } @@ -1984,7 +2022,7 @@ static void intel_update_watermarks(struct drm_device *dev) if (enabled <= 0) return; - /* Single pipe configs can enable self refresh */ + /* Single plane configs can enable self refresh */ if (enabled == 1 && IS_IGD(dev)) igd_enable_cxsr(dev, sr_clock, pixel_size); else if (IS_IGD(dev)) -- cgit v1.2.3 From 5381837f125cc62ad703fbcdfcd7566fc81fd404 
Mon Sep 17 00:00:00 2001 From: Tom Peng Date: Wed, 1 Jul 2009 20:37:26 +0800 Subject: [SCSI] libsas: reuse the original port when hotplugging phys in wide ports There's a hotplug problem in the way libsas allocates ports: it loops over the available ports first trying to add to an existing for a wide port and otherwise allocating the next free port. This scheme only works if the port array is packed from zero, which fails if a port gets hot unplugged and the array becomes sparse. In that case, a new port is formed even if there's a wide port it should be part of. Fix this by creating two loops over all the ports: the first to see if the phy should be part of a wide port and the second to form a new port in an empty port slot. Signed-off-by: Tom Peng Signed-off-by: Jack Wang Signed-off-by: Lindar Liu Cc: Stable Tree Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_port.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index e6ac59c023f1..fe8b74c706d2 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -56,7 +56,7 @@ static void sas_form_port(struct asd_sas_phy *phy) } } - /* find a port */ + /* see if the phy should be part of a wide port */ spin_lock_irqsave(&sas_ha->phy_port_lock, flags); for (i = 0; i < sas_ha->num_phys; i++) { port = sas_ha->sas_port[i]; @@ -69,12 +69,23 @@ static void sas_form_port(struct asd_sas_phy *phy) SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, port->id); break; - } else if (*(u64 *) port->sas_addr == 0 && port->num_phys==0) { - memcpy(port->sas_addr, phy->sas_addr, SAS_ADDR_SIZE); - break; } spin_unlock(&port->phy_list_lock); } + /* The phy does not match any existing port, create a new one */ + if (i == sas_ha->num_phys) { + for (i = 0; i < sas_ha->num_phys; i++) { + port = sas_ha->sas_port[i]; + spin_lock(&port->phy_list_lock); + if (*(u64 *)port->sas_addr == 0 + && port->num_phys == 0) { + memcpy(port->sas_addr, phy->sas_addr, + SAS_ADDR_SIZE); + break; + } + spin_unlock(&port->phy_list_lock); + } + } if (i >= sas_ha->num_phys) { printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", -- cgit v1.2.3 From 390c4dd448b1a5f04ea497c20f5ff664f8eeed01 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 16 Jul 2009 13:01:01 -0700 Subject: drm/i915: handle FIFO oversubsription correctly If you're pushing a plane hard (i.e. you need most or all of the FIFO entries just to cover your frame refresh latency), the watermark level may end up being negative. So fix up the signed vs. unsigned math in the calculation function to handle this correctly, giving all available FIFO entries to such a configuration. 
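
The failure mode here is plain C integer promotion rather than anything hardware specific: when a signed long is compared against an unsigned long, the signed operand is converted, so a negative watermark (an oversubscribed FIFO) compares as a huge positive number and takes the wrong branch. A self-contained restatement of the corrected logic, illustrative rather than the driver code itself:

/* Without the cast, "wm_size > max_wm" converts a negative wm_size to
 * unsigned and the test is wrongly true; the cast keeps the comparison
 * signed, and the final check catches the oversubscribed case.
 */
static long clamp_wm(long wm_size, unsigned long max_wm, long default_wm)
{
	if (wm_size > (long)max_wm)
		wm_size = max_wm;
	if (wm_size <= 0)
		wm_size = default_wm;
	return wm_size;
}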
Reported-by: Eric Anholt
Tested-by: Eric Anholt
Signed-off-by: Jesse Barnes
Signed-off-by: Eric Anholt
---
 drivers/gpu/drm/i915/intel_display.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3fa0d63c83b9..890f7108e723 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1674,7 +1674,7 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 					int pixel_size,
 					unsigned long latency_ns)
 {
-	unsigned long entries_required, wm_size;
+	long entries_required, wm_size;
 
 	entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000;
 	entries_required /= wm->cacheline_size;
@@ -1685,9 +1685,10 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 
 	DRM_DEBUG("FIFO watermark level: %d\n", wm_size);
 
-	if (wm_size > wm->max_wm)
+	/* Don't promote wm_size to unsigned... */
+	if (wm_size > (long)wm->max_wm)
 		wm_size = wm->max_wm;
-	if (wm_size == 0)
+	if (wm_size <= 0)
 		wm_size = wm->default_wm;
 	return wm_size;
 }
-- cgit v1.2.3


From 2a2430f4542467502d39660bfd66b0004fd8d6a9 Mon Sep 17 00:00:00 2001
From: Jesse Barnes
Date: Thu, 16 Jul 2009 13:01:02 -0700
Subject: drm/i915: correct self-refresh calculation in "everything off" case

If no planes are enabled, the self-refresh calculation may end up doing a
divide by zero. This patch should prevent that by making sure at least one
of the CRTCs had a valid hdisplay value.

Reported-by: Eric Anholt
Tested-by: Eric Anholt
Signed-off-by: Jesse Barnes
Signed-off-by: Eric Anholt
---
 drivers/gpu/drm/i915/intel_display.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 890f7108e723..a58bfadabd6f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1906,7 +1906,7 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
 	cwm = 2;
 
 	/* Calc sr entries for one plane configs */
-	if (!planea_clock || !planeb_clock) {
+	if (sr_hdisplay && (!planea_clock || !planeb_clock)) {
 		/* self-refresh has much higher latency */
 		const static int sr_latency_ns = 6000;
 
@@ -1921,6 +1921,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
 		srwm = total_size - sr_entries;
 		if (srwm < 0)
 			srwm = 1;
+		if (IS_I9XX(dev))
+			I915_WRITE(FW_BLC_SELF, (srwm & 0x3f));
 	}
 
 	DRM_DEBUG("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
@@ -1935,8 +1937,6 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
 
 	I915_WRITE(FW_BLC, fwater_lo);
 	I915_WRITE(FW_BLC2, fwater_hi);
-	if (IS_I9XX(dev))
-		I915_WRITE(FW_BLC_SELF, (srwm & 0x3f));
 }
-- cgit v1.2.3


From 2ded9e2747d0a390d281bb5b16ff7f640ec85f78 Mon Sep 17 00:00:00 2001
From: "ling.ma@intel.com"
Date: Thu, 16 Jul 2009 17:23:09 +0800
Subject: drm/i915: hdmi detection according by reading edid

According to investigations by the Windows team, the hardware team, and our
own test results on all available 4x platforms (gm45, g45b, q45, g45a, g45c,
g41a, and g41), Hot plug live status and Hot plug interrupt detection are
currently not reliable; at times the results from the two approaches
contradict each other. So we chose EDID detection for HDMI output.
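
Stripped of the hot-plug register paths it deletes, the detection logic the patch keeps reduces to probing the DDC bus for an EDID block; condensed from the diff below, with the has_hdmi_sink bookkeeping omitted:

/* Report "connected" only if an EDID block is returned and it flags a
 * digital input; otherwise treat the connector as disconnected.
 */
static enum drm_connector_status hdmi_detect(struct drm_connector *connector)
{
	struct intel_output *intel_output = to_intel_output(connector);
	enum drm_connector_status status = connector_status_disconnected;
	struct edid *edid;

	edid = drm_get_edid(&intel_output->base, intel_output->ddc_bus);
	if (edid) {
		if (edid->input & DRM_EDID_INPUT_DIGITAL)
			status = connector_status_connected;
		kfree(edid);
	}
	return status;
}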
Signed-off-by: Ma Ling Reviewed-by: Ian Romanick Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_hdmi.c | 64 +++------------------------------------ 1 file changed, 4 insertions(+), 60 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 9e30daae37dc..1842290cded3 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -130,16 +130,17 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_edid_detect(struct drm_connector *connector) +intel_hdmi_detect(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; struct edid *edid = NULL; enum drm_connector_status status = connector_status_disconnected; + hdmi_priv->has_hdmi_sink = false; edid = drm_get_edid(&intel_output->base, intel_output->ddc_bus); - hdmi_priv->has_hdmi_sink = false; + if (edid) { if (edid->input & DRM_EDID_INPUT_DIGITAL) { status = connector_status_connected; @@ -148,65 +149,8 @@ intel_hdmi_edid_detect(struct drm_connector *connector) intel_output->base.display_info.raw_edid = NULL; kfree(edid); } - return status; -} - -static enum drm_connector_status -igdng_hdmi_detect(struct drm_connector *connector) -{ - struct intel_output *intel_output = to_intel_output(connector); - struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; - - /* FIXME hotplug detect */ - - hdmi_priv->has_hdmi_sink = false; - return intel_hdmi_edid_detect(connector); -} -static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector) -{ - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_output *intel_output = to_intel_output(connector); - struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; - u32 temp, bit; - - if (IS_IGDNG(dev)) - return igdng_hdmi_detect(connector); - - temp = I915_READ(PORT_HOTPLUG_EN); - - switch (hdmi_priv->sdvox_reg) { - case SDVOB: - temp |= HDMIB_HOTPLUG_INT_EN; - break; - case SDVOC: - temp |= HDMIC_HOTPLUG_INT_EN; - break; - default: - return connector_status_unknown; - } - - I915_WRITE(PORT_HOTPLUG_EN, temp); - - POSTING_READ(PORT_HOTPLUG_EN); - - switch (hdmi_priv->sdvox_reg) { - case SDVOB: - bit = HDMIB_HOTPLUG_INT_STATUS; - break; - case SDVOC: - bit = HDMIC_HOTPLUG_INT_STATUS; - break; - default: - return connector_status_unknown; - } - - if ((I915_READ(PORT_HOTPLUG_STAT) & bit) != 0) - return intel_hdmi_edid_detect(connector); - else - return connector_status_disconnected; + return status; } static int intel_hdmi_get_modes(struct drm_connector *connector) -- cgit v1.2.3 From e3d433040ee6077e33d4ad22e2f60a38b085786d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:20 -0700 Subject: drivers/dma/fsldma.c: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 6e60c77a145c..ef87a8984145 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -142,7 +142,7 @@ static int dma_is_idle(struct fsl_dma_chan *fsl_chan) static void dma_start(struct fsl_dma_chan *fsl_chan) { - u32 mr_set = 0;; + u32 mr_set = 0; if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); -- cgit v1.2.3 From 
c019894efc9c9ba5939948caa78c133b1ec8ae63 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:21 -0700 Subject: drivers/dma: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: Dan Williams --- drivers/dma/dmatest.c | 2 +- drivers/dma/mv_xor.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index fb7da5141e96..cec1ec0b7d00 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -114,7 +114,7 @@ static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len) buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); for ( ; i < start + len; i++) buf[i] = PATTERN_SRC | PATTERN_COPY - | (~i & PATTERN_COUNT_MASK);; + | (~i & PATTERN_COUNT_MASK); for ( ; i < test_buf_size; i++) buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); buf++; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index ddab94f51224..3f23eabe09f2 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1176,7 +1176,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev) if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - dma_dev->max_xor = 8; ; + dma_dev->max_xor = 8; dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; } -- cgit v1.2.3 From 0a2ff57d6fba92842272889b4bca447344cd9d36 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 3 Jul 2009 19:26:51 +0200 Subject: dmaengine: dmatest: add a maximum number of test iterations The dmatest usually waits for the killing of its kthreads to stop running tests. This patch adds a parameter that sets a maximum number of test iterations. This feature is quite interesting for debugging when you set a lot of traces in your dmaengine controller driver. 
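
The knob is an ordinary module parameter consulted by the per-channel test threads; condensed from the diff below, it can be set at load time, for example with "modprobe dmatest iterations=20" (assuming dmatest is built as a module):

/* 0, the default, keeps the historical "run until the thread is stopped"
 * behaviour; a non-zero value stops each test thread after that many tests.
 */
static unsigned int iterations;
module_param(iterations, uint, S_IRUGO);
MODULE_PARM_DESC(iterations,
		"Iterations before stopping test (default: infinite)");

/* ... inside the test thread ... */
while (!kthread_should_stop()
       && !(iterations && total_tests >= iterations)) {
	/* run one transfer test */
}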
Signed-off-by: Nicolas Ferre
Cc: Haavard Skinnemoen
Acked-by: Maciej Sosnowski
Signed-off-by: Andrew Morton
Signed-off-by: Dan Williams
---
 drivers/dma/dmatest.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index cec1ec0b7d00..2d973d60e7b9 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -38,6 +38,11 @@ module_param(max_channels, uint, S_IRUGO);
 MODULE_PARM_DESC(max_channels,
 		"Maximum number of channels to use (default: all)");
 
+static unsigned int iterations;
+module_param(iterations, uint, S_IRUGO);
+MODULE_PARM_DESC(iterations,
+		"Iterations before stopping test (default: infinite)");
+
 static unsigned int xor_sources = 3;
 module_param(xor_sources, uint, S_IRUGO);
 MODULE_PARM_DESC(xor_sources,
@@ -270,7 +275,8 @@ static int dmatest_func(void *data)
 
 	flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT;
 
-	while (!kthread_should_stop()) {
+	while (!kthread_should_stop()
+	       && !(iterations && total_tests >= iterations)) {
 		struct dma_device *dev = chan->device;
 		struct dma_async_tx_descriptor *tx = NULL;
 		dma_addr_t dma_srcs[src_cnt];
@@ -416,6 +422,13 @@ err_srcbuf:
 err_srcs:
 	pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
 			thread_name, total_tests, failed_tests, ret);
+
+	if (iterations > 0)
+		while (!kthread_should_stop()) {
+			DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit);
+			interruptible_sleep_on(&wait_dmatest_exit);
+		}
+
 	return ret;
 }
-- cgit v1.2.3


From f1aef8b6e6abf32a3a269542f95a19e2cb319f6c Mon Sep 17 00:00:00 2001
From: Nicolas Ferre
Date: Mon, 6 Jul 2009 18:19:44 +0200
Subject: dmaengine: dmatest: correct thread_count while using multiple thread per channel

It seems that thread_count is not properly calculated in dmatest: the thread
count returned from dmatest_add_threads() is not actually added to
thread_count, so the total is not printed correctly.

Signed-off-by: Nicolas Ferre
Acked-by: Haavard Skinnemoen
Acked-by: Maciej Sosnowski
Signed-off-by: Andrew Morton
Signed-off-by: Dan Williams
---
 drivers/dma/dmatest.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 2d973d60e7b9..d93017fc7872 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -508,11 +508,11 @@ static int dmatest_add_channel(struct dma_chan *chan)
 
 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
 		cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
-		thread_count += cnt > 0 ?: 0;
+		thread_count += cnt > 0 ? cnt : 0;
 	}
 	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
 		cnt = dmatest_add_threads(dtc, DMA_XOR);
-		thread_count += cnt > 0 ?: 0;
+		thread_count += cnt > 0 ? cnt : 0;
 	}
 
 	pr_info("dmatest: Started %u threads using %s\n",
-- cgit v1.2.3


From dc78baa2b90b289590911b40b6800f77d0dc935a Mon Sep 17 00:00:00 2001
From: Nicolas Ferre
Date: Fri, 3 Jul 2009 19:24:33 +0200
Subject: dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller

This AHB DMA Controller (aka HDMA or DMAC on AT91 systems) is available on
the at91sam9rl chip. It will be used on other products in the future.

This first release covers only the memory-to-memory transfer type, which is
the only transfer type supported by this chip. On other products it will
also be used for peripheral DMA transfers (slave API support to come).

I used the dmatest client without problems in different configurations to
test it.
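
A board hands this controller its configuration through the at_dma_platform_data structure the patch introduces; the fragment below is a hypothetical board-file sketch in which only the structure fields come from the patch, while the include path, the channel count and how the platform device itself is registered are assumptions:

#include <mach/at_hdmac.h>

/* Hypothetical board setup: advertise a memcpy-capable DMA controller.
 * nr_channels and cap_mask are the two fields the new header defines.
 */
static struct at_dma_platform_data atdma_pdata = {
	.nr_channels = 2,	/* example value; the header allows up to 8 */
};

static void __init board_init_dma(void)
{
	dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
	/* ... attach atdma_pdata to the at_hdmac platform device ... */
}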
Full documentation for this controller can be found in the SAM9RL datasheet: http://www.atmel.com/dyn/products/product_card.asp?part_id=4243 Signed-off-by: Nicolas Ferre Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- arch/arm/mach-at91/include/mach/at_hdmac.h | 26 + drivers/dma/Kconfig | 8 + drivers/dma/Makefile | 1 + drivers/dma/at_hdmac.c | 1009 ++++++++++++++++++++++++++++ drivers/dma/at_hdmac_regs.h | 386 +++++++++++ 5 files changed, 1430 insertions(+) create mode 100644 arch/arm/mach-at91/include/mach/at_hdmac.h create mode 100644 drivers/dma/at_hdmac.c create mode 100644 drivers/dma/at_hdmac_regs.h (limited to 'drivers') diff --git a/arch/arm/mach-at91/include/mach/at_hdmac.h b/arch/arm/mach-at91/include/mach/at_hdmac.h new file mode 100644 index 000000000000..21a5554f9cb8 --- /dev/null +++ b/arch/arm/mach-at91/include/mach/at_hdmac.h @@ -0,0 +1,26 @@ +/* + * Header file for the Atmel AHB DMA Controller driver + * + * Copyright (C) 2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#ifndef AT_HDMAC_H +#define AT_HDMAC_H + +#include + +/** + * struct at_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + * @cap_mask: dma_capability flags supported by the platform + */ +struct at_dma_platform_data { + unsigned int nr_channels; + dma_cap_mask_t cap_mask; +}; + +#endif /* AT_HDMAC_H */ diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index babf214a509b..bc8fb41cd623 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -46,6 +46,14 @@ config DW_DMAC Support the Synopsys DesignWare AHB DMA controller. This can be integrated in chips such as the Atmel AT32ap7000. +config AT_HDMAC + tristate "Atmel AHB DMA support" + depends on ARCH_AT91SAM9RL + select DMA_ENGINE + help + Support the Atmel AHB DMA controller. This can be integrated in + chips such as the Atmel AT91SAM9RL. + config FSL_DMA tristate "Freescale Elo and Elo Plus DMA support" depends on FSL_SOC diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 2e5dc96700d2..d7bc5fd17d84 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_MV_XOR) += mv_xor.o obj-$(CONFIG_DW_DMAC) += dw_dmac.o +obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_MX3_IPU) += ipu/ diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c new file mode 100644 index 000000000000..64dbf0ce128e --- /dev/null +++ b/drivers/dma/at_hdmac.c @@ -0,0 +1,1009 @@ +/* + * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) + * + * Copyright (C) 2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * This supports the Atmel AHB DMA Controller, + * + * The driver has currently been tested with the Atmel AT91SAM9RL + * and AT91SAM9G45 series. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "at_hdmac_regs.h" + +/* + * Glossary + * -------- + * + * at_hdmac : Name of the ATmel AHB DMA Controller + * at_dma_ / atdma : ATmel DMA controller entity related + * atc_ / atchan : ATmel DMA Channel entity related + */ + +#define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) +#define ATC_DEFAULT_CTRLA (0) +#define ATC_DEFAULT_CTRLB (ATC_SIF(0) \ + |ATC_DIF(1)) + +/* + * Initial number of descriptors to allocate for each channel. This could + * be increased during dma usage. + */ +static unsigned int init_nr_desc_per_channel = 64; +module_param(init_nr_desc_per_channel, uint, 0644); +MODULE_PARM_DESC(init_nr_desc_per_channel, + "initial descriptors per channel (default: 64)"); + + +/* prototypes */ +static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); + + +/*----------------------------------------------------------------------*/ + +static struct at_desc *atc_first_active(struct at_dma_chan *atchan) +{ + return list_first_entry(&atchan->active_list, + struct at_desc, desc_node); +} + +static struct at_desc *atc_first_queued(struct at_dma_chan *atchan) +{ + return list_first_entry(&atchan->queue, + struct at_desc, desc_node); +} + +/** + * atc_alloc_descriptor - allocate and return an initilized descriptor + * @chan: the channel to allocate descriptors for + * @gfp_flags: GFP allocation flags + * + * Note: The ack-bit is positioned in the descriptor flag at creation time + * to make initial allocation more convenient. This bit will be cleared + * and control will be given to client at usage time (during + * preparation functions). + */ +static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, + gfp_t gfp_flags) +{ + struct at_desc *desc = NULL; + struct at_dma *atdma = to_at_dma(chan->device); + dma_addr_t phys; + + desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); + if (desc) { + memset(desc, 0, sizeof(struct at_desc)); + dma_async_tx_descriptor_init(&desc->txd, chan); + /* txd.flags will be overwritten in prep functions */ + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = atc_tx_submit; + desc->txd.phys = phys; + } + + return desc; +} + +/** + * atc_desc_get - get a unsused descriptor from free_list + * @atchan: channel we want a new descriptor for + */ +static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) +{ + struct at_desc *desc, *_desc; + struct at_desc *ret = NULL; + unsigned int i = 0; + LIST_HEAD(tmp_list); + + spin_lock_bh(&atchan->lock); + list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { + i++; + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(chan2dev(&atchan->chan_common), + "desc %p not ACKed\n", desc); + } + spin_unlock_bh(&atchan->lock); + dev_vdbg(chan2dev(&atchan->chan_common), + "scanned %u descriptors on freelist\n", i); + + /* no more descriptor available in initial pool: create one more */ + if (!ret) { + ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC); + if (ret) { + spin_lock_bh(&atchan->lock); + atchan->descs_allocated++; + spin_unlock_bh(&atchan->lock); + } else { + dev_err(chan2dev(&atchan->chan_common), + "not enough descriptors available\n"); + } + } + + return ret; +} + +/** + * atc_desc_put - move a descriptor, including any children, to the free list + * @atchan: channel we work on + * @desc: descriptor, at the head of a chain, to move to free list + */ +static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) 
+{ + if (desc) { + struct at_desc *child; + + spin_lock_bh(&atchan->lock); + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + dev_vdbg(chan2dev(&atchan->chan_common), + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->txd.tx_list, &atchan->free_list); + dev_vdbg(chan2dev(&atchan->chan_common), + "moving desc %p to freelist\n", desc); + list_add(&desc->desc_node, &atchan->free_list); + spin_unlock_bh(&atchan->lock); + } +} + +/** + * atc_assign_cookie - compute and assign new cookie + * @atchan: channel we work on + * @desc: descriptor to asign cookie for + * + * Called with atchan->lock held and bh disabled + */ +static dma_cookie_t +atc_assign_cookie(struct at_dma_chan *atchan, struct at_desc *desc) +{ + dma_cookie_t cookie = atchan->chan_common.cookie; + + if (++cookie < 0) + cookie = 1; + + atchan->chan_common.cookie = cookie; + desc->txd.cookie = cookie; + + return cookie; +} + +/** + * atc_dostart - starts the DMA engine for real + * @atchan: the channel we want to start + * @first: first descriptor in the list we want to begin with + * + * Called with atchan->lock held and bh disabled + */ +static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + /* ASSERT: channel is idle */ + if (atc_chan_is_enabled(atchan)) { + dev_err(chan2dev(&atchan->chan_common), + "BUG: Attempted to start non-idle channel\n"); + dev_err(chan2dev(&atchan->chan_common), + " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, DSCR)); + + /* The tasklet will hopefully advance the queue... */ + return; + } + + vdbg_dump_regs(atchan); + + /* clear any pending interrupt */ + while (dma_readl(atdma, EBCISR)) + cpu_relax(); + + channel_writel(atchan, SADDR, 0); + channel_writel(atchan, DADDR, 0); + channel_writel(atchan, CTRLA, 0); + channel_writel(atchan, CTRLB, 0); + channel_writel(atchan, DSCR, first->txd.phys); + dma_writel(atdma, CHER, atchan->mask); + + vdbg_dump_regs(atchan); +} + +/** + * atc_chain_complete - finish work for one transaction chain + * @atchan: channel we work on + * @desc: descriptor at the head of the chain we want do complete + * + * Called with atchan->lock held and bh disabled */ +static void +atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) +{ + dma_async_tx_callback callback; + void *param; + struct dma_async_tx_descriptor *txd = &desc->txd; + + dev_vdbg(chan2dev(&atchan->chan_common), + "descriptor %u complete\n", txd->cookie); + + atchan->completed_cookie = txd->cookie; + callback = txd->callback; + param = txd->callback_param; + + /* move children to free_list */ + list_splice_init(&txd->tx_list, &atchan->free_list); + /* move myself to free_list */ + list_move(&desc->desc_node, &atchan->free_list); + + /* unmap dma addresses */ + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(chan2parent(&atchan->chan_common), + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(chan2parent(&atchan->chan_common), + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(chan2parent(&atchan->chan_common), + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(chan2parent(&atchan->chan_common), + 
desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + } + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); + + dma_run_dependencies(txd); +} + +/** + * atc_complete_all - finish work for all transactions + * @atchan: channel to complete transactions for + * + * Eventually submit queued descriptors if any + * + * Assume channel is idle while calling this function + * Called with atchan->lock held and bh disabled + */ +static void atc_complete_all(struct at_dma_chan *atchan) +{ + struct at_desc *desc, *_desc; + LIST_HEAD(list); + + dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); + + BUG_ON(atc_chan_is_enabled(atchan)); + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + if (!list_empty(&atchan->queue)) + atc_dostart(atchan, atc_first_queued(atchan)); + /* empty active_list now it is completed */ + list_splice_init(&atchan->active_list, &list); + /* empty queue list by moving descriptors (if any) to active_list */ + list_splice_init(&atchan->queue, &atchan->active_list); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + atc_chain_complete(atchan, desc); +} + +/** + * atc_cleanup_descriptors - cleanup up finished descriptors in active_list + * @atchan: channel to be cleaned up + * + * Called with atchan->lock held and bh disabled + */ +static void atc_cleanup_descriptors(struct at_dma_chan *atchan) +{ + struct at_desc *desc, *_desc; + struct at_desc *child; + + dev_vdbg(chan2dev(&atchan->chan_common), "cleanup descriptors\n"); + + list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { + if (!(desc->lli.ctrla & ATC_DONE)) + /* This one is currently in progress */ + return; + + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + if (!(child->lli.ctrla & ATC_DONE)) + /* Currently in progress */ + return; + + /* + * No descriptors so far seem to be in progress, i.e. + * this chain must be done. + */ + atc_chain_complete(atchan, desc); + } +} + +/** + * atc_advance_work - at the end of a transaction, move forward + * @atchan: channel where the transaction ended + * + * Called with atchan->lock held and bh disabled + */ +static void atc_advance_work(struct at_dma_chan *atchan) +{ + dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); + + if (list_empty(&atchan->active_list) || + list_is_singular(&atchan->active_list)) { + atc_complete_all(atchan); + } else { + atc_chain_complete(atchan, atc_first_active(atchan)); + /* advance work */ + atc_dostart(atchan, atc_first_active(atchan)); + } +} + + +/** + * atc_handle_error - handle errors reported by DMA controller + * @atchan: channel where error occurs + * + * Called with atchan->lock held and bh disabled + */ +static void atc_handle_error(struct at_dma_chan *atchan) +{ + struct at_desc *bad_desc; + struct at_desc *child; + + /* + * The descriptor currently at the head of the active list is + * broked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. 
+ */ + bad_desc = atc_first_active(atchan); + list_del_init(&bad_desc->desc_node); + + /* As we are stopped, take advantage to push queued descriptors + * in active_list */ + list_splice_init(&atchan->queue, atchan->active_list.prev); + + /* Try to restart the controller */ + if (!list_empty(&atchan->active_list)) + atc_dostart(atchan, atc_first_active(atchan)); + + /* + * KERN_CRITICAL may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. + */ + dev_crit(chan2dev(&atchan->chan_common), + "Bad descriptor submitted for DMA!\n"); + dev_crit(chan2dev(&atchan->chan_common), + " cookie: %d\n", bad_desc->txd.cookie); + atc_dump_lli(atchan, &bad_desc->lli); + list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + atc_dump_lli(atchan, &child->lli); + + /* Pretend the descriptor completed successfully */ + atc_chain_complete(atchan, bad_desc); +} + + +/*-- IRQ & Tasklet ---------------------------------------------------*/ + +static void atc_tasklet(unsigned long data) +{ + struct at_dma_chan *atchan = (struct at_dma_chan *)data; + + /* Channel cannot be enabled here */ + if (atc_chan_is_enabled(atchan)) { + dev_err(chan2dev(&atchan->chan_common), + "BUG: channel enabled in tasklet\n"); + return; + } + + spin_lock(&atchan->lock); + if (test_and_clear_bit(0, &atchan->error_status)) + atc_handle_error(atchan); + else + atc_advance_work(atchan); + + spin_unlock(&atchan->lock); +} + +static irqreturn_t at_dma_interrupt(int irq, void *dev_id) +{ + struct at_dma *atdma = (struct at_dma *)dev_id; + struct at_dma_chan *atchan; + int i; + u32 status, pending, imr; + int ret = IRQ_NONE; + + do { + imr = dma_readl(atdma, EBCIMR); + status = dma_readl(atdma, EBCISR); + pending = status & imr; + + if (!pending) + break; + + dev_vdbg(atdma->dma_common.dev, + "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", + status, imr, pending); + + for (i = 0; i < atdma->dma_common.chancnt; i++) { + atchan = &atdma->chan[i]; + if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) { + if (pending & AT_DMA_ERR(i)) { + /* Disable channel on AHB error */ + dma_writel(atdma, CHDR, atchan->mask); + /* Give information to tasklet */ + set_bit(0, &atchan->error_status); + } + tasklet_schedule(&atchan->tasklet); + ret = IRQ_HANDLED; + } + } + + } while (pending); + + return ret; +} + + +/*-- DMA Engine API --------------------------------------------------*/ + +/** + * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine + * @desc: descriptor at the head of the transaction chain + * + * Queue chain if DMA engine is working already + * + * Cookie increment and adding to active_list or queue must be atomic + */ +static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct at_desc *desc = txd_to_at_desc(tx); + struct at_dma_chan *atchan = to_at_dma_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&atchan->lock); + cookie = atc_assign_cookie(atchan, desc); + + if (list_empty(&atchan->active_list)) { + dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", + desc->txd.cookie); + atc_dostart(atchan, desc); + list_add_tail(&desc->desc_node, &atchan->active_list); + } else { + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", + desc->txd.cookie); + list_add_tail(&desc->desc_node, &atchan->queue); + } + + spin_unlock_bh(&atchan->lock); + + return cookie; +} + +/** + * atc_prep_dma_memcpy - prepare 
a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation virtual destination address + * @src: operation virtual source address + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc = NULL; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + size_t xfer_count; + size_t offset; + unsigned int src_width; + unsigned int dst_width; + u32 ctrla; + u32 ctrlb; + + dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", + dest, src, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + ctrla = ATC_DEFAULT_CTRLA; + ctrlb = ATC_DEFAULT_CTRLB + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_FC_MEM2MEM; + + /* + * We can be a lot more clever here, but this should take care + * of the most common optimization. + */ + if (!((src | dest | len) & 3)) { + ctrla |= ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD; + src_width = dst_width = 2; + } else if (!((src | dest | len) & 1)) { + ctrla |= ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD; + src_width = dst_width = 1; + } else { + ctrla |= ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE; + src_width = dst_width = 0; + } + + for (offset = 0; offset < len; offset += xfer_count << src_width) { + xfer_count = min_t(size_t, (len - offset) >> src_width, + ATC_BTSIZE_MAX); + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + desc->lli.saddr = src + offset; + desc->lli.daddr = dest + offset; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->txd.cookie = 0; + async_tx_ack(&desc->txd); + + if (!first) { + first = desc; + } else { + /* inform the HW lli about chaining */ + prev->lli.dscr = desc->txd.phys; + /* insert the link descriptor to the LD ring */ + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + } + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = len; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + desc->txd.flags = flags; /* client is in control of this ack */ + + return &first->txd; + +err_desc_get: + atc_desc_put(atchan, first); + return NULL; +} + +/** + * atc_is_tx_complete - poll for transaction completion + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @done: if not %NULL, updated with last completed transaction + * @used: if not %NULL, updated with last used transaction + * + * If @done and @used are passed in, upon return they reflect the driver + * internal state and can be used with dma_async_is_complete() to check + * the status of multiple cookies without re-checking hardware state. + */ +static enum dma_status +atc_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", + cookie, done ? *done : 0, used ? 
*used : 0); + + spin_lock_bh(atchan->lock); + + last_complete = atchan->completed_cookie; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret != DMA_SUCCESS) { + atc_cleanup_descriptors(atchan); + + last_complete = atchan->completed_cookie; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + } + + spin_unlock_bh(atchan->lock); + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return ret; +} + +/** + * atc_issue_pending - try to finish work + * @chan: target DMA channel + */ +static void atc_issue_pending(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + dev_vdbg(chan2dev(chan), "issue_pending\n"); + + if (!atc_chan_is_enabled(atchan)) { + spin_lock_bh(&atchan->lock); + atc_advance_work(atchan); + spin_unlock_bh(&atchan->lock); + } +} + +/** + * atc_alloc_chan_resources - allocate resources for DMA channel + * @chan: allocate descriptor resources for this channel + * @client: current client requesting the channel be ready for requests + * + * return - the number of allocated descriptors + */ +static int atc_alloc_chan_resources(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc; + int i; + LIST_HEAD(tmp_list); + + dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); + + /* ASSERT: channel is idle */ + if (atc_chan_is_enabled(atchan)) { + dev_dbg(chan2dev(chan), "DMA channel not idle ?\n"); + return -EIO; + } + + /* have we already been set up? */ + if (!list_empty(&atchan->free_list)) + return atchan->descs_allocated; + + /* Allocate initial pool of descriptors */ + for (i = 0; i < init_nr_desc_per_channel; i++) { + desc = atc_alloc_descriptor(chan, GFP_KERNEL); + if (!desc) { + dev_err(atdma->dma_common.dev, + "Only %d initial descriptors\n", i); + break; + } + list_add_tail(&desc->desc_node, &tmp_list); + } + + spin_lock_bh(&atchan->lock); + atchan->descs_allocated = i; + list_splice(&tmp_list, &atchan->free_list); + atchan->completed_cookie = chan->cookie = 1; + spin_unlock_bh(&atchan->lock); + + /* channel parameters */ + channel_writel(atchan, CFG, ATC_DEFAULT_CFG); + + dev_dbg(chan2dev(chan), + "alloc_chan_resources: allocated %d descriptors\n", + atchan->descs_allocated); + + return atchan->descs_allocated; +} + +/** + * atc_free_chan_resources - free all channel resources + * @chan: DMA channel + */ +static void atc_free_chan_resources(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc, *_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n", + atchan->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&atchan->active_list)); + BUG_ON(!list_empty(&atchan->queue)); + BUG_ON(atc_chan_is_enabled(atchan)); + + list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { + dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); + list_del(&desc->desc_node); + /* free link descriptor */ + dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys); + } + list_splice_init(&atchan->free_list, &list); + atchan->descs_allocated = 0; + + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); +} + + +/*-- Module Management -----------------------------------------------*/ + +/** + * at_dma_off - disable DMA controller + * @atdma: the Atmel HDAMC device + */ 
+static void at_dma_off(struct at_dma *atdma) +{ + dma_writel(atdma, EN, 0); + + /* disable all interrupts */ + dma_writel(atdma, EBCIDR, -1L); + + /* confirm that all channels are disabled */ + while (dma_readl(atdma, CHSR) & atdma->all_chan_mask) + cpu_relax(); +} + +static int __init at_dma_probe(struct platform_device *pdev) +{ + struct at_dma_platform_data *pdata; + struct resource *io; + struct at_dma *atdma; + size_t size; + int irq; + int err; + int i; + + /* get DMA Controller parameters from platform */ + pdata = pdev->dev.platform_data; + if (!pdata || pdata->nr_channels > AT_DMA_MAX_NR_CHANNELS) + return -EINVAL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + size = sizeof(struct at_dma); + size += pdata->nr_channels * sizeof(struct at_dma_chan); + atdma = kzalloc(size, GFP_KERNEL); + if (!atdma) + return -ENOMEM; + + /* discover transaction capabilites from the platform data */ + atdma->dma_common.cap_mask = pdata->cap_mask; + atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; + + size = io->end - io->start + 1; + if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { + err = -EBUSY; + goto err_kfree; + } + + atdma->regs = ioremap(io->start, size); + if (!atdma->regs) { + err = -ENOMEM; + goto err_release_r; + } + + atdma->clk = clk_get(&pdev->dev, "dma_clk"); + if (IS_ERR(atdma->clk)) { + err = PTR_ERR(atdma->clk); + goto err_clk; + } + clk_enable(atdma->clk); + + /* force dma off, just in case */ + at_dma_off(atdma); + + err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma); + if (err) + goto err_irq; + + platform_set_drvdata(pdev, atdma); + + /* create a pool of consistent memory blocks for hardware descriptors */ + atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool", + &pdev->dev, sizeof(struct at_desc), + 4 /* word alignment */, 0); + if (!atdma->dma_desc_pool) { + dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + err = -ENOMEM; + goto err_pool_create; + } + + /* clear any pending interrupt */ + while (dma_readl(atdma, EBCISR)) + cpu_relax(); + + /* initialize channels related values */ + INIT_LIST_HEAD(&atdma->dma_common.channels); + for (i = 0; i < pdata->nr_channels; i++, atdma->dma_common.chancnt++) { + struct at_dma_chan *atchan = &atdma->chan[i]; + + atchan->chan_common.device = &atdma->dma_common; + atchan->chan_common.cookie = atchan->completed_cookie = 1; + atchan->chan_common.chan_id = i; + list_add_tail(&atchan->chan_common.device_node, + &atdma->dma_common.channels); + + atchan->ch_regs = atdma->regs + ch_regs(i); + spin_lock_init(&atchan->lock); + atchan->mask = 1 << i; + + INIT_LIST_HEAD(&atchan->active_list); + INIT_LIST_HEAD(&atchan->queue); + INIT_LIST_HEAD(&atchan->free_list); + + tasklet_init(&atchan->tasklet, atc_tasklet, + (unsigned long)atchan); + atc_enable_irq(atchan); + } + + /* set base routines */ + atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; + atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; + atdma->dma_common.device_is_tx_complete = atc_is_tx_complete; + atdma->dma_common.device_issue_pending = atc_issue_pending; + atdma->dma_common.dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + + dma_writel(atdma, EN, AT_DMA_ENABLE); + + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n", + 
dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", + atdma->dma_common.chancnt); + + dma_async_device_register(&atdma->dma_common); + + return 0; + +err_pool_create: + platform_set_drvdata(pdev, NULL); + free_irq(platform_get_irq(pdev, 0), atdma); +err_irq: + clk_disable(atdma->clk); + clk_put(atdma->clk); +err_clk: + iounmap(atdma->regs); + atdma->regs = NULL; +err_release_r: + release_mem_region(io->start, size); +err_kfree: + kfree(atdma); + return err; +} + +static int __exit at_dma_remove(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + struct dma_chan *chan, *_chan; + struct resource *io; + + at_dma_off(atdma); + dma_async_device_unregister(&atdma->dma_common); + + dma_pool_destroy(atdma->dma_desc_pool); + platform_set_drvdata(pdev, NULL); + free_irq(platform_get_irq(pdev, 0), atdma); + + list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + device_node) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); + + /* Disable interrupts */ + atc_disable_irq(atchan); + tasklet_disable(&atchan->tasklet); + + tasklet_kill(&atchan->tasklet); + list_del(&chan->device_node); + } + + clk_disable(atdma->clk); + clk_put(atdma->clk); + + iounmap(atdma->regs); + atdma->regs = NULL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(io->start, io->end - io->start + 1); + + kfree(atdma); + + return 0; +} + +static void at_dma_shutdown(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + + at_dma_off(platform_get_drvdata(pdev)); + clk_disable(atdma->clk); +} + +static int at_dma_suspend_late(struct platform_device *pdev, pm_message_t mesg) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + + at_dma_off(platform_get_drvdata(pdev)); + clk_disable(atdma->clk); + return 0; +} + +static int at_dma_resume_early(struct platform_device *pdev) +{ + struct at_dma *atdma = platform_get_drvdata(pdev); + + clk_enable(atdma->clk); + dma_writel(atdma, EN, AT_DMA_ENABLE); + return 0; + +} + +static struct platform_driver at_dma_driver = { + .remove = __exit_p(at_dma_remove), + .shutdown = at_dma_shutdown, + .suspend_late = at_dma_suspend_late, + .resume_early = at_dma_resume_early, + .driver = { + .name = "at_hdmac", + }, +}; + +static int __init at_dma_init(void) +{ + return platform_driver_probe(&at_dma_driver, at_dma_probe); +} +module_init(at_dma_init); + +static void __exit at_dma_exit(void) +{ + platform_driver_unregister(&at_dma_driver); +} +module_exit(at_dma_exit); + +MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); +MODULE_AUTHOR("Nicolas Ferre "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:at_hdmac"); diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h new file mode 100644 index 000000000000..ad2d4f402bf7 --- /dev/null +++ b/drivers/dma/at_hdmac_regs.h @@ -0,0 +1,386 @@ +/* + * Header file for the Atmel AHB DMA Controller driver + * + * Copyright (C) 2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ +#ifndef AT_HDMAC_REGS_H +#define AT_HDMAC_REGS_H + +#include + +#define AT_DMA_MAX_NR_CHANNELS 8 + + +#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ +#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. */ +#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) +#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) + +#define AT_DMA_EN 0x04 /* Controller Enable Register */ +#define AT_DMA_ENABLE (0x1 << 0) + +#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ +#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ + +#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ +#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ + +#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ +#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ + +#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ +#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ + +/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1C /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 +#define AT_DMA_BTC(x) (0x1 << (x)) +#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) +#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) + +#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ +#define AT_DMA_ENA(x) (0x1 << (x)) +#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) +#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) + +#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ +#define AT_DMA_DIS(x) (0x1 << (x)) +#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) + +#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ +#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) +#define AT_DMA_STAL(x) (0x1 << (24 + (x))) + + +#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ + +/* Hardware register offset for each channel */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ + + +/* Bitfield definitions */ + +/* Bitfields in DSCR */ +#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ + +/* Bitfields in CTRLA */ +#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ +#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ +#define ATC_SCSIZE_1 
(0x0 << 16) +#define ATC_SCSIZE_4 (0x1 << 16) +#define ATC_SCSIZE_8 (0x2 << 16) +#define ATC_SCSIZE_16 (0x3 << 16) +#define ATC_SCSIZE_32 (0x4 << 16) +#define ATC_SCSIZE_64 (0x5 << 16) +#define ATC_SCSIZE_128 (0x6 << 16) +#define ATC_SCSIZE_256 (0x7 << 16) +#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ +#define ATC_DCSIZE_1 (0x0 << 20) +#define ATC_DCSIZE_4 (0x1 << 20) +#define ATC_DCSIZE_8 (0x2 << 20) +#define ATC_DCSIZE_16 (0x3 << 20) +#define ATC_DCSIZE_32 (0x4 << 20) +#define ATC_DCSIZE_64 (0x5 << 20) +#define ATC_DCSIZE_128 (0x6 << 20) +#define ATC_DCSIZE_256 (0x7 << 20) +#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ +#define ATC_SRC_WIDTH_BYTE (0x0 << 24) +#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) +#define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ +#define ATC_DST_WIDTH_BYTE (0x0 << 28) +#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) +#define ATC_DST_WIDTH_WORD (0x2 << 28) +#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ + +/* Bitfields in CTRLB */ +#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ +#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ +#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ +#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_PER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ +#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) +#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ +#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ +#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) +#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ +#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ + +/* Bitfields in CFG */ +#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ +#define ATC_SRC_H2SEL_SW (0x0 << 9) +#define ATC_SRC_H2SEL_HW (0x1 << 9) +#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ +#define ATC_DST_H2SEL_SW (0x0 << 13) +#define ATC_DST_H2SEL_HW (0x1 << 13) +#define ATC_SOD (0x1 << 16) /* Stop On Done */ +#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ +#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L (0x1 << 22) /* 
Master Interface Arbiter Lock */ +#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) +#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) +#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ +#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) +#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) +#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) + +/* Bitfields in SPIP */ +#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + +/* Bitfields in DPIP */ +#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + + +/*-- descriptors -----------------------------------------------------*/ + +/* LLI == Linked List Item; aka DMA buffer descriptor */ +struct at_lli { + /* values that are not changed by hardware */ + dma_addr_t saddr; + dma_addr_t daddr; + /* value that may get written back: */ + u32 ctrla; + /* more values that are not changed by hardware */ + u32 ctrlb; + dma_addr_t dscr; /* chain to next lli */ +}; + +/** + * struct at_desc - software descriptor + * @at_lli: hardware lli structure + * @txd: support for the async_tx api + * @desc_node: node on the channed descriptors list + * @len: total transaction bytecount + */ +struct at_desc { + /* FIRST values the hardware uses */ + struct at_lli lli; + + /* THEN values for driver housekeeping */ + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + size_t len; +}; + +static inline struct at_desc * +txd_to_at_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct at_desc, txd); +} + + +/*-- Channels --------------------------------------------------------*/ + +/** + * struct at_dma_chan - internal representation of an Atmel HDMAC channel + * @chan_common: common dmaengine channel object members + * @device: parent device + * @ch_regs: memory mapped register base + * @mask: channel index in a mask + * @error_status: transmit error status information from irq handler + * to tasklet (use atomic operations) + * @tasklet: bottom half to finish transaction work + * @lock: serializes enqueue/dequeue operations to descriptors lists + * @completed_cookie: identifier for the most recently completed operation + * @active_list: list of descriptors dmaengine is being running on + * @queue: list of descriptors ready to be submitted to engine + * @free_list: list of descriptors usable by the channel + * @descs_allocated: records the actual size of the descriptor pool + */ +struct at_dma_chan { + struct dma_chan chan_common; + struct at_dma *device; + void __iomem *ch_regs; + u8 mask; + unsigned long error_status; + struct tasklet_struct tasklet; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + dma_cookie_t completed_cookie; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int descs_allocated; +}; + +#define channel_readl(atchan, name) \ + __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) + +#define channel_writel(atchan, name, val) \ + __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) +{ + return container_of(dchan, struct at_dma_chan, chan_common); +} + + +/*-- Controller ------------------------------------------------------*/ + +/** + * struct at_dma - internal representation of an Atmel HDMA Controller + * @chan_common: common dmaengine dma_device object members + * @ch_regs: memory mapped register base + * @clk: dma controller clock + * 
@all_chan_mask: all channels availlable in a mask + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @chan: channels table to store at_dma_chan structures + */ +struct at_dma { + struct dma_device dma_common; + void __iomem *regs; + struct clk *clk; + + u8 all_chan_mask; + + struct dma_pool *dma_desc_pool; + /* AT THE END channels table */ + struct at_dma_chan chan[0]; +}; + +#define dma_readl(atdma, name) \ + __raw_readl((atdma)->regs + AT_DMA_##name) +#define dma_writel(atdma, name, val) \ + __raw_writel((val), (atdma)->regs + AT_DMA_##name) + +static inline struct at_dma *to_at_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct at_dma, dma_common); +} + + +/*-- Helper functions ------------------------------------------------*/ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2parent(struct dma_chan *chan) +{ + return chan->dev->device.parent; +} + +#if defined(VERBOSE_DEBUG) +static void vdbg_dump_regs(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + dev_err(chan2dev(&atchan->chan_common), + " channel %d : imr = 0x%x, chsr = 0x%x\n", + atchan->chan_common.chan_id, + dma_readl(atdma, EBCIMR), + dma_readl(atdma, CHSR)); + + dev_err(chan2dev(&atchan->chan_common), + " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, DSCR)); +} +#else +static void vdbg_dump_regs(struct at_dma_chan *atchan) {} +#endif + +static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) +{ + dev_printk(KERN_CRIT, chan2dev(&atchan->chan_common), + " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", + lli->saddr, lli->daddr, + lli->ctrla, lli->ctrlb, lli->dscr); +} + + +static void atc_setup_irq(struct at_dma_chan *atchan, int on) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + u32 ebci; + + /* enable interrupts on buffer chain completion & error */ + ebci = AT_DMA_CBTC(atchan->chan_common.chan_id) + | AT_DMA_ERR(atchan->chan_common.chan_id); + if (on) + dma_writel(atdma, EBCIER, ebci); + else + dma_writel(atdma, EBCIDR, ebci); +} + +static inline void atc_enable_irq(struct at_dma_chan *atchan) +{ + atc_setup_irq(atchan, 1); +} + +static inline void atc_disable_irq(struct at_dma_chan *atchan) +{ + atc_setup_irq(atchan, 0); +} + + +/** + * atc_chan_is_enabled - test if given channel is enabled + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + return !!(dma_readl(atdma, CHSR) & atchan->mask); +} + + +/** + * set_desc_eol - set end-of-link to descriptor so it will end transfer + * @desc: descriptor, signle or at the end of a chain, to end chain on + */ +static void set_desc_eol(struct at_desc *desc) +{ + desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + desc->lli.dscr = 0; +} + +#endif /* AT_HDMAC_REGS_H */ -- cgit v1.2.3 From 808347f6a31792079e345ec865e9cfcb6e8ae6b2 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 22 Jul 2009 20:04:45 +0200 Subject: dmaengine: at_hdmac: add DMA slave transfers This patch for at_hdmac adds the slave transfers capability to the Atmel DMA controller available on some AT91 SOCs. This allow peripheral to memory and memory to peripheral transfers with hardware handshaking. 
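To make the "channel private data" mechanism concrete, here is a userspace mirror (not actual board code) of the at_dma_slave description that platform code would attach to a DMA channel through dma_chan->private. The structure layout and the CFG bit macros are copied from the header hunk shown below; the peripheral, its register addresses and the handshake interface numbers are invented purely for illustration.

/* build: cc -O2 slave_desc.c */
#include <stdint.h>
#include <stdio.h>

enum at_dma_slave_width {
	AT_DMA_SLAVE_WIDTH_8BIT = 0,
	AT_DMA_SLAVE_WIDTH_16BIT,
	AT_DMA_SLAVE_WIDTH_32BIT,
};

struct at_dma_slave {
	void		*dma_dev;	/* struct device * in the kernel */
	uint32_t	tx_reg;		/* dma_addr_t of the peripheral TX register */
	uint32_t	rx_reg;		/* dma_addr_t of the peripheral RX register */
	enum at_dma_slave_width reg_width;
	uint32_t	cfg;
	uint32_t	ctrla;
};

/* CFG bits, as defined in the header below */
#define ATC_SRC_PER(h)		(0xFU & (h))
#define ATC_DST_PER(h)		((0xFU & (h)) << 4)
#define ATC_SRC_H2SEL_HW	(0x1 << 9)
#define ATC_DST_H2SEL_HW	(0x1 << 13)
#define ATC_FIFOCFG_HALFFIFO	(0x1 << 28)

int main(void)
{
	/* hypothetical peripheral: handshake interface 3 (rx) and 4 (tx) */
	struct at_dma_slave ssc_dma = {
		.tx_reg    = 0xfffc0024,	/* made-up holding register address */
		.rx_reg    = 0xfffc0020,	/* made-up receive register address */
		.reg_width = AT_DMA_SLAVE_WIDTH_16BIT,
		.cfg       = ATC_FIFOCFG_HALFFIFO
			   | ATC_SRC_H2SEL_HW | ATC_SRC_PER(3)
			   | ATC_DST_H2SEL_HW | ATC_DST_PER(4),
	};

	printf("cfg = 0x%08x\n", ssc_dma.cfg);
	return 0;
}
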
Slave structure for controller specific information is passed through channel private data. This at_dma_slave structure is defined in at_hdmac.h header file and relative hardware definition are moved to this file from at_hdmac_regs.h. Doing this we allow the channel configuration from platform definition code. This work is intensively based on dw_dmac and several slave implementations. Signed-off-by: Nicolas Ferre Signed-off-by: Dan Williams --- arch/arm/mach-at91/include/mach/at_hdmac.h | 76 +++++++++++ drivers/dma/at_hdmac.c | 208 ++++++++++++++++++++++++++++- drivers/dma/at_hdmac_regs.h | 49 ++----- 3 files changed, 290 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/arch/arm/mach-at91/include/mach/at_hdmac.h b/arch/arm/mach-at91/include/mach/at_hdmac.h index 21a5554f9cb8..187cb58345c0 100644 --- a/arch/arm/mach-at91/include/mach/at_hdmac.h +++ b/arch/arm/mach-at91/include/mach/at_hdmac.h @@ -23,4 +23,80 @@ struct at_dma_platform_data { dma_cap_mask_t cap_mask; }; +/** + * enum at_dma_slave_width - DMA slave register access width. + * @AT_DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses + * @AT_DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses + * @AT_DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses + */ +enum at_dma_slave_width { + AT_DMA_SLAVE_WIDTH_8BIT = 0, + AT_DMA_SLAVE_WIDTH_16BIT, + AT_DMA_SLAVE_WIDTH_32BIT, +}; + +/** + * struct at_dma_slave - Controller-specific information about a slave + * @dma_dev: required DMA master device + * @tx_reg: physical address of data register used for + * memory-to-peripheral transfers + * @rx_reg: physical address of data register used for + * peripheral-to-memory transfers + * @reg_width: peripheral register width + * @cfg: Platform-specific initializer for the CFG register + * @ctrla: Platform-specific initializer for the CTRLA register + */ +struct at_dma_slave { + struct device *dma_dev; + dma_addr_t tx_reg; + dma_addr_t rx_reg; + enum at_dma_slave_width reg_width; + u32 cfg; + u32 ctrla; +}; + + +/* Platform-configurable bits in CFG */ +#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ +#define ATC_SRC_H2SEL_SW (0x0 << 9) +#define ATC_SRC_H2SEL_HW (0x1 << 9) +#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ +#define ATC_DST_H2SEL_SW (0x0 << 13) +#define ATC_DST_H2SEL_HW (0x1 << 13) +#define ATC_SOD (0x1 << 16) /* Stop On Done */ +#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ +#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ +#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) +#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) +#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ +#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) +#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) +#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) + +/* Platform-configurable bits in CTRLA */ +#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ +#define ATC_SCSIZE_1 (0x0 << 16) +#define ATC_SCSIZE_4 (0x1 << 16) +#define ATC_SCSIZE_8 (0x2 << 16) +#define ATC_SCSIZE_16 (0x3 << 16) +#define ATC_SCSIZE_32 (0x4 << 16) +#define ATC_SCSIZE_64 (0x5 << 
16) +#define ATC_SCSIZE_128 (0x6 << 16) +#define ATC_SCSIZE_256 (0x7 << 16) +#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ +#define ATC_DCSIZE_1 (0x0 << 20) +#define ATC_DCSIZE_4 (0x1 << 20) +#define ATC_DCSIZE_8 (0x2 << 20) +#define ATC_DCSIZE_16 (0x3 << 20) +#define ATC_DCSIZE_32 (0x4 << 20) +#define ATC_DCSIZE_64 (0x5 << 20) +#define ATC_DCSIZE_128 (0x6 << 20) +#define ATC_DCSIZE_256 (0x7 << 20) + #endif /* AT_HDMAC_H */ diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 64dbf0ce128e..9a1e5fb412ed 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -608,6 +608,187 @@ err_desc_get: return NULL; } + +/** + * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + u32 ctrla; + u32 ctrlb; + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n", + direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE", + flags); + + if (unlikely(!atslave || !sg_len)) { + dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + reg_width = atslave->reg_width; + + sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); + + ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; + + switch (direction) { + case DMA_TO_DEVICE: + ctrla |= ATC_DST_WIDTH(reg_width); + ctrlb |= ATC_DST_ADDR_MODE_FIXED + | ATC_SRC_ADDR_MODE_INCR + | ATC_FC_MEM2PER; + reg = atslave->tx_reg; + for_each_sg(sgl, sg, sg_len, i) { + struct at_desc *desc; + u32 len; + u32 mem; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.saddr = mem; + desc->lli.daddr = reg; + desc->lli.ctrla = ctrla + | ATC_SRC_WIDTH(mem_width) + | len >> mem_width; + desc->lli.ctrlb = ctrlb; + + if (!first) { + first = desc; + } else { + /* inform the HW lli about chaining */ + prev->lli.dscr = desc->txd.phys; + /* insert the link descriptor to the LD ring */ + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + total_len += len; + } + break; + case DMA_FROM_DEVICE: + ctrla |= ATC_SRC_WIDTH(reg_width); + ctrlb |= ATC_DST_ADDR_MODE_INCR + | ATC_SRC_ADDR_MODE_FIXED + | ATC_FC_PER2MEM; + + reg = atslave->rx_reg; + for_each_sg(sgl, sg, sg_len, i) { + struct at_desc *desc; + u32 len; + u32 mem; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.saddr = reg; + desc->lli.daddr = mem; + desc->lli.ctrla = ctrla + | ATC_DST_WIDTH(mem_width) + | len >> mem_width; + desc->lli.ctrlb = ctrlb; + + if (!first) { + first = desc; + } else { + /* inform the HW lli about chaining */ + prev->lli.dscr = desc->txd.phys; + /* insert the link descriptor to the LD ring */ + 
list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + total_len += len; + } + break; + default: + return NULL; + } + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(prev); + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = total_len; + + /* last link descriptor of list is responsible of flags */ + prev->txd.flags = flags; /* client is in control of this ack */ + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); + atc_desc_put(atchan, first); + return NULL; +} + +static void atc_terminate_all(struct dma_chan *chan) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc, *_desc; + LIST_HEAD(list); + + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. + */ + spin_lock_bh(&atchan->lock); + + dma_writel(atdma, CHDR, atchan->mask); + + /* confirm that this channel is disabled */ + while (dma_readl(atdma, CHSR) & atchan->mask) + cpu_relax(); + + /* active_list entries will end up before queued entries */ + list_splice_init(&atchan->queue, &list); + list_splice_init(&atchan->active_list, &list); + + spin_unlock_bh(&atchan->lock); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + atc_chain_complete(atchan, desc); +} + /** * atc_is_tx_complete - poll for transaction completion * @chan: DMA channel @@ -686,7 +867,9 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); struct at_desc *desc; + struct at_dma_slave *atslave; int i; + u32 cfg; LIST_HEAD(tmp_list); dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); @@ -697,7 +880,23 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) return -EIO; } - /* have we already been set up? */ + cfg = ATC_DEFAULT_CFG; + + atslave = chan->private; + if (atslave) { + /* + * We need controller-specific data to set up slave + * transfers. + */ + BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); + + /* if cfg configuration specified take it instad of default */ + if (atslave->cfg) + cfg = atslave->cfg; + } + + /* have we already been set up? 
+ * reconfigure channel but no need to reallocate descriptors */ if (!list_empty(&atchan->free_list)) return atchan->descs_allocated; @@ -719,7 +918,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) spin_unlock_bh(&atchan->lock); /* channel parameters */ - channel_writel(atchan, CFG, ATC_DEFAULT_CFG); + channel_writel(atchan, CFG, cfg); dev_dbg(chan2dev(chan), "alloc_chan_resources: allocated %d descriptors\n", @@ -888,6 +1087,11 @@ static int __init at_dma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { + atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; + atdma->dma_common.device_terminate_all = atc_terminate_all; + } + dma_writel(atdma, EN, AT_DMA_ENABLE); dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n", diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index ad2d4f402bf7..4c972afc49ec 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -87,29 +87,14 @@ /* Bitfields in CTRLA */ #define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ #define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ -#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ -#define ATC_SCSIZE_1 (0x0 << 16) -#define ATC_SCSIZE_4 (0x1 << 16) -#define ATC_SCSIZE_8 (0x2 << 16) -#define ATC_SCSIZE_16 (0x3 << 16) -#define ATC_SCSIZE_32 (0x4 << 16) -#define ATC_SCSIZE_64 (0x5 << 16) -#define ATC_SCSIZE_128 (0x6 << 16) -#define ATC_SCSIZE_256 (0x7 << 16) -#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ -#define ATC_DCSIZE_1 (0x0 << 20) -#define ATC_DCSIZE_4 (0x1 << 20) -#define ATC_DCSIZE_8 (0x2 << 20) -#define ATC_DCSIZE_16 (0x3 << 20) -#define ATC_DCSIZE_32 (0x4 << 20) -#define ATC_DCSIZE_64 (0x5 << 20) -#define ATC_DCSIZE_128 (0x6 << 20) -#define ATC_DCSIZE_256 (0x7 << 20) +/* Chunck Tranfer size definitions are in at_hdmac.h */ #define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ +#define ATC_SRC_WIDTH(x) ((x) << 24) #define ATC_SRC_WIDTH_BYTE (0x0 << 24) #define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) #define ATC_SRC_WIDTH_WORD (0x2 << 24) #define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ +#define ATC_DST_WIDTH(x) ((x) << 28) #define ATC_DST_WIDTH_BYTE (0x0 << 28) #define ATC_DST_WIDTH_HALFWORD (0x1 << 28) #define ATC_DST_WIDTH_WORD (0x2 << 28) @@ -129,7 +114,8 @@ #define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ #define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ #define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ -#define ATC_FC_PER2PER_PER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ #define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) #define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ #define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ @@ -142,27 +128,7 @@ #define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ /* Bitfields in CFG */ -#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ -#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ -#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ 
-#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ -#define ATC_SRC_H2SEL_SW (0x0 << 9) -#define ATC_SRC_H2SEL_HW (0x1 << 9) -#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ -#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ -#define ATC_DST_H2SEL_SW (0x0 << 13) -#define ATC_DST_H2SEL_HW (0x1 << 13) -#define ATC_SOD (0x1 << 16) /* Stop On Done */ -#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ -#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ -#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ -#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) -#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) -#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ -#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ -#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) -#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) -#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) +/* are in at_hdmac.h */ /* Bitfields in SPIP */ #define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) @@ -316,11 +282,12 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) dma_readl(atdma, CHSR)); dev_err(chan2dev(&atchan->chan_common), - " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", + " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), channel_readl(atchan, CTRLA), channel_readl(atchan, CTRLB), + channel_readl(atchan, CFG), channel_readl(atchan, DSCR)); } #else -- cgit v1.2.3 From 7194e6f9c083e87171ddfc8b746f05e007f58132 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Jul 2009 17:05:00 +0300 Subject: UBI: fix double free on error path If we fail in 'ubi_eba_init_scan()', we free 'ubi->volumes[i]->eba_tbl' in there, but also later free it in 'free_internal_volumes()'. Fix this by assigning NULL to 'ubi->volumes[i]->eba_tbl' after it is freed. Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/eba.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 0f2034c3ed2f..e4d9ef0c965a 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1254,6 +1254,7 @@ out_free: if (!ubi->volumes[i]) continue; kfree(ubi->volumes[i]->eba_tbl); + ubi->volumes[i]->eba_tbl = NULL; } return err; } -- cgit v1.2.3 From 32bc4820287a1a03982979515949e8ea56eac641 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Jul 2009 19:16:04 +0300 Subject: UBI: compatible fallback in absense of sequence numbers Fall back onto thinking everything's OK if either of the sequence numbers we are asked to compare is zero, which is what was used before sequence numbers were introduced. [ Artem: modified the patch to be applicable to upstream UBI, added big comment ] Signed-off-by: Adrian Hunter Signed-off-by: Phil Carmody Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index a423131b6171..b847745394b4 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -781,11 +781,22 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return -EINVAL; } + /* + * Make sure that all PEBs have the same image sequence number. + * This allows us to detect situations when users flash UBI + * images incorrectly, so that the flash has the new UBI image + * and leftovers from the old one. 
This feature was added + * relatively recently, and the sequence number was always + * zero, because old UBI implementations always set it to zero. + * For this reasons, we do not panic if some PEBs have zero + * sequence number, while other PEBs have non-zero sequence + * number. + */ image_seq = be32_to_cpu(ech->image_seq); if (!si->image_seq_set) { ubi->image_seq = image_seq; si->image_seq_set = 1; - } else if (ubi->image_seq != image_seq) { + } else if (ubi->image_seq && ubi->image_seq != image_seq) { ubi_err("bad image sequence number %d in PEB %d, " "expected %d", image_seq, pnum, ubi->image_seq); ubi_dbg_dump_ec_hdr(ech); -- cgit v1.2.3 From 3995bd9332a51b626237d6671cfeb7235e6c1305 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Jul 2009 11:13:14 -0700 Subject: iwlwifi: fix TX queue race I had a problem on 4965 hardware (well, probably other hardware too, but others don't survive my stress testing right now, unfortunately) where the driver was sending invalid commands to the device, but no such thing could be seen from the driver's point of view. I could reproduce this fairly easily by sending multiple TCP streams with iperf on different TIDs, though sometimes a single iperf stream was sufficient. It even happened with a single core, but I have forced preemption turned on. The culprit was a queue overrun, where we advanced the queue's write pointer over the read pointer. After careful analysis I've come to the conclusion that the cause is a race condition between iwlwifi and mac80211. mac80211, of course, checks whether the queue is stopped, before transmitting a frame. This effectively looks like this: lock(queues) if (stopped(queue)) { unlock(queues) return busy; } unlock(queues) ... <-- this place will be important there is some more code here drv_tx(frame) The driver, on the other hand, can stop and start queues, which does lock(queues) mark_running/stopped(queue) unlock(queues) [if marked running: wake up tasklet to send pending frames] Now, however, once the driver starts the queue, mac80211 can see that and end up at the marked place above, at which point for some reason the driver seems to stop the queue again (I don't understand that) and then we end up transmitting while the queue is actually full. Now, this shouldn't actually matter much, but for some reason I've seen it happen multiple times in a row and the queue actually overflows, at which point the queue bites itself in the tail and things go completely wrong. This patch fixes this by just dropping the packet should this have happened, and making the lock in iwlwifi cover everything so iwlwifi can't race against itself (dropping the lock there might make it more likely, but it did seem to happen without that too). Since we can't hold the lock across drv_tx() above, I see no way to fix this in mac80211, but I also don't understand why I haven't seen this before -- maybe I just never stress tested it this badly. With this patch, the device has survived many minutes of simultanously sending two iperf streams on different TIDs with combined throughput of about 60 Mbps. Signed-off-by: Johannes Berg Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-tx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9bbeec9427f0..5febb3186365 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -720,8 +720,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) goto drop_unlock; } - spin_unlock_irqrestore(&priv->lock, flags); - hdr_len = ieee80211_hdrlen(fc); /* Find (or create) index into station table for destination station */ @@ -729,7 +727,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) if (sta_id == IWL_INVALID_STATION) { IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n", hdr->addr1); - goto drop; + goto drop_unlock; } IWL_DEBUG_TX(priv, "station Id %d\n", sta_id); @@ -750,14 +748,17 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) txq_id = priv->stations[sta_id].tid[tid].agg.txq_id; swq_id = iwl_virtual_agg_queue_num(swq_id, txq_id); } - priv->stations[sta_id].tid[tid].tfds_in_queue++; } txq = &priv->txq[txq_id]; q = &txq->q; txq->swq_id = swq_id; - spin_lock_irqsave(&priv->lock, flags); + if (unlikely(iwl_queue_space(q) < q->high_mark)) + goto drop_unlock; + + if (ieee80211_is_data_qos(fc)) + priv->stations[sta_id].tid[tid].tfds_in_queue++; /* Set up driver data for this TFD */ memset(&(txq->txb[q->write_ptr]), 0, sizeof(struct iwl_tx_info)); @@ -902,7 +903,6 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) drop_unlock: spin_unlock_irqrestore(&priv->lock, flags); -drop: return -1; } EXPORT_SYMBOL(iwl_tx_skb); -- cgit v1.2.3 From 45f5fa32b130b2a59f9b726be45ce7fa73fb834c Mon Sep 17 00:00:00 2001 From: reinette chatre Date: Tue, 21 Jul 2009 09:29:07 -0700 Subject: iwlagn: fix minimum number of queues setting We need to provide a reasonable minimum that will result in a working setup if used. Set minimum to be 10 to provide for 4 standard TX queues + 1 command queue + 2 (unused) HCCA queues + 4 HT queues (one per AC). We allow the user to change the number of queues used via a module parameter and use this minimum value to check if it is valid. Without this patch a user can select a value for the number of queues that will result in a failing setup. Signed-off-by: Reinette Chatre Reviewed-by: Tomas Winkler Acked-by: Tomas Winkler Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-3945.h | 2 +- drivers/net/wireless/iwlwifi/iwl-dev.h | 6 ++++-- drivers/net/wireless/iwlwifi/iwl3945-base.c | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index fbb3a573463e..2de6471d4be9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h @@ -112,7 +112,7 @@ enum iwl3945_antenna { #define IWL_TX_FIFO_NONE 7 /* Minimum number of queues. MAX_NUM is defined in hw specific files */ -#define IWL_MIN_NUM_QUEUES 4 +#define IWL39_MIN_NUM_QUEUES 4 #define IEEE80211_DATA_LEN 2304 #define IEEE80211_4ADDR_LEN 30 diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index e2d620f0b6e8..650e20af20fa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -258,8 +258,10 @@ struct iwl_channel_info { #define IWL_TX_FIFO_HCCA_2 6 #define IWL_TX_FIFO_NONE 7 -/* Minimum number of queues. 
MAX_NUM is defined in hw specific files */ -#define IWL_MIN_NUM_QUEUES 4 +/* Minimum number of queues. MAX_NUM is defined in hw specific files. + * Set the minimum to accommodate the 4 standard TX queues, 1 command + * queue, 2 (unused) HCCA queues, and 4 HT queues (one for each AC) */ +#define IWL_MIN_NUM_QUEUES 10 /* Power management (not Tx power) structures */ diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 956798f2c80c..2f50ab60bfdf 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -4018,10 +4018,10 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e SET_IEEE80211_DEV(hw, &pdev->dev); if ((iwl3945_mod_params.num_of_queues > IWL39_MAX_NUM_QUEUES) || - (iwl3945_mod_params.num_of_queues < IWL_MIN_NUM_QUEUES)) { + (iwl3945_mod_params.num_of_queues < IWL39_MIN_NUM_QUEUES)) { IWL_ERR(priv, "invalid queues_num, should be between %d and %d\n", - IWL_MIN_NUM_QUEUES, IWL39_MAX_NUM_QUEUES); + IWL39_MIN_NUM_QUEUES, IWL39_MAX_NUM_QUEUES); err = -EINVAL; goto out_ieee80211_free_hw; } -- cgit v1.2.3 From 2a21f86917f7a9fe13b180e895a816871a234dee Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 25 Jul 2009 15:22:59 +0300 Subject: wireless: ERR_PTR vs null iwm_wdev_alloc() returns an ERR_PTR on failure and not null. It also prints its own dev_err() message so I removed that as well. Compile tested only. Sorry. Found by smatch (http://repo.or.cz/w/smatch.git). Signed-off-by: Dan Carpenter Acked-by: Zhu Yi Signed-off-by: John W. Linville --- drivers/net/wireless/iwmc3200wifi/netdev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/iwmc3200wifi/netdev.c b/drivers/net/wireless/iwmc3200wifi/netdev.c index aea5ccf24ccf..bf294e41753b 100644 --- a/drivers/net/wireless/iwmc3200wifi/netdev.c +++ b/drivers/net/wireless/iwmc3200wifi/netdev.c @@ -106,10 +106,8 @@ void *iwm_if_alloc(int sizeof_bus, struct device *dev, int ret = 0; wdev = iwm_wdev_alloc(sizeof_bus, dev); - if (!wdev) { - dev_err(dev, "no memory for wireless device instance\n"); - return ERR_PTR(-ENOMEM); - } + if (IS_ERR(wdev)) + return wdev; iwm = wdev_to_iwm(wdev); iwm->bus_ops = if_ops; -- cgit v1.2.3 From 3d0ccd021b23c18ea2d399fe4a43c955485c765c Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sat, 25 Jul 2009 23:02:32 +0200 Subject: airo: Buffer overflow SSID_rid has space for only 3 ssids. txPowerLevels[i] is read before the bounds check for i Signed-off-by: Roel Kluin Acked-by: Dan Williams Signed-off-by: John W. 
Linville --- drivers/net/wireless/airo.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index c70604f0329e..8ce5e4cee168 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -5918,20 +5918,19 @@ static int airo_set_essid(struct net_device *dev, readSsidRid(local, &SSID_rid); /* Check if we asked for `any' */ - if(dwrq->flags == 0) { + if (dwrq->flags == 0) { /* Just send an empty SSID list */ memset(&SSID_rid, 0, sizeof(SSID_rid)); } else { - int index = (dwrq->flags & IW_ENCODE_INDEX) - 1; + unsigned index = (dwrq->flags & IW_ENCODE_INDEX) - 1; /* Check the size of the string */ - if(dwrq->length > IW_ESSID_MAX_SIZE) { + if (dwrq->length > IW_ESSID_MAX_SIZE) return -E2BIG ; - } + /* Check if index is valid */ - if((index < 0) || (index >= 4)) { + if (index >= ARRAY_SIZE(SSID_rid.ssids)) return -EINVAL; - } /* Set the SSID */ memset(SSID_rid.ssids[index].ssid, 0, @@ -6819,7 +6818,7 @@ static int airo_set_txpow(struct net_device *dev, return -EINVAL; } clear_bit (FLAG_RADIO_OFF, &local->flags); - for (i = 0; cap_rid.txPowerLevels[i] && (i < 8); i++) + for (i = 0; i < 8 && cap_rid.txPowerLevels[i]; i++) if (v == cap_rid.txPowerLevels[i]) { readConfigRid(local, 1); local->config.txPower = v; -- cgit v1.2.3 From 008749fc9917b799c469478141ddd1a4c81d06ca Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sat, 25 Jul 2009 23:21:22 +0200 Subject: ath9k: Read outside array bounds Incorrect limits leads to reads outside array bounds. Signed-off-by: Roel Kluin Acked-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/eeprom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/ath/ath9k/eeprom.c b/drivers/net/wireless/ath/ath9k/eeprom.c index a2fda702b620..ce0e86c36a82 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.c +++ b/drivers/net/wireless/ath/ath9k/eeprom.c @@ -460,7 +460,7 @@ static int ath9k_hw_4k_check_eeprom(struct ath_hw *ah) integer = swab32(eep->modalHeader.antCtrlCommon); eep->modalHeader.antCtrlCommon = integer; - for (i = 0; i < AR5416_MAX_CHAINS; i++) { + for (i = 0; i < AR5416_EEP4K_MAX_CHAINS; i++) { integer = swab32(eep->modalHeader.antCtrlChain[i]); eep->modalHeader.antCtrlChain[i] = integer; } @@ -914,7 +914,7 @@ static void ath9k_hw_set_4k_power_per_rate_table(struct ath_hw *ah, ctlMode, numCtlModes, isHt40CtlMode, (pCtlMode[ctlMode] & EXT_ADDITIVE)); - for (i = 0; (i < AR5416_NUM_CTLS) && + for (i = 0; (i < AR5416_EEP4K_NUM_CTLS) && pEepData->ctlIndex[i]; i++) { DPRINTF(ah->ah_sc, ATH_DBG_EEPROM, " LOOP-Ctlidx %d: cfgCtl 0x%2.2x " -- cgit v1.2.3 From 082e708acc50a5b625b9bde0bb1af90dfdbd1942 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sat, 25 Jul 2009 23:34:31 +0200 Subject: iwlwifi: Read outside array bounds tid is bounded (above) by the size of default_tid_to_tx_fifo (17 elements), but the size of priv->stations[].tid[] is MAX_TID_COUNT (9) elements. Signed-off-by: Roel Kluin Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 5febb3186365..2e89040e63be 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -1171,6 +1171,8 @@ int iwl_tx_agg_start(struct iwl_priv *priv, const u8 *ra, u16 tid, u16 *ssn) IWL_ERR(priv, "Start AGG on invalid station\n"); return -ENXIO; } + if (unlikely(tid >= MAX_TID_COUNT)) + return -EINVAL; if (priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_OFF) { IWL_ERR(priv, "Start AGG when state is not IWL_AGG_OFF !\n"); -- cgit v1.2.3 From 7cb7f45c7feef43c8f71f5cfedfc0b19be2142f7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 27 Jul 2009 18:42:38 -0400 Subject: Revert "ACPICA: Remove obsolete acpi_os_validate_address interface" This reverts commit f9ca058430333c9a24c5ca926aa445125f88df18. which caused a regression: http://bugzilla.kernel.org/show_bug.cgi?id=13620 Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acobject.h | 1 + drivers/acpi/acpica/dsopcode.c | 24 ++++++++++++++++++++++++ drivers/acpi/acpica/exfldio.c | 6 ++++++ include/acpi/acpiosxf.h | 4 ++++ 4 files changed, 35 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 544dcf834922..eb6f038b03d9 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h @@ -97,6 +97,7 @@ #define AOPOBJ_OBJECT_INITIALIZED 0x08 #define AOPOBJ_SETUP_COMPLETE 0x10 #define AOPOBJ_SINGLE_DATUM 0x20 +#define AOPOBJ_INVALID 0x40 /* Used if host OS won't allow an op_region address */ /****************************************************************************** * diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index 584d766e6f12..b79978f7bc71 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c @@ -397,6 +397,30 @@ acpi_status acpi_ds_get_region_arguments(union acpi_operand_object *obj_desc) status = acpi_ds_execute_arguments(node, acpi_ns_get_parent_node(node), extra_desc->extra.aml_length, extra_desc->extra.aml_start); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Validate the region address/length via the host OS */ + + status = acpi_os_validate_address(obj_desc->region.space_id, + obj_desc->region.address, + (acpi_size) obj_desc->region.length, + acpi_ut_get_node_name(node)); + + if (ACPI_FAILURE(status)) { + /* + * Invalid address/length. We will emit an error message and mark + * the region as invalid, so that it will cause an additional error if + * it is ever used. Then return AE_OK. 
+ */ + ACPI_EXCEPTION((AE_INFO, status, + "During address validation of OpRegion [%4.4s]", + node->name.ascii)); + obj_desc->common.flags |= AOPOBJ_INVALID; + status = AE_OK; + } + return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index d4075b821021..6687be167f5f 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -113,6 +113,12 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, } } + /* Exit if Address/Length have been disallowed by the host OS */ + + if (rgn_desc->common.flags & AOPOBJ_INVALID) { + return_ACPI_STATUS(AE_AML_ILLEGAL_ADDRESS); + } + /* * Exit now for SMBus address space, it has a non-linear address space * and the request cannot be directly validated diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 3e798593b17b..ab0b85cf21f3 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -242,6 +242,10 @@ acpi_os_derive_pci_id(acpi_handle rhandle, acpi_status acpi_os_validate_interface(char *interface); acpi_status acpi_osi_invalidate(char* interface); +acpi_status +acpi_os_validate_address(u8 space_id, acpi_physical_address address, + acpi_size length, char *name); + u64 acpi_os_get_timer(void); acpi_status acpi_os_signal(u32 function, void *info); -- cgit v1.2.3 From 48f5690d45b79ffeedc5ab24243b576056f1d2ff Mon Sep 17 00:00:00 2001 From: unsik Kim Date: Tue, 28 Jul 2009 08:52:06 +0200 Subject: mg_disk: remove prohibited sleep operation mflash's polling driver operate in standard request_fn_proc's context, sleep in this isn't permitted. Signed-off-by: unsik Kim Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index f703f5478246..5b120eab2baa 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -245,8 +245,6 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) mg_dump_status("not ready", status, host); return MG_ERR_INV_STAT; } - if (prv_data->use_polling) - msleep(1); status = inb((unsigned long)host->dev_base + MG_REG_STATUS); } while (time_before(cur_jiffies, expire)); -- cgit v1.2.3 From eb32baec15c38ae6f06cb898a9f791578c5f8c79 Mon Sep 17 00:00:00 2001 From: unsik Kim Date: Tue, 28 Jul 2009 08:52:07 +0200 Subject: mg_disk: fix reading invalid status when use polling driver When using polling driver, little delay is required to access status register. Without this, host might read invalid status. Signed-off-by: unsik Kim Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 5b120eab2baa..6440d5945414 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -219,6 +219,16 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) host->error = MG_ERR_NONE; expire = jiffies + msecs_to_jiffies(msec); + /* These 2 times dummy status read prevents reading invalid + * status. A very little time (3 times of mflash operating clk) + * is required for busy bit is set. Use dummy read instead of + * busy wait, because mflash's PLL is machine dependent. 
+ */ + if (prv_data->use_polling) { + status = inb((unsigned long)host->dev_base + MG_REG_STATUS); + status = inb((unsigned long)host->dev_base + MG_REG_STATUS); + } + status = inb((unsigned long)host->dev_base + MG_REG_STATUS); do { -- cgit v1.2.3 From 394c6cc63c1d6900ad7498a3221a1d48fc00c4fa Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 08:56:34 +0200 Subject: mg_disk: fix issue with data integrity on error in mg_write() We cannot acknowledge the sector write before checking its status (which is done on the next loop iteration) and we also need to do the final status register check after writing the last sector. Fix mg_write() to match mg_write_intr() in this regard. While at it: - add mg_read_one() and mg_write_one() helpers - always use MG_SECTOR_SIZE and remove MG_STORAGE_BUFFER_SIZE [bart: thanks to Tejun for porting the patch over recent block changes] Cc: unsik Kim Cc: Tejun Heo Signed-off-by: Bartlomiej Zolnierkiewicz =================================================================== Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 89 ++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 6440d5945414..19917d5481bd 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -36,7 +36,6 @@ /* Register offsets */ #define MG_BUFF_OFFSET 0x8000 -#define MG_STORAGE_BUFFER_SIZE 0x200 #define MG_REG_OFFSET 0xC000 #define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ #define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ @@ -477,9 +476,18 @@ static unsigned int mg_out(struct mg_host *host, return MG_ERR_NONE; } +static void mg_read_one(struct mg_host *host, struct request *req) +{ + u16 *buff = (u16 *)req->buffer; + u32 i; + + for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) + *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + + (i << 1)); +} + static void mg_read(struct request *req) { - u32 j; struct mg_host *host = req->rq_disk->private_data; if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), @@ -490,26 +498,33 @@ static void mg_read(struct request *req) blk_rq_sectors(req), blk_rq_pos(req), req->buffer); do { - u16 *buff = (u16 *)req->buffer; - if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } - for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) - *buff++ = inw((unsigned long)host->dev_base + - MG_BUFF_OFFSET + (j << 1)); + + mg_read_one(host, req); outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); } +static void mg_write_one(struct mg_host *host, struct request *req) +{ + u16 *buff = (u16 *)req->buffer; + u32 i; + + for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) + outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET + + (i << 1)); +} + static void mg_write(struct request *req) { - u32 j; struct mg_host *host = req->rq_disk->private_data; + bool rem; if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), MG_CMD_WR, NULL) != MG_ERR_NONE) { @@ -520,27 +535,37 @@ static void mg_write(struct request *req) MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", blk_rq_sectors(req), blk_rq_pos(req), req->buffer); - do { - u16 *buff = (u16 *)req->buffer; + if (mg_wait(host, ATA_DRQ, + MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { + mg_bad_rw_intr(host); + return; + } + + mg_write_one(host, req); + + outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - if 
(mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { + do { + if (blk_rq_sectors(req) > 1 && + mg_wait(host, ATA_DRQ, + MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } - for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) - outw(*buff++, (unsigned long)host->dev_base + - MG_BUFF_OFFSET + (j << 1)); - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); + rem = mg_end_request(host, 0, MG_SECTOR_SIZE); + if (rem) + mg_write_one(host, req); + + outb(MG_CMD_WR_CONF, + (unsigned long)host->dev_base + MG_REG_COMMAND); + } while (rem); } static void mg_read_intr(struct mg_host *host) { struct request *req = host->req; u32 i; - u16 *buff; /* check status */ do { @@ -558,13 +583,7 @@ static void mg_read_intr(struct mg_host *host) return; ok_to_read: - /* get current segment of request */ - buff = (u16 *)req->buffer; - - /* read 1 sector */ - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) - *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + - (i << 1)); + mg_read_one(host, req); MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); @@ -583,8 +602,7 @@ ok_to_read: static void mg_write_intr(struct mg_host *host) { struct request *req = host->req; - u32 i, j; - u16 *buff; + u32 i; bool rem; /* check status */ @@ -605,12 +623,7 @@ static void mg_write_intr(struct mg_host *host) ok_to_write: if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { /* write 1 sector and set handler if remains */ - buff = (u16 *)req->buffer; - for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) { - outw(*buff, (unsigned long)host->dev_base + - MG_BUFF_OFFSET + (j << 1)); - buff++; - } + mg_write_one(host, req); MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", blk_rq_pos(req), blk_rq_sectors(req), req->buffer); host->mg_do_intr = mg_write_intr; @@ -675,9 +688,6 @@ static unsigned int mg_issue_req(struct request *req, unsigned int sect_num, unsigned int sect_cnt) { - u16 *buff; - u32 i; - switch (rq_data_dir(req)) { case READ: if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) @@ -701,12 +711,7 @@ static unsigned int mg_issue_req(struct request *req, mg_bad_rw_intr(host); return host->error; } - buff = (u16 *)req->buffer; - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) { - outw(*buff, (unsigned long)host->dev_base + - MG_BUFF_OFFSET + (i << 1)); - buff++; - } + mg_write_one(host, req); mod_timer(&host->timer, jiffies + 3 * HZ); outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); -- cgit v1.2.3 From a85a00a699740f6f9863f88aef22060fe1534681 Mon Sep 17 00:00:00 2001 From: unsik Kim Date: Tue, 28 Jul 2009 08:57:33 +0200 Subject: mg_disk: Add missing ready status check on mg_write() When last sector is written, ready bit of status register should be checked. 
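The intent — keep waiting for DRQ while more sectors remain, and check the drive's ready bit once the final sector has gone out — can be sketched roughly as below. Helper names are stand-ins for the mg_wait()/mg_write_one() calls shown in the hunk that follows, simplified for illustration and not the literal patch:

	/* Sketch only: helpers are stubbed so this builds outside the kernel. */

	#define MG_ERR_NONE   0
	#define ATA_DRQ       0x08	/* assumed value: data-request status bit */
	#define MG_STAT_READY 0x40	/* assumed value: drive-ready status bit */

	struct mg_host { int error; };

	/* Stand-ins for mg_wait(), mg_write_one() and the MG_CMD_WR_CONF outb(). */
	static int  wait_status(struct mg_host *host, int expect) { (void)host; (void)expect; return MG_ERR_NONE; }
	static void write_one_sector(struct mg_host *host)        { (void)host; }
	static void confirm_write(struct mg_host *host)           { (void)host; }

	static int mg_write_sketch(struct mg_host *host, unsigned int sectors)
	{
		/* DRQ must be up before the first sector goes out */
		if (wait_status(host, ATA_DRQ) != MG_ERR_NONE)
			return -1;

		while (sectors--) {
			write_one_sector(host);		/* copy one sector to the buffer */
			confirm_write(host);		/* tell the device it may program it */

			if (sectors) {
				/* more data to send: wait for DRQ again */
				if (wait_status(host, ATA_DRQ) != MG_ERR_NONE)
					return -1;
			} else {
				/* last sector written: now check the ready bit */
				if (wait_status(host, MG_STAT_READY) != MG_ERR_NONE)
					return -1;
			}
		}
		return 0;
	}

	int main(void)
	{
		struct mg_host host = { 0 };
		return mg_write_sketch(&host, 4) ? 1 : 0;
	}
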
Signed-off-by: unsik Kim Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 19917d5481bd..6d7fbaa92248 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -524,16 +524,16 @@ static void mg_write_one(struct mg_host *host, struct request *req) static void mg_write(struct request *req) { struct mg_host *host = req->rq_disk->private_data; - bool rem; + unsigned int rem = blk_rq_sectors(req); - if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), + if (mg_out(host, blk_rq_pos(req), rem, MG_CMD_WR, NULL) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - blk_rq_sectors(req), blk_rq_pos(req), req->buffer); + rem, blk_rq_pos(req), req->buffer); if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { @@ -541,25 +541,23 @@ static void mg_write(struct request *req) return; } - mg_write_one(host, req); + do { + mg_write_one(host, req); - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); + outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + + MG_REG_COMMAND); - do { - if (blk_rq_sectors(req) > 1 && - mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { + rem--; + if (rem > 1 && mg_wait(host, ATA_DRQ, + MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { + mg_bad_rw_intr(host); + return; + } else if (mg_wait(host, MG_STAT_READY, + MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } - - rem = mg_end_request(host, 0, MG_SECTOR_SIZE); - if (rem) - mg_write_one(host, req); - - outb(MG_CMD_WR_CONF, - (unsigned long)host->dev_base + MG_REG_COMMAND); - } while (rem); + } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); } static void mg_read_intr(struct mg_host *host) -- cgit v1.2.3 From 430453fc2a5f3f2c1d98ebc3c3d4c54f3060e3c3 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Jul 2009 09:59:47 +0200 Subject: libertas: Read outside array bounds reads bss->rates[j] before checking bounds of index, and should use ARRAY_SIZE to determine the size of the array. Signed-off-by: Roel Kluin Acked-by: Holger Schurig Acked-by: Dan Williams Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index 601b54249677..6c95af3023cc 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c @@ -5,6 +5,7 @@ * for sending scan commands to the firmware. */ #include +#include #include #include #include @@ -876,7 +877,7 @@ static inline char *lbs_translate_scan(struct lbs_private *priv, iwe.u.bitrate.disabled = 0; iwe.u.bitrate.value = 0; - for (j = 0; bss->rates[j] && (j < sizeof(bss->rates)); j++) { + for (j = 0; j < ARRAY_SIZE(bss->rates) && bss->rates[j]; j++) { /* Bit rate given in 500 kb/s units */ iwe.u.bitrate.value = bss->rates[j] * 500000; current_val = iwe_stream_add_value(info, start, current_val, -- cgit v1.2.3 From 57921c312e8cef72ba35a4cfe870b376da0b1b87 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Jul 2009 12:05:00 +0200 Subject: libertas: Read buffer overflow Several arrays were read before checking whether the index was within bounds. ARRAY_SIZE() should be used to determine the size of arrays. 
rates->rates has an arraysize of 1, so calling get_common_rates() with a rates_size of MAX_RATES (14) was causing reads out of bounds. tmp_size can increment at most to (ARRAY_SIZE(lbs_bg_rates) - 1) * (*rates_size - 1), so that should be the number of elements of tmp[]. A goto can be eliminated: ret was already set upon its declaration. Signed-off-by: Roel Kluin Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/assoc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c index b9b374119033..d6997371c27e 100644 --- a/drivers/net/wireless/libertas/assoc.c +++ b/drivers/net/wireless/libertas/assoc.c @@ -1,6 +1,7 @@ /* Copyright (C) 2006, Red Hat, Inc. */ #include +#include #include #include #include @@ -43,21 +44,21 @@ static int get_common_rates(struct lbs_private *priv, u16 *rates_size) { u8 *card_rates = lbs_bg_rates; - size_t num_card_rates = sizeof(lbs_bg_rates); int ret = 0, i, j; - u8 tmp[30]; + u8 tmp[(ARRAY_SIZE(lbs_bg_rates) - 1) * (*rates_size - 1)]; size_t tmp_size = 0; /* For each rate in card_rates that exists in rate1, copy to tmp */ - for (i = 0; card_rates[i] && (i < num_card_rates); i++) { - for (j = 0; rates[j] && (j < *rates_size); j++) { + for (i = 0; i < ARRAY_SIZE(lbs_bg_rates) && card_rates[i]; i++) { + for (j = 0; j < *rates_size && rates[j]; j++) { if (rates[j] == card_rates[i]) tmp[tmp_size++] = card_rates[i]; } } lbs_deb_hex(LBS_DEB_JOIN, "AP rates ", rates, *rates_size); - lbs_deb_hex(LBS_DEB_JOIN, "card rates ", card_rates, num_card_rates); + lbs_deb_hex(LBS_DEB_JOIN, "card rates ", card_rates, + ARRAY_SIZE(lbs_bg_rates)); lbs_deb_hex(LBS_DEB_JOIN, "common rates", tmp, tmp_size); lbs_deb_join("TX data rate 0x%02x\n", priv->cur_rate); @@ -69,10 +70,7 @@ static int get_common_rates(struct lbs_private *priv, lbs_pr_alert("Previously set fixed data rate %#x isn't " "compatible with the network.\n", priv->cur_rate); ret = -1; - goto done; } - ret = 0; - done: memset(rates, 0, *rates_size); *rates_size = min_t(int, tmp_size, *rates_size); @@ -322,7 +320,7 @@ static int lbs_associate(struct lbs_private *priv, rates = (struct mrvl_ie_rates_param_set *) pos; rates->header.type = cpu_to_le16(TLV_TYPE_RATES); memcpy(&rates->rates, &bss->rates, MAX_RATES); - tmplen = MAX_RATES; + tmplen = min_t(u16, ARRAY_SIZE(rates->rates), MAX_RATES); if (get_common_rates(priv, rates->rates, &tmplen)) { ret = -1; goto done; @@ -598,7 +596,7 @@ static int lbs_adhoc_join(struct lbs_private *priv, /* Copy Data rates from the rates recorded in scan response */ memset(cmd.bss.rates, 0, sizeof(cmd.bss.rates)); - ratesize = min_t(u16, sizeof(cmd.bss.rates), MAX_RATES); + ratesize = min_t(u16, ARRAY_SIZE(cmd.bss.rates), MAX_RATES); memcpy(cmd.bss.rates, bss->rates, ratesize); if (get_common_rates(priv, cmd.bss.rates, &ratesize)) { lbs_deb_join("ADHOC_JOIN: get_common_rates returned error.\n"); -- cgit v1.2.3 From cdaa052b05e26d96a990af5d253fd2af5db2b1fc Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 28 Jul 2009 10:54:13 +0800 Subject: drm/I915: Fix offset to DVO timings in LVDS data Now the DVO timing in LVDS data entry is obtained by using the following step: a. get the entry size for every LVDS panel data b. Get the LVDS fp entry for the preferred panel type c. get the DVO timing by using entry->dvo_timing In our driver the entry->dvo_timing is related with the size of lvds_fp_timing. For example: the size is 46. 
But it seems that the size of lvds_fp_timing varies on the differnt platform. In such case we will get the incorrect DVO timing entry because of the incorrect DVO offset in LVDS panel data entry. This also removes a hack on new IGDNG to get proper DVO timing. Calculate the DVO timing offset in LVDS data entry to get the DVO timing a. get the DVO timing offset in the LVDS fp data entry by using the pointer definition in LVDS data ptr b. get the LVDS data entry c. get the DVO timing by adding the DVO timing offset to data entry https://bugs.freedesktop.org/show_bug.cgi?id=22787 Signed-off-by: Zhao Yakui Tested-by: Zhenyu Wang Acked-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_bios.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 7cc447191028..5c7a62a96ae7 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -97,14 +97,13 @@ static void parse_lfp_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) { - struct drm_device *dev = dev_priv->dev; struct bdb_lvds_options *lvds_options; struct bdb_lvds_lfp_data *lvds_lfp_data; struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs; struct bdb_lvds_lfp_data_entry *entry; struct lvds_dvo_timing *dvo_timing; struct drm_display_mode *panel_fixed_mode; - int lfp_data_size; + int lfp_data_size, dvo_timing_offset; /* Defaults if we can't find VBT info */ dev_priv->lvds_dither = 0; @@ -133,14 +132,16 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, entry = (struct bdb_lvds_lfp_data_entry *) ((uint8_t *)lvds_lfp_data->data + (lfp_data_size * lvds_options->panel_type)); + dvo_timing_offset = lvds_lfp_data_ptrs->ptr[0].dvo_timing_offset - + lvds_lfp_data_ptrs->ptr[0].fp_timing_offset; - /* On IGDNG mobile, LVDS data block removes panel fitting registers. - So dec 2 dword from dvo_timing offset */ - if (IS_IGDNG(dev)) - dvo_timing = (struct lvds_dvo_timing *) - ((u8 *)&entry->dvo_timing - 8); - else - dvo_timing = &entry->dvo_timing; + /* + * the size of fp_timing varies on the different platform. + * So calculate the DVO timing relative offset in LVDS data + * entry to get the DVO timing entry + */ + dvo_timing = (struct lvds_dvo_timing *) + ((unsigned char *)entry + dvo_timing_offset); panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL); -- cgit v1.2.3 From 24f119c769bacac5729297b682fec7811a983cc6 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Jul 2009 01:00:28 +0800 Subject: drm/i915: disable VGA plane reliably This does VGA disable like DDX driver. SR01 bit 5 should be set before VGA plane disable through control register, otherwise we might get random crash and lockups. 
Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a58bfadabd6f..b06364ed2591 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -998,6 +998,29 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, return 0; } +/* Disable the VGA plane that we never use */ +static void i915_disable_vga (struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u8 sr1; + u32 vga_reg; + + if (IS_IGDNG(dev)) + vga_reg = CPU_VGACNTRL; + else + vga_reg = VGACNTRL; + + if (I915_READ(vga_reg) & VGA_DISP_DISABLE) + return; + + I915_WRITE8(VGA_SR_INDEX, 1); + sr1 = I915_READ8(VGA_SR_DATA); + I915_WRITE8(VGA_SR_DATA, sr1 | (1 << 5)); + udelay(100); + + I915_WRITE(vga_reg, VGA_DISP_DISABLE); +} + static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) { struct drm_device *dev = crtc->dev; @@ -1200,8 +1223,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_OFF: DRM_DEBUG("crtc %d dpms off\n", pipe); - /* Disable the VGA plane that we never use */ - I915_WRITE(CPU_VGACNTRL, VGA_DISP_DISABLE); + i915_disable_vga(dev); /* Disable display plane */ temp = I915_READ(dspcntr_reg); @@ -1342,7 +1364,7 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) //intel_crtc_dpms_video(crtc, FALSE); TODO /* Disable the VGA plane that we never use */ - I915_WRITE(VGACNTRL, VGA_DISP_DISABLE); + i915_disable_vga(dev); /* Disable display plane */ temp = I915_READ(dspcntr_reg); -- cgit v1.2.3 From 249c0e64c24bf455a4e4815f72750f2b16cedd94 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Jul 2009 01:00:29 +0800 Subject: drm/i915: fix issue in display pipe setup on IGDNG During pipe DPMS off, instead of busy waiting pipe off, insert delays during wait and don't loop after enough tries which matches spec requirement. Also try to match DPMS on path by disable FDI TX PLL in DPMS off. Disable PF by writing PF_WIN_SZ which really trigger the update. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_display.c | 56 ++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 897a116cad14..7c77c4c53c52 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1848,6 +1848,8 @@ #define PFA_CTL_1 0x68080 #define PFB_CTL_1 0x68880 #define PF_ENABLE (1<<31) +#define PFA_WIN_SZ 0x68074 +#define PFB_WIN_SZ 0x68874 /* legacy palette */ #define LGC_PALETTE_A 0x4a000 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b06364ed2591..15a8c3908acb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1038,6 +1038,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) int fdi_rx_imr_reg = (pipe == 0) ? FDI_RXA_IMR : FDI_RXB_IMR; int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF; int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1; + int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ; int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; int cpu_hsync_reg = (pipe == 0) ? 
HSYNC_A : HSYNC_B; @@ -1051,7 +1052,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) int trans_vblank_reg = (pipe == 0) ? TRANS_VBLANK_A : TRANS_VBLANK_B; int trans_vsync_reg = (pipe == 0) ? TRANS_VSYNC_A : TRANS_VSYNC_B; u32 temp; - int tries = 5, j; + int tries = 5, j, n; /* XXX: When our outputs are all unaware of DPMS modes other than off * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC. @@ -1239,19 +1240,21 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) if ((temp & PIPEACONF_ENABLE) != 0) { I915_WRITE(pipeconf_reg, temp & ~PIPEACONF_ENABLE); I915_READ(pipeconf_reg); + n = 0; /* wait for cpu pipe off, pipe state */ - while ((I915_READ(pipeconf_reg) & I965_PIPECONF_ACTIVE) != 0) - ; + while ((I915_READ(pipeconf_reg) & I965_PIPECONF_ACTIVE) != 0) { + n++; + if (n < 60) { + udelay(500); + continue; + } else { + DRM_DEBUG("pipe %d off delay\n", pipe); + break; + } + } } else DRM_DEBUG("crtc %d is disabled\n", pipe); - /* IGDNG-A : disable cpu panel fitter ? */ - temp = I915_READ(pf_ctl_reg); - if ((temp & PF_ENABLE) != 0) { - I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE); - I915_READ(pf_ctl_reg); - } - /* disable CPU FDI tx and PCH FDI rx */ temp = I915_READ(fdi_tx_reg); I915_WRITE(fdi_tx_reg, temp & ~FDI_TX_ENABLE); @@ -1261,6 +1264,8 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) I915_WRITE(fdi_rx_reg, temp & ~FDI_RX_ENABLE); I915_READ(fdi_rx_reg); + udelay(100); + /* still set train pattern 1 */ temp = I915_READ(fdi_tx_reg); temp &= ~FDI_LINK_TRAIN_NONE; @@ -1272,14 +1277,25 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(fdi_rx_reg, temp); + udelay(100); + /* disable PCH transcoder */ temp = I915_READ(transconf_reg); if ((temp & TRANS_ENABLE) != 0) { I915_WRITE(transconf_reg, temp & ~TRANS_ENABLE); I915_READ(transconf_reg); + n = 0; /* wait for PCH transcoder off, transcoder state */ - while ((I915_READ(transconf_reg) & TRANS_STATE_ENABLE) != 0) - ; + while ((I915_READ(transconf_reg) & TRANS_STATE_ENABLE) != 0) { + n++; + if (n < 60) { + udelay(500); + continue; + } else { + DRM_DEBUG("transcoder %d off delay\n", pipe); + break; + } + } } /* disable PCH DPLL */ @@ -1297,6 +1313,22 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) I915_READ(fdi_rx_reg); } + /* Disable CPU FDI TX PLL */ + temp = I915_READ(fdi_tx_reg); + if ((temp & FDI_TX_PLL_ENABLE) != 0) { + I915_WRITE(fdi_tx_reg, temp & ~FDI_TX_PLL_ENABLE); + I915_READ(fdi_tx_reg); + udelay(100); + } + + /* Disable PF */ + temp = I915_READ(pf_ctl_reg); + if ((temp & PF_ENABLE) != 0) { + I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE); + I915_READ(pf_ctl_reg); + } + I915_WRITE(pf_win_size, 0); + /* Wait for the clocks to turn off. */ udelay(150); break; -- cgit v1.2.3 From eebc863e469cd91d96c4e3636450596ae29f0502 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Jul 2009 01:00:30 +0800 Subject: drm/i915: Fix channel ending action for DP aux transaction We should use current channel 'status' bits to clear DP aux channel's done and error bits, instead of using the channel setting bits, that will set send/busy bit again to initiate new transaction. This also includes also some minor cleanup. 
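The core of the fix: build the acknowledgement write from the status value just read back, not from the control word used to start the transfer, since the latter still carries the send/busy bit and would kick off another transaction. A hedged sketch with stand-in bit names and helpers (the actual one-line change is in the diff below):

	/* Sketch only: bit layout and helpers are stand-ins, not the i915 API. */
	#include <stdint.h>
	#include <stdio.h>

	#define AUX_SEND_BUSY   (1u << 31)	/* assumed bit positions */
	#define AUX_DONE        (1u << 30)
	#define AUX_TIMEOUT_ERR (1u << 28)
	#define AUX_RECEIVE_ERR (1u << 25)

	static uint32_t aux_ctl;		/* fake channel control/status register */

	static uint32_t aux_read(void)        { return aux_ctl; }
	static void     aux_write(uint32_t v) { aux_ctl = v; }

	static int aux_finish(uint32_t ctl_used_to_send)
	{
		uint32_t status = aux_read();

		/*
		 * Buggy variant: writing (ctl_used_to_send | DONE | error bits) puts
		 * SEND_BUSY back into the register and starts a new transaction.
		 * Fixed variant: base the acknowledgement on the status just read.
		 */
		aux_write(status | AUX_DONE | AUX_TIMEOUT_ERR | AUX_RECEIVE_ERR);

		(void)ctl_used_to_send;
		return (status & AUX_DONE) ? 0 : -1;
	}

	int main(void)
	{
		aux_ctl = AUX_DONE;		/* pretend a transfer just completed */
		printf("aux_finish -> %d\n", aux_finish(AUX_SEND_BUSY));
		return 0;
	}
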
Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_dp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6770ae88370d..afec65c5ad8a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -236,7 +236,7 @@ intel_dp_aux_ch(struct intel_output *intel_output, } /* Clear done status and any errors */ - I915_WRITE(ch_ctl, (ctl | + I915_WRITE(ch_ctl, (status | DP_AUX_CH_CTL_DONE | DP_AUX_CH_CTL_TIME_OUT_ERROR | DP_AUX_CH_CTL_RECEIVE_ERROR)); @@ -295,7 +295,7 @@ intel_dp_aux_native_write(struct intel_output *intel_output, return -1; msg[0] = AUX_NATIVE_WRITE << 4; msg[1] = address >> 8; - msg[2] = address; + msg[2] = address & 0xff; msg[3] = send_bytes - 1; memcpy(&msg[4], send, send_bytes); msg_bytes = send_bytes + 4; @@ -387,8 +387,8 @@ intel_dp_i2c_init(struct intel_output *intel_output, const char *name) memset(&dp_priv->adapter, '\0', sizeof (dp_priv->adapter)); dp_priv->adapter.owner = THIS_MODULE; dp_priv->adapter.class = I2C_CLASS_DDC; - strncpy (dp_priv->adapter.name, name, sizeof dp_priv->adapter.name - 1); - dp_priv->adapter.name[sizeof dp_priv->adapter.name - 1] = '\0'; + strncpy (dp_priv->adapter.name, name, sizeof(dp_priv->adapter.name) - 1); + dp_priv->adapter.name[sizeof(dp_priv->adapter.name) - 1] = '\0'; dp_priv->adapter.algo_data = &dp_priv->algo; dp_priv->adapter.dev.parent = &intel_output->base.kdev; -- cgit v1.2.3 From 5eb08b69f510fadaba77eb9a1bda0f7299c4ebcc Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Jul 2009 01:00:31 +0800 Subject: drm/i915: enable DisplayPort support on IGDNG Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_reg.h | 25 +++++++++ drivers/gpu/drm/i915/intel_display.c | 51 +++++++++++++++++- drivers/gpu/drm/i915/intel_dp.c | 102 ++++++++++++++++++++++++++++------- 3 files changed, 157 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7c77c4c53c52..4518a9489ed1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1444,6 +1444,7 @@ #define DP_CLOCK_OUTPUT_ENABLE (1 << 13) #define DP_SCRAMBLING_DISABLE (1 << 12) +#define DP_SCRAMBLING_DISABLE_IGDNG (1 << 7) /** limit RGB values to avoid confusing TVs */ #define DP_COLOR_RANGE_16_235 (1 << 8) @@ -2210,4 +2211,28 @@ #define PCH_PP_OFF_DELAYS 0xc720c #define PCH_PP_DIVISOR 0xc7210 +#define PCH_DP_B 0xe4100 +#define PCH_DPB_AUX_CH_CTL 0xe4110 +#define PCH_DPB_AUX_CH_DATA1 0xe4114 +#define PCH_DPB_AUX_CH_DATA2 0xe4118 +#define PCH_DPB_AUX_CH_DATA3 0xe411c +#define PCH_DPB_AUX_CH_DATA4 0xe4120 +#define PCH_DPB_AUX_CH_DATA5 0xe4124 + +#define PCH_DP_C 0xe4200 +#define PCH_DPC_AUX_CH_CTL 0xe4210 +#define PCH_DPC_AUX_CH_DATA1 0xe4214 +#define PCH_DPC_AUX_CH_DATA2 0xe4218 +#define PCH_DPC_AUX_CH_DATA3 0xe421c +#define PCH_DPC_AUX_CH_DATA4 0xe4220 +#define PCH_DPC_AUX_CH_DATA5 0xe4224 + +#define PCH_DP_D 0xe4300 +#define PCH_DPD_AUX_CH_CTL 0xe4310 +#define PCH_DPD_AUX_CH_DATA1 0xe4314 +#define PCH_DPD_AUX_CH_DATA2 0xe4318 +#define PCH_DPD_AUX_CH_DATA3 0xe431c +#define PCH_DPD_AUX_CH_DATA4 0xe4320 +#define PCH_DPD_AUX_CH_DATA5 0xe4324 + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 15a8c3908acb..34c50460eaa7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ 
-268,6 +268,9 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, static bool intel_find_pll_g4x_dp(const intel_limit_t *, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *best_clock); +static bool +intel_find_pll_igdng_dp(const intel_limit_t *, struct drm_crtc *crtc, + int target, int refclk, intel_clock_t *best_clock); static const intel_limit_t intel_limits_i8xx_dvo = { .dot = { .min = I8XX_DOT_MIN, .max = I8XX_DOT_MAX }, @@ -751,6 +754,30 @@ intel_g4x_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, return found; } +static bool +intel_find_pll_igdng_dp(const intel_limit_t *limit, struct drm_crtc *crtc, + int target, int refclk, intel_clock_t *best_clock) +{ + struct drm_device *dev = crtc->dev; + intel_clock_t clock; + if (target < 200000) { + clock.n = 1; + clock.p1 = 2; + clock.p2 = 10; + clock.m1 = 12; + clock.m2 = 9; + } else { + clock.n = 2; + clock.p1 = 1; + clock.p2 = 10; + clock.m1 = 14; + clock.m2 = 8; + } + intel_clock(dev, refclk, &clock); + memcpy(best_clock, &clock, sizeof(intel_clock_t)); + return true; +} + static bool intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *best_clock) @@ -763,6 +790,10 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int err_most = 47; found = false; + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) + return intel_find_pll_igdng_dp(limit, crtc, target, + refclk, best_clock); + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) @@ -2136,6 +2167,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int lvds_reg = LVDS; u32 temp; int sdvo_pixel_multiply; + int target_clock; drm_vblank_pre_modeset(dev, pipe); @@ -2218,11 +2250,18 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, } /* FDI link */ - if (IS_IGDNG(dev)) + if (IS_IGDNG(dev)) { + /* DP over FDI requires target mode clock + instead of link clock */ + if (is_dp) + target_clock = mode->clock; + else + target_clock = adjusted_mode->clock; igdng_compute_m_n(3, 4, /* lane num 4 */ - adjusted_mode->clock, + target_clock, 270000, /* lane clock */ &m_n); + } if (IS_IGD(dev)) fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2; @@ -3050,6 +3089,8 @@ static void intel_setup_outputs(struct drm_device *dev) found = 0; if (!found) intel_hdmi_init(dev, HDMIB); + if (!found && (I915_READ(PCH_DP_B) & DP_DETECTED)) + intel_dp_init(dev, PCH_DP_B); } if (I915_READ(HDMIC) & PORT_DETECTED) @@ -3058,6 +3099,12 @@ static void intel_setup_outputs(struct drm_device *dev) if (I915_READ(HDMID) & PORT_DETECTED) intel_hdmi_init(dev, HDMID); + if (I915_READ(PCH_DP_C) & DP_DETECTED) + intel_dp_init(dev, PCH_DP_C); + + if (I915_READ(PCH_DP_D) & DP_DETECTED) + intel_dp_init(dev, PCH_DP_D); + } else if (IS_I9XX(dev)) { int found; u32 reg; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index afec65c5ad8a..0715911cbd84 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -206,7 +206,12 @@ intel_dp_aux_ch(struct intel_output *intel_output, * and would like to run at 2MHz. 
So, take the * hrawclk value and divide by 2 and use that */ - aux_clock_divider = intel_hrawclk(dev) / 2; + /* IGDNG: input clock fixed at 125Mhz, so aux_bit_clk always 62 */ + if (IS_IGDNG(dev)) + aux_clock_divider = 62; + else + aux_clock_divider = intel_hrawclk(dev) / 2; + /* Must try at least 3 times according to DP spec */ for (try = 0; try < 5; try++) { /* Load the send data into the aux channel data registers */ @@ -493,22 +498,40 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, intel_dp_compute_m_n(3, lane_count, mode->clock, adjusted_mode->clock, &m_n); - if (intel_crtc->pipe == 0) { - I915_WRITE(PIPEA_GMCH_DATA_M, - ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | - m_n.gmch_m); - I915_WRITE(PIPEA_GMCH_DATA_N, - m_n.gmch_n); - I915_WRITE(PIPEA_DP_LINK_M, m_n.link_m); - I915_WRITE(PIPEA_DP_LINK_N, m_n.link_n); + if (IS_IGDNG(dev)) { + if (intel_crtc->pipe == 0) { + I915_WRITE(TRANSA_DATA_M1, + ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | + m_n.gmch_m); + I915_WRITE(TRANSA_DATA_N1, m_n.gmch_n); + I915_WRITE(TRANSA_DP_LINK_M1, m_n.link_m); + I915_WRITE(TRANSA_DP_LINK_N1, m_n.link_n); + } else { + I915_WRITE(TRANSB_DATA_M1, + ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | + m_n.gmch_m); + I915_WRITE(TRANSB_DATA_N1, m_n.gmch_n); + I915_WRITE(TRANSB_DP_LINK_M1, m_n.link_m); + I915_WRITE(TRANSB_DP_LINK_N1, m_n.link_n); + } } else { - I915_WRITE(PIPEB_GMCH_DATA_M, - ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | - m_n.gmch_m); - I915_WRITE(PIPEB_GMCH_DATA_N, - m_n.gmch_n); - I915_WRITE(PIPEB_DP_LINK_M, m_n.link_m); - I915_WRITE(PIPEB_DP_LINK_N, m_n.link_n); + if (intel_crtc->pipe == 0) { + I915_WRITE(PIPEA_GMCH_DATA_M, + ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | + m_n.gmch_m); + I915_WRITE(PIPEA_GMCH_DATA_N, + m_n.gmch_n); + I915_WRITE(PIPEA_DP_LINK_M, m_n.link_m); + I915_WRITE(PIPEA_DP_LINK_N, m_n.link_n); + } else { + I915_WRITE(PIPEB_GMCH_DATA_M, + ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | + m_n.gmch_m); + I915_WRITE(PIPEB_GMCH_DATA_N, + m_n.gmch_n); + I915_WRITE(PIPEB_DP_LINK_M, m_n.link_m); + I915_WRITE(PIPEB_DP_LINK_N, m_n.link_n); + } } } @@ -935,6 +958,12 @@ intel_dp_link_down(struct intel_output *intel_output, uint32_t DP) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dp_priv *dp_priv = intel_output->dev_priv; + DP &= ~DP_LINK_TRAIN_MASK; + I915_WRITE(dp_priv->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE); + POSTING_READ(dp_priv->output_reg); + + udelay(17000); + I915_WRITE(dp_priv->output_reg, DP & ~DP_PORT_EN); POSTING_READ(dp_priv->output_reg); } @@ -978,6 +1007,24 @@ intel_dp_check_link_status(struct intel_output *intel_output) intel_dp_link_train(intel_output, dp_priv->DP, dp_priv->link_configuration); } +static enum drm_connector_status +igdng_dp_detect(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_dp_priv *dp_priv = intel_output->dev_priv; + enum drm_connector_status status; + + status = connector_status_disconnected; + if (intel_dp_aux_native_read(intel_output, + 0x000, dp_priv->dpcd, + sizeof (dp_priv->dpcd)) == sizeof (dp_priv->dpcd)) + { + if (dp_priv->dpcd[0] != 0) + status = connector_status_connected; + } + return status; +} + /** * Uses CRT_HOTPLUG_EN and CRT_HOTPLUG_STAT to detect DP connection. 
* @@ -996,6 +1043,9 @@ intel_dp_detect(struct drm_connector *connector) dp_priv->has_audio = false; + if (IS_IGDNG(dev)) + return igdng_dp_detect(connector); + temp = I915_READ(PORT_HOTPLUG_EN); I915_WRITE(PORT_HOTPLUG_EN, @@ -1106,6 +1156,7 @@ intel_dp_init(struct drm_device *dev, int output_reg) struct drm_connector *connector; struct intel_output *intel_output; struct intel_dp_priv *dp_priv; + const char *name = NULL; intel_output = kcalloc(sizeof(struct intel_output) + sizeof(struct intel_dp_priv), 1, GFP_KERNEL); @@ -1139,9 +1190,22 @@ intel_dp_init(struct drm_device *dev, int output_reg) drm_sysfs_connector_add(connector); /* Set up the DDC bus. */ - intel_dp_i2c_init(intel_output, - (output_reg == DP_B) ? "DPDDC-B" : - (output_reg == DP_C) ? "DPDDC-C" : "DPDDC-D"); + switch (output_reg) { + case DP_B: + case PCH_DP_B: + name = "DPDDC-B"; + break; + case DP_C: + case PCH_DP_C: + name = "DPDDC-C"; + break; + case DP_D: + case PCH_DP_D: + name = "DPDDC-D"; + break; + } + + intel_dp_i2c_init(intel_output, name); intel_output->ddc_bus = &dp_priv->adapter; intel_output->hot_plug = intel_dp_hot_plug; -- cgit v1.2.3 From 32f9d658aee5be09ebdd28fc730630e61d0b46db Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Jul 2009 01:00:32 +0800 Subject: drm/i915: Add eDP support on IGDNG mobile chip This adds embedded DisplayPort support on next mobile chip which aims to replace origin LVDS port. VBT's driver feature block has been used to determine the type of current internal panel for eDP or LVDS. Currently no panel fitting support for eDP and backlight control would be added in future. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 16 ++ drivers/gpu/drm/i915/intel_bios.c | 21 ++ drivers/gpu/drm/i915/intel_bios.h | 45 ++++ drivers/gpu/drm/i915/intel_display.c | 433 ++++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_dp.c | 112 ++++++++- drivers/gpu/drm/i915/intel_drv.h | 3 + drivers/gpu/drm/i915/intel_lvds.c | 4 + 8 files changed, 468 insertions(+), 168 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b05b44dd3bf6..5f3a259d95e9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -219,6 +219,7 @@ typedef struct drm_i915_private { unsigned int lvds_vbt:1; unsigned int int_crt_support:1; unsigned int lvds_use_ssc:1; + unsigned int edp_support:1; int lvds_ssc_freq; struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */ @@ -889,6 +890,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); IS_I915GM(dev))) #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev)) +#define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev)) #define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev)) /* dsparb controlled by hw only */ #define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev)) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4518a9489ed1..2955083aa471 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1395,6 +1395,7 @@ #define TV_V_CHROMA_42 0x684a8 /* Display Port */ +#define DP_A 0x64000 /* eDP */ #define DP_B 0x64100 #define DP_C 0x64200 #define DP_D 0x64300 @@ -1437,9 +1438,17 @@ /* Mystic DPCD version 1.1 special mode */ #define DP_ENHANCED_FRAMING (1 << 18) +/* eDP */ +#define DP_PLL_FREQ_270MHZ (0 << 16) 
+#define DP_PLL_FREQ_160MHZ (1 << 16) +#define DP_PLL_FREQ_MASK (3 << 16) + /** locked once port is enabled */ #define DP_PORT_REVERSAL (1 << 15) +/* eDP */ +#define DP_PLL_ENABLE (1 << 14) + /** sends the clock on lane 15 of the PEG for debug */ #define DP_CLOCK_OUTPUT_ENABLE (1 << 13) @@ -1464,6 +1473,13 @@ * is 20 bytes in each direction, hence the 5 fixed * data registers */ +#define DPA_AUX_CH_CTL 0x64010 +#define DPA_AUX_CH_DATA1 0x64014 +#define DPA_AUX_CH_DATA2 0x64018 +#define DPA_AUX_CH_DATA3 0x6401c +#define DPA_AUX_CH_DATA4 0x64020 +#define DPA_AUX_CH_DATA5 0x64024 + #define DPB_AUX_CH_CTL 0x64110 #define DPB_AUX_CH_DATA1 0x64114 #define DPB_AUX_CH_DATA2 0x64118 diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 5c7a62a96ae7..300aee3296c2 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -296,6 +296,25 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, } return; } + +static void +parse_driver_features(struct drm_i915_private *dev_priv, + struct bdb_header *bdb) +{ + struct drm_device *dev = dev_priv->dev; + struct bdb_driver_features *driver; + + /* set default for chips without eDP */ + if (!SUPPORTS_EDP(dev)) { + dev_priv->edp_support = 0; + return; + } + + driver = find_section(bdb, BDB_DRIVER_FEATURES); + if (driver && driver->lvds_config == BDB_DRIVER_FEATURE_EDP) + dev_priv->edp_support = 1; +} + /** * intel_init_bios - initialize VBIOS settings & find VBT * @dev: DRM device @@ -346,6 +365,8 @@ intel_init_bios(struct drm_device *dev) parse_lfp_panel_data(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); parse_sdvo_device_mapping(dev_priv, bdb); + parse_driver_features(dev_priv, bdb); + pci_unmap_rom(pdev, bios); return 0; diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index fe72e1c225d8..0f8e5f69ac7a 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -381,6 +381,51 @@ struct bdb_sdvo_lvds_options { } __attribute__((packed)); +#define BDB_DRIVER_FEATURE_NO_LVDS 0 +#define BDB_DRIVER_FEATURE_INT_LVDS 1 +#define BDB_DRIVER_FEATURE_SDVO_LVDS 2 +#define BDB_DRIVER_FEATURE_EDP 3 + +struct bdb_driver_features { + u8 boot_dev_algorithm:1; + u8 block_display_switch:1; + u8 allow_display_switch:1; + u8 hotplug_dvo:1; + u8 dual_view_zoom:1; + u8 int15h_hook:1; + u8 sprite_in_clone:1; + u8 primary_lfp_id:1; + + u16 boot_mode_x; + u16 boot_mode_y; + u8 boot_mode_bpp; + u8 boot_mode_refresh; + + u16 enable_lfp_primary:1; + u16 selective_mode_pruning:1; + u16 dual_frequency:1; + u16 render_clock_freq:1; /* 0: high freq; 1: low freq */ + u16 nt_clone_support:1; + u16 power_scheme_ui:1; /* 0: CUI; 1: 3rd party */ + u16 sprite_display_assign:1; /* 0: secondary; 1: primary */ + u16 cui_aspect_scaling:1; + u16 preserve_aspect_ratio:1; + u16 sdvo_device_power_down:1; + u16 crt_hotplug:1; + u16 lvds_config:2; + u16 tv_hotplug:1; + u16 hdmi_config:2; + + u8 static_display:1; + u8 reserved2:7; + u16 legacy_crt_max_x; + u16 legacy_crt_max_y; + u8 legacy_crt_max_refresh; + + u8 hdmi_termination; + u8 custom_vbt_version; +} __attribute__((packed)); + bool intel_init_bios(struct drm_device *dev); /* diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 34c50460eaa7..a9f1307d32d8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -34,6 +34,8 @@ #include "drm_crtc_helper.h" +#define HAS_eDP (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) + bool 
intel_pipe_has_type (struct drm_crtc *crtc, int type); static void intel_update_watermarks(struct drm_device *dev); @@ -601,6 +603,23 @@ bool intel_pipe_has_type (struct drm_crtc *crtc, int type) return false; } +struct drm_connector * +intel_pipe_get_output (struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_mode_config *mode_config = &dev->mode_config; + struct drm_connector *l_entry, *ret = NULL; + + list_for_each_entry(l_entry, &mode_config->connector_list, head) { + if (l_entry->encoder && + l_entry->encoder->crtc == crtc) { + ret = l_entry; + break; + } + } + return ret; +} + #define INTELPllInvalid(s) do { /* DRM_DEBUG(s); */ return false; } while (0) /** * Returns whether the given set of divisors are valid for a given refclk with @@ -790,6 +809,10 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int err_most = 47; found = false; + /* eDP has only 2 clock choice, no n/m/p setting */ + if (HAS_eDP) + return true; + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) return intel_find_pll_igdng_dp(limit, crtc, target, refclk, best_clock); @@ -1052,6 +1075,67 @@ static void i915_disable_vga (struct drm_device *dev) I915_WRITE(vga_reg, VGA_DISP_DISABLE); } +static void igdng_disable_pll_edp (struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dpa_ctl; + + DRM_DEBUG("\n"); + dpa_ctl = I915_READ(DP_A); + dpa_ctl &= ~DP_PLL_ENABLE; + I915_WRITE(DP_A, dpa_ctl); +} + +static void igdng_enable_pll_edp (struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dpa_ctl; + + dpa_ctl = I915_READ(DP_A); + dpa_ctl |= DP_PLL_ENABLE; + I915_WRITE(DP_A, dpa_ctl); + udelay(200); +} + + +static void igdng_set_pll_edp (struct drm_crtc *crtc, int clock) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dpa_ctl; + + DRM_DEBUG("eDP PLL enable for clock %d\n", clock); + dpa_ctl = I915_READ(DP_A); + dpa_ctl &= ~DP_PLL_FREQ_MASK; + + if (clock < 200000) { + u32 temp; + dpa_ctl |= DP_PLL_FREQ_160MHZ; + /* workaround for 160Mhz: + 1) program 0x4600c bits 15:0 = 0x8124 + 2) program 0x46010 bit 0 = 1 + 3) program 0x46034 bit 24 = 1 + 4) program 0x64000 bit 14 = 1 + */ + temp = I915_READ(0x4600c); + temp &= 0xffff0000; + I915_WRITE(0x4600c, temp | 0x8124); + + temp = I915_READ(0x46010); + I915_WRITE(0x46010, temp | 1); + + temp = I915_READ(0x46034); + I915_WRITE(0x46034, temp | (1 << 24)); + } else { + dpa_ctl |= DP_PLL_FREQ_270MHZ; + } + I915_WRITE(DP_A, dpa_ctl); + + udelay(500); +} + static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) { struct drm_device *dev = crtc->dev; @@ -1093,27 +1177,32 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_STANDBY: case DRM_MODE_DPMS_SUSPEND: DRM_DEBUG("crtc %d dpms on\n", pipe); - /* enable PCH DPLL */ - temp = I915_READ(pch_dpll_reg); - if ((temp & DPLL_VCO_ENABLE) == 0) { - I915_WRITE(pch_dpll_reg, temp | DPLL_VCO_ENABLE); - I915_READ(pch_dpll_reg); - } - - /* enable PCH FDI RX PLL, wait warmup plus DMI latency */ - temp = I915_READ(fdi_rx_reg); - I915_WRITE(fdi_rx_reg, temp | FDI_RX_PLL_ENABLE | - FDI_SEL_PCDCLK | - FDI_DP_PORT_WIDTH_X4); /* default 4 lanes */ - I915_READ(fdi_rx_reg); - udelay(200); + if (HAS_eDP) { + /* enable eDP PLL */ + igdng_enable_pll_edp(crtc); + } else { + /* enable PCH DPLL */ + temp = I915_READ(pch_dpll_reg); + if ((temp & DPLL_VCO_ENABLE) == 0) { + 
I915_WRITE(pch_dpll_reg, temp | DPLL_VCO_ENABLE); + I915_READ(pch_dpll_reg); + } - /* Enable CPU FDI TX PLL, always on for IGDNG */ - temp = I915_READ(fdi_tx_reg); - if ((temp & FDI_TX_PLL_ENABLE) == 0) { - I915_WRITE(fdi_tx_reg, temp | FDI_TX_PLL_ENABLE); - I915_READ(fdi_tx_reg); - udelay(100); + /* enable PCH FDI RX PLL, wait warmup plus DMI latency */ + temp = I915_READ(fdi_rx_reg); + I915_WRITE(fdi_rx_reg, temp | FDI_RX_PLL_ENABLE | + FDI_SEL_PCDCLK | + FDI_DP_PORT_WIDTH_X4); /* default 4 lanes */ + I915_READ(fdi_rx_reg); + udelay(200); + + /* Enable CPU FDI TX PLL, always on for IGDNG */ + temp = I915_READ(fdi_tx_reg); + if ((temp & FDI_TX_PLL_ENABLE) == 0) { + I915_WRITE(fdi_tx_reg, temp | FDI_TX_PLL_ENABLE); + I915_READ(fdi_tx_reg); + udelay(100); + } } /* Enable CPU pipe */ @@ -1132,122 +1221,126 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) I915_WRITE(dspbase_reg, I915_READ(dspbase_reg)); } - /* enable CPU FDI TX and PCH FDI RX */ - temp = I915_READ(fdi_tx_reg); - temp |= FDI_TX_ENABLE; - temp |= FDI_DP_PORT_WIDTH_X4; /* default */ - temp &= ~FDI_LINK_TRAIN_NONE; - temp |= FDI_LINK_TRAIN_PATTERN_1; - I915_WRITE(fdi_tx_reg, temp); - I915_READ(fdi_tx_reg); + if (!HAS_eDP) { + /* enable CPU FDI TX and PCH FDI RX */ + temp = I915_READ(fdi_tx_reg); + temp |= FDI_TX_ENABLE; + temp |= FDI_DP_PORT_WIDTH_X4; /* default */ + temp &= ~FDI_LINK_TRAIN_NONE; + temp |= FDI_LINK_TRAIN_PATTERN_1; + I915_WRITE(fdi_tx_reg, temp); + I915_READ(fdi_tx_reg); - temp = I915_READ(fdi_rx_reg); - temp &= ~FDI_LINK_TRAIN_NONE; - temp |= FDI_LINK_TRAIN_PATTERN_1; - I915_WRITE(fdi_rx_reg, temp | FDI_RX_ENABLE); - I915_READ(fdi_rx_reg); + temp = I915_READ(fdi_rx_reg); + temp &= ~FDI_LINK_TRAIN_NONE; + temp |= FDI_LINK_TRAIN_PATTERN_1; + I915_WRITE(fdi_rx_reg, temp | FDI_RX_ENABLE); + I915_READ(fdi_rx_reg); - udelay(150); + udelay(150); - /* Train FDI. */ - /* umask FDI RX Interrupt symbol_lock and bit_lock bit - for train result */ - temp = I915_READ(fdi_rx_imr_reg); - temp &= ~FDI_RX_SYMBOL_LOCK; - temp &= ~FDI_RX_BIT_LOCK; - I915_WRITE(fdi_rx_imr_reg, temp); - I915_READ(fdi_rx_imr_reg); - udelay(150); + /* Train FDI. 
*/ + /* umask FDI RX Interrupt symbol_lock and bit_lock bit + for train result */ + temp = I915_READ(fdi_rx_imr_reg); + temp &= ~FDI_RX_SYMBOL_LOCK; + temp &= ~FDI_RX_BIT_LOCK; + I915_WRITE(fdi_rx_imr_reg, temp); + I915_READ(fdi_rx_imr_reg); + udelay(150); - temp = I915_READ(fdi_rx_iir_reg); - DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); + temp = I915_READ(fdi_rx_iir_reg); + DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); - if ((temp & FDI_RX_BIT_LOCK) == 0) { - for (j = 0; j < tries; j++) { - temp = I915_READ(fdi_rx_iir_reg); - DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); - if (temp & FDI_RX_BIT_LOCK) - break; - udelay(200); - } - if (j != tries) + if ((temp & FDI_RX_BIT_LOCK) == 0) { + for (j = 0; j < tries; j++) { + temp = I915_READ(fdi_rx_iir_reg); + DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); + if (temp & FDI_RX_BIT_LOCK) + break; + udelay(200); + } + if (j != tries) + I915_WRITE(fdi_rx_iir_reg, + temp | FDI_RX_BIT_LOCK); + else + DRM_DEBUG("train 1 fail\n"); + } else { I915_WRITE(fdi_rx_iir_reg, temp | FDI_RX_BIT_LOCK); - else - DRM_DEBUG("train 1 fail\n"); - } else { - I915_WRITE(fdi_rx_iir_reg, - temp | FDI_RX_BIT_LOCK); - DRM_DEBUG("train 1 ok 2!\n"); - } - temp = I915_READ(fdi_tx_reg); - temp &= ~FDI_LINK_TRAIN_NONE; - temp |= FDI_LINK_TRAIN_PATTERN_2; - I915_WRITE(fdi_tx_reg, temp); - - temp = I915_READ(fdi_rx_reg); - temp &= ~FDI_LINK_TRAIN_NONE; - temp |= FDI_LINK_TRAIN_PATTERN_2; - I915_WRITE(fdi_rx_reg, temp); + DRM_DEBUG("train 1 ok 2!\n"); + } + temp = I915_READ(fdi_tx_reg); + temp &= ~FDI_LINK_TRAIN_NONE; + temp |= FDI_LINK_TRAIN_PATTERN_2; + I915_WRITE(fdi_tx_reg, temp); + + temp = I915_READ(fdi_rx_reg); + temp &= ~FDI_LINK_TRAIN_NONE; + temp |= FDI_LINK_TRAIN_PATTERN_2; + I915_WRITE(fdi_rx_reg, temp); - udelay(150); + udelay(150); - temp = I915_READ(fdi_rx_iir_reg); - DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); + temp = I915_READ(fdi_rx_iir_reg); + DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); - if ((temp & FDI_RX_SYMBOL_LOCK) == 0) { - for (j = 0; j < tries; j++) { - temp = I915_READ(fdi_rx_iir_reg); - DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); - if (temp & FDI_RX_SYMBOL_LOCK) - break; - udelay(200); - } - if (j != tries) { + if ((temp & FDI_RX_SYMBOL_LOCK) == 0) { + for (j = 0; j < tries; j++) { + temp = I915_READ(fdi_rx_iir_reg); + DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp); + if (temp & FDI_RX_SYMBOL_LOCK) + break; + udelay(200); + } + if (j != tries) { + I915_WRITE(fdi_rx_iir_reg, + temp | FDI_RX_SYMBOL_LOCK); + DRM_DEBUG("train 2 ok 1!\n"); + } else + DRM_DEBUG("train 2 fail\n"); + } else { I915_WRITE(fdi_rx_iir_reg, temp | FDI_RX_SYMBOL_LOCK); - DRM_DEBUG("train 2 ok 1!\n"); - } else - DRM_DEBUG("train 2 fail\n"); - } else { - I915_WRITE(fdi_rx_iir_reg, temp | FDI_RX_SYMBOL_LOCK); - DRM_DEBUG("train 2 ok 2!\n"); - } - DRM_DEBUG("train done\n"); + DRM_DEBUG("train 2 ok 2!\n"); + } + DRM_DEBUG("train done\n"); - /* set transcoder timing */ - I915_WRITE(trans_htot_reg, I915_READ(cpu_htot_reg)); - I915_WRITE(trans_hblank_reg, I915_READ(cpu_hblank_reg)); - I915_WRITE(trans_hsync_reg, I915_READ(cpu_hsync_reg)); + /* set transcoder timing */ + I915_WRITE(trans_htot_reg, I915_READ(cpu_htot_reg)); + I915_WRITE(trans_hblank_reg, I915_READ(cpu_hblank_reg)); + I915_WRITE(trans_hsync_reg, I915_READ(cpu_hsync_reg)); - I915_WRITE(trans_vtot_reg, I915_READ(cpu_vtot_reg)); - I915_WRITE(trans_vblank_reg, I915_READ(cpu_vblank_reg)); - I915_WRITE(trans_vsync_reg, I915_READ(cpu_vsync_reg)); + I915_WRITE(trans_vtot_reg, I915_READ(cpu_vtot_reg)); + I915_WRITE(trans_vblank_reg, I915_READ(cpu_vblank_reg)); + I915_WRITE(trans_vsync_reg, 
I915_READ(cpu_vsync_reg)); - /* enable PCH transcoder */ - temp = I915_READ(transconf_reg); - I915_WRITE(transconf_reg, temp | TRANS_ENABLE); - I915_READ(transconf_reg); + /* enable PCH transcoder */ + temp = I915_READ(transconf_reg); + I915_WRITE(transconf_reg, temp | TRANS_ENABLE); + I915_READ(transconf_reg); - while ((I915_READ(transconf_reg) & TRANS_STATE_ENABLE) == 0) - ; + while ((I915_READ(transconf_reg) & TRANS_STATE_ENABLE) == 0) + ; - /* enable normal */ + /* enable normal */ - temp = I915_READ(fdi_tx_reg); - temp &= ~FDI_LINK_TRAIN_NONE; - I915_WRITE(fdi_tx_reg, temp | FDI_LINK_TRAIN_NONE | - FDI_TX_ENHANCE_FRAME_ENABLE); - I915_READ(fdi_tx_reg); + temp = I915_READ(fdi_tx_reg); + temp &= ~FDI_LINK_TRAIN_NONE; + I915_WRITE(fdi_tx_reg, temp | FDI_LINK_TRAIN_NONE | + FDI_TX_ENHANCE_FRAME_ENABLE); + I915_READ(fdi_tx_reg); - temp = I915_READ(fdi_rx_reg); - temp &= ~FDI_LINK_TRAIN_NONE; - I915_WRITE(fdi_rx_reg, temp | FDI_LINK_TRAIN_NONE | - FDI_RX_ENHANCE_FRAME_ENABLE); - I915_READ(fdi_rx_reg); + temp = I915_READ(fdi_rx_reg); + temp &= ~FDI_LINK_TRAIN_NONE; + I915_WRITE(fdi_rx_reg, temp | FDI_LINK_TRAIN_NONE | + FDI_RX_ENHANCE_FRAME_ENABLE); + I915_READ(fdi_rx_reg); - /* wait one idle pattern time */ - udelay(100); + /* wait one idle pattern time */ + udelay(100); + + } intel_crtc_load_lut(crtc); @@ -1286,6 +1379,10 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) } else DRM_DEBUG("crtc %d is disabled\n", pipe); + if (HAS_eDP) { + igdng_disable_pll_edp(crtc); + } + /* disable CPU FDI tx and PCH FDI rx */ temp = I915_READ(fdi_tx_reg); I915_WRITE(fdi_tx_reg, temp & ~FDI_TX_ENABLE); @@ -2152,6 +2249,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, u32 dpll = 0, fp = 0, dspcntr, pipeconf; bool ok, is_sdvo = false, is_dvo = false; bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false; + bool is_edp = false; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; const intel_limit_t *limit; @@ -2199,6 +2297,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; + case INTEL_OUTPUT_EDP: + is_edp = true; + break; } num_outputs++; @@ -2251,16 +2352,27 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* FDI link */ if (IS_IGDNG(dev)) { - /* DP over FDI requires target mode clock - instead of link clock */ - if (is_dp) + int lane, link_bw; + /* eDP doesn't require FDI link, so just set DP M/N + according to current link config */ + if (is_edp) { + struct drm_connector *edp; target_clock = mode->clock; - else - target_clock = adjusted_mode->clock; - igdng_compute_m_n(3, 4, /* lane num 4 */ - target_clock, - 270000, /* lane clock */ - &m_n); + edp = intel_pipe_get_output(crtc); + intel_edp_link_config(to_intel_output(edp), + &lane, &link_bw); + } else { + /* DP over FDI requires target mode clock + instead of link clock */ + if (is_dp) + target_clock = mode->clock; + else + target_clock = adjusted_mode->clock; + lane = 4; + link_bw = 270000; + } + igdng_compute_m_n(3, lane, target_clock, + link_bw, &m_n); } if (IS_IGD(dev)) @@ -2382,29 +2494,15 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, dpll_reg = pch_dpll_reg; } - if (dpll & DPLL_VCO_ENABLE) { + if (is_edp) { + igdng_disable_pll_edp(crtc); + } else if ((dpll & DPLL_VCO_ENABLE)) { I915_WRITE(fp_reg, fp); I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE); I915_READ(dpll_reg); udelay(150); } - if (IS_IGDNG(dev)) { - /* enable PCH clock reference source */ - /* XXX need to change the setting for other 
outputs */ - u32 temp; - temp = I915_READ(PCH_DREF_CONTROL); - temp &= ~DREF_NONSPREAD_SOURCE_MASK; - temp |= DREF_NONSPREAD_CK505_ENABLE; - temp &= ~DREF_SSC_SOURCE_MASK; - temp |= DREF_SSC_SOURCE_ENABLE; - temp &= ~DREF_SSC1_ENABLE; - /* if no eDP, disable source output to CPU */ - temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; - temp |= DREF_CPU_SOURCE_OUTPUT_DISABLE; - I915_WRITE(PCH_DREF_CONTROL, temp); - } - /* The LVDS pin pair needs to be on before the DPLLs are enabled. * This is an exception to the general rule that mode_set doesn't turn * things on. @@ -2436,23 +2534,25 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, if (is_dp) intel_dp_set_m_n(crtc, mode, adjusted_mode); - I915_WRITE(fp_reg, fp); - I915_WRITE(dpll_reg, dpll); - I915_READ(dpll_reg); - /* Wait for the clocks to stabilize. */ - udelay(150); - - if (IS_I965G(dev) && !IS_IGDNG(dev)) { - sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; - I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | - ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT)); - } else { - /* write it again -- the BIOS does, after all */ + if (!is_edp) { + I915_WRITE(fp_reg, fp); I915_WRITE(dpll_reg, dpll); + I915_READ(dpll_reg); + /* Wait for the clocks to stabilize. */ + udelay(150); + + if (IS_I965G(dev) && !IS_IGDNG(dev)) { + sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; + I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | + ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT)); + } else { + /* write it again -- the BIOS does, after all */ + I915_WRITE(dpll_reg, dpll); + } + I915_READ(dpll_reg); + /* Wait for the clocks to stabilize. */ + udelay(150); } - I915_READ(dpll_reg); - /* Wait for the clocks to stabilize. */ - udelay(150); I915_WRITE(htot_reg, (adjusted_mode->crtc_hdisplay - 1) | ((adjusted_mode->crtc_htotal - 1) << 16)); @@ -2482,10 +2582,14 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(link_m1_reg, m_n.link_m); I915_WRITE(link_n1_reg, m_n.link_n); - /* enable FDI RX PLL too */ - temp = I915_READ(fdi_rx_reg); - I915_WRITE(fdi_rx_reg, temp | FDI_RX_PLL_ENABLE); - udelay(200); + if (is_edp) { + igdng_set_pll_edp(crtc, adjusted_mode->clock); + } else { + /* enable FDI RX PLL too */ + temp = I915_READ(fdi_rx_reg); + I915_WRITE(fdi_rx_reg, temp | FDI_RX_PLL_ENABLE); + udelay(200); + } } I915_WRITE(pipeconf_reg, pipeconf); @@ -3083,6 +3187,9 @@ static void intel_setup_outputs(struct drm_device *dev) if (IS_IGDNG(dev)) { int found; + if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED)) + intel_dp_init(dev, DP_A); + if (I915_READ(HDMIB) & PORT_DETECTED) { /* check SDVOB */ /* found = intel_sdvo_init(dev, HDMIB); */ @@ -3179,6 +3286,10 @@ static void intel_setup_outputs(struct drm_device *dev) (1 << 1)); clone_mask = (1 << INTEL_OUTPUT_DISPLAYPORT); break; + case INTEL_OUTPUT_EDP: + crtc_mask = (1 << 1); + clone_mask = (1 << INTEL_OUTPUT_EDP); + break; } encoder->possible_crtcs = crtc_mask; encoder->possible_clones = intel_connector_clones(dev, clone_mask); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0715911cbd84..a6ff15ac548a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -40,6 +40,8 @@ #define DP_LINK_CONFIGURATION_SIZE 9 +#define IS_eDP(i) ((i)->type == INTEL_OUTPUT_EDP) + struct intel_dp_priv { uint32_t output_reg; uint32_t DP; @@ -63,6 +65,19 @@ intel_dp_link_train(struct intel_output *intel_output, uint32_t DP, static void intel_dp_link_down(struct intel_output *intel_output, uint32_t 
DP); +void +intel_edp_link_config (struct intel_output *intel_output, + int *lane_num, int *link_bw) +{ + struct intel_dp_priv *dp_priv = intel_output->dev_priv; + + *lane_num = dp_priv->lane_count; + if (dp_priv->link_bw == DP_LINK_BW_1_62) + *link_bw = 162000; + else if (dp_priv->link_bw == DP_LINK_BW_2_7) + *link_bw = 270000; +} + static int intel_dp_max_lane_count(struct intel_output *intel_output) { @@ -206,9 +221,10 @@ intel_dp_aux_ch(struct intel_output *intel_output, * and would like to run at 2MHz. So, take the * hrawclk value and divide by 2 and use that */ - /* IGDNG: input clock fixed at 125Mhz, so aux_bit_clk always 62 */ - if (IS_IGDNG(dev)) - aux_clock_divider = 62; + if (IS_eDP(intel_output)) + aux_clock_divider = 225; /* eDP input clock at 450Mhz */ + else if (IS_IGDNG(dev)) + aux_clock_divider = 62; /* IGDNG: input clock fixed at 125Mhz */ else aux_clock_divider = intel_hrawclk(dev) / 2; @@ -579,8 +595,38 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, if (intel_crtc->pipe == 1) dp_priv->DP |= DP_PIPEB_SELECT; + + if (IS_eDP(intel_output)) { + /* don't miss out required setting for eDP */ + dp_priv->DP |= DP_PLL_ENABLE; + if (adjusted_mode->clock < 200000) + dp_priv->DP |= DP_PLL_FREQ_160MHZ; + else + dp_priv->DP |= DP_PLL_FREQ_270MHZ; + } } +static void igdng_edp_backlight_on (struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 pp; + + DRM_DEBUG("\n"); + pp = I915_READ(PCH_PP_CONTROL); + pp |= EDP_BLC_ENABLE; + I915_WRITE(PCH_PP_CONTROL, pp); +} + +static void igdng_edp_backlight_off (struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 pp; + + DRM_DEBUG("\n"); + pp = I915_READ(PCH_PP_CONTROL); + pp &= ~EDP_BLC_ENABLE; + I915_WRITE(PCH_PP_CONTROL, pp); +} static void intel_dp_dpms(struct drm_encoder *encoder, int mode) @@ -592,11 +638,17 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode) uint32_t dp_reg = I915_READ(dp_priv->output_reg); if (mode != DRM_MODE_DPMS_ON) { - if (dp_reg & DP_PORT_EN) + if (dp_reg & DP_PORT_EN) { intel_dp_link_down(intel_output, dp_priv->DP); + if (IS_eDP(intel_output)) + igdng_edp_backlight_off(dev); + } } else { - if (!(dp_reg & DP_PORT_EN)) + if (!(dp_reg & DP_PORT_EN)) { intel_dp_link_train(intel_output, dp_priv->DP, dp_priv->link_configuration); + if (IS_eDP(intel_output)) + igdng_edp_backlight_on(dev); + } } dp_priv->dpms_mode = mode; } @@ -958,12 +1010,23 @@ intel_dp_link_down(struct intel_output *intel_output, uint32_t DP) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dp_priv *dp_priv = intel_output->dev_priv; + DRM_DEBUG("\n"); + + if (IS_eDP(intel_output)) { + DP &= ~DP_PLL_ENABLE; + I915_WRITE(dp_priv->output_reg, DP); + POSTING_READ(dp_priv->output_reg); + udelay(100); + } + DP &= ~DP_LINK_TRAIN_MASK; I915_WRITE(dp_priv->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE); POSTING_READ(dp_priv->output_reg); udelay(17000); + if (IS_eDP(intel_output)) + DP |= DP_LINK_TRAIN_OFF; I915_WRITE(dp_priv->output_reg, DP & ~DP_PORT_EN); POSTING_READ(dp_priv->output_reg); } @@ -1089,11 +1152,27 @@ intel_dp_detect(struct drm_connector *connector) static int intel_dp_get_modes(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); + struct drm_device *dev = intel_output->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; /* We should parse the EDID data and find out if it has an audio sink */ - return intel_ddc_get_modes(intel_output); + ret = 
intel_ddc_get_modes(intel_output); + if (ret) + return ret; + + /* if eDP has no EDID, try to use fixed panel mode from VBT */ + if (IS_eDP(intel_output)) { + if (dev_priv->panel_fixed_mode != NULL) { + struct drm_display_mode *mode; + mode = drm_mode_duplicate(dev, dev_priv->panel_fixed_mode); + drm_mode_probed_add(connector, mode); + return 1; + } + } + return 0; } static void @@ -1170,7 +1249,10 @@ intel_dp_init(struct drm_device *dev, int output_reg) DRM_MODE_CONNECTOR_DisplayPort); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); - intel_output->type = INTEL_OUTPUT_DISPLAYPORT; + if (output_reg == DP_A) + intel_output->type = INTEL_OUTPUT_EDP; + else + intel_output->type = INTEL_OUTPUT_DISPLAYPORT; connector->interlace_allowed = true; connector->doublescan_allowed = 0; @@ -1191,6 +1273,9 @@ intel_dp_init(struct drm_device *dev, int output_reg) /* Set up the DDC bus. */ switch (output_reg) { + case DP_A: + name = "DPDDC-A"; + break; case DP_B: case PCH_DP_B: name = "DPDDC-B"; @@ -1206,9 +1291,22 @@ intel_dp_init(struct drm_device *dev, int output_reg) } intel_dp_i2c_init(intel_output, name); + intel_output->ddc_bus = &dp_priv->adapter; intel_output->hot_plug = intel_dp_hot_plug; + if (output_reg == DP_A) { + /* initialize panel mode from VBT if available for eDP */ + if (dev_priv->lfp_lvds_vbt_mode) { + dev_priv->panel_fixed_mode = + drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode); + if (dev_priv->panel_fixed_mode) { + dev_priv->panel_fixed_mode->type |= + DRM_MODE_TYPE_PREFERRED; + } + } + } + /* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written * 0xd. Failure to do so will result in spurious interrupts being * generated on the port when a cable is not attached. diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 004541c935a8..d6f92ea1b553 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -55,6 +55,7 @@ #define INTEL_OUTPUT_TVOUT 5 #define INTEL_OUTPUT_HDMI 6 #define INTEL_OUTPUT_DISPLAYPORT 7 +#define INTEL_OUTPUT_EDP 8 #define INTEL_DVO_CHIP_NONE 0 #define INTEL_DVO_CHIP_LVDS 1 @@ -121,6 +122,8 @@ extern void intel_dp_init(struct drm_device *dev, int dp_reg); void intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); +extern void intel_edp_link_config (struct intel_output *, int *, int *); + extern void intel_crtc_load_lut(struct drm_crtc *crtc); extern void intel_encoder_prepare (struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 43c7d9aa59ce..3f445a80c552 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -892,6 +892,10 @@ void intel_lvds_init(struct drm_device *dev) if (IS_IGDNG(dev)) { if ((I915_READ(PCH_LVDS) & LVDS_DETECTED) == 0) return; + if (dev_priv->edp_support) { + DRM_DEBUG("disable LVDS for eDP support\n"); + return; + } gpio = PCH_GPIOC; } -- cgit v1.2.3 From 67941da28d59cca6817d35823fcb1e3e4eed030e Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Jul 2009 01:00:33 +0800 Subject: drm/i915: fix VGA detect on IGDNG Check FORCE_DETECT bit to be clear for the finish of hotplug detect process. Also check possible mono monitor which should also be marked as connected. 
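In short, the new detect flow is: force a hotplug detect cycle, wait for the hardware to clear the trigger bit (instead of sleeping a fixed 1ms and hoping), then accept either the color or the mono monitor code as "connected". A condensed sketch of that flow, reusing the PCH_ADPA register and ADPA_* bit names from the patch below; it leaves out the detection period and voltage programming the real function keeps, the helper name is illustrative, and the bare busy-wait mirrors the patch (production code would want a timeout):

static bool pch_crt_hotplug_detected(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 adpa;

	/* Kick off a forced hotplug detect cycle with the DAC disabled. */
	adpa = I915_READ(PCH_ADPA);
	adpa &= ~ADPA_DAC_ENABLE;
	adpa &= ~ADPA_CRT_HOTPLUG_MASK;
	adpa |= ADPA_CRT_HOTPLUG_FORCE_TRIGGER;
	I915_WRITE(PCH_ADPA, adpa);

	/* Hardware clears the trigger bit when the detect cycle finishes. */
	while (I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER)
		cpu_relax();

	/* Either monitor code, color or mono, means something is attached. */
	adpa = I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_MONITOR_MASK;
	return adpa == ADPA_CRT_HOTPLUG_MONITOR_COLOR ||
	       adpa == ADPA_CRT_HOTPLUG_MONITOR_MONO;
}
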
Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_crt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index d6a1a6e5539a..4cf8e2e88a40 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -156,6 +156,9 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector) temp = adpa = I915_READ(PCH_ADPA); + adpa &= ~ADPA_DAC_ENABLE; + I915_WRITE(PCH_ADPA, adpa); + adpa &= ~ADPA_CRT_HOTPLUG_MASK; adpa |= (ADPA_CRT_HOTPLUG_PERIOD_128 | @@ -169,13 +172,14 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector) DRM_DEBUG("pch crt adpa 0x%x", adpa); I915_WRITE(PCH_ADPA, adpa); - /* This might not be needed as not specified in spec...*/ - udelay(1000); + while ((I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER) != 0) + ; /* Check the status to see if both blue and green are on now */ adpa = I915_READ(PCH_ADPA); - if ((adpa & ADPA_CRT_HOTPLUG_MONITOR_MASK) == - ADPA_CRT_HOTPLUG_MONITOR_COLOR) + adpa &= ADPA_CRT_HOTPLUG_MONITOR_MASK; + if ((adpa == ADPA_CRT_HOTPLUG_MONITOR_COLOR) || + (adpa == ADPA_CRT_HOTPLUG_MONITOR_MONO)) ret = true; else ret = false; -- cgit v1.2.3 From f360132626b11d0dc60814033873ca0e3111677c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 22 Jul 2009 12:54:59 -0700 Subject: drm/i915: fix 845G FIFO size & burst length I had one report of flicker due to FIFO underruns on 845G. Scott was kind enough to test a few patches and report success with this one. Looks like 845G measures FIFO size slightly differently than other chips, and we were also clobbering the FIFO burst length. Fixing both of those issues gives him a healthy machine again. Note that we still only adjust plane A's watermark in the 830/845 case. If someone is willing to test we could support a bigger variety of dual-head 830/845 configurations with a bit more code. Fixes fdo bug #19304 (again). 
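Concretely the fix has two parts: the plane A FIFO field in DSPARB is converted to cachelines with a divide-by-4 on 845G instead of the divide-by-2 used on the other non-i9xx parts, and the FW_BLC update now clears only the low 12 bits (burst length plus plane A watermark) before writing a burst length of 3 alongside the computed watermark, rather than clobbering the burst length. A rough sketch of just that arithmetic; the helper names are illustrative, the masks and shifts are the ones in the patch below, and the i9xx/i965 DSPARB cases are omitted:

/* Illustrative helpers; the real logic lives in intel_display.c. */
static int plane_a_fifo_cachelines(struct drm_device *dev, u32 dsparb)
{
	int size = dsparb & 0x7f;	/* raw plane A FIFO entries */

	if (IS_845G(dev))
		return size >> 2;	/* 845G: divide by 4 to get cachelines */
	return size >> 1;		/* other old chips: divide by 2 */
}

static u32 i830_fw_blc_value(u32 old_fw_blc, int planea_wm)
{
	u32 fwater_lo = old_fw_blc & ~0xfff;	/* keep bits above the wm/burst field */

	return fwater_lo | (3 << 8)	/* burst length value of 3 */
			 | planea_wm;	/* plane A watermark in the low bits */
}
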
Reported-by: Scott Hansen Tested-by: Scott Hansen Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a9f1307d32d8..1da7b0b9ed31 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2026,6 +2026,9 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane) size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - (dsparb & 0x1ff); size >>= 1; /* Convert to cachelines */ + } else if (IS_845G(dev)) { + size = dsparb & 0x7f; + size >>= 2; /* Convert to cachelines */ } else { size = dsparb & 0x7f; size >>= 1; /* Convert to cachelines */ @@ -2125,14 +2128,16 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t fwater_lo = I915_READ(FW_BLC) & MM_FIFO_WATERMARK; + uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff; int planea_wm; i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0); planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info, pixel_size, latency_ns); - fwater_lo = fwater_lo | planea_wm; + fwater_lo |= (3<<8) | planea_wm; + + DRM_DEBUG("Setting FIFO watermarks - A: %d\n", planea_wm); I915_WRITE(FW_BLC, fwater_lo); } -- cgit v1.2.3 From bcae2ca81c4a085e3e0e8941b7c64f9aeb6fc05c Mon Sep 17 00:00:00 2001 From: "ling.ma@intel.com" Date: Mon, 20 Jul 2009 13:20:23 +0800 Subject: drm/i915: Set preferred mode for integrated TV according to TV format In order to get best possible quality image we chose 640x480 for NTSC, PAL and 480p, 1280x720 for 720p, 1920x1080 for 1080i/p TV format respectively. Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_tv.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index a43c98e3f077..da4ab4dc1630 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1490,6 +1490,27 @@ static struct input_res { {"1920x1080", 1920, 1080}, }; +/* + * Chose preferred mode according to line number of TV format + */ +static void +intel_tv_chose_preferred_modes(struct drm_connector *connector, + struct drm_display_mode *mode_ptr) +{ + struct intel_output *intel_output = to_intel_output(connector); + const struct tv_mode *tv_mode = intel_tv_mode_find(intel_output); + + if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) + mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; + else if (tv_mode->nbr_end > 480) { + if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { + if (mode_ptr->vdisplay == 720) + mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; + } else if (mode_ptr->vdisplay == 1080) + mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; + } +} + /** * Stub get_modes function. 
* @@ -1544,6 +1565,7 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->clock = (int) tmp; mode_ptr->type = DRM_MODE_TYPE_DRIVER; + intel_tv_chose_preferred_modes(connector, mode_ptr); drm_mode_probed_add(connector, mode_ptr); count++; } -- cgit v1.2.3 From fb7a46f3cc7d7dcf7c5edb3a38c84e3f730d7adb Mon Sep 17 00:00:00 2001 From: "ling.ma@intel.com" Date: Thu, 23 Jul 2009 17:11:34 +0800 Subject: drm/i915: Choose real sdvo output according to result from detection Baed on Eric's idea in order to handle multiple sdvo encoders we implement another approach to dynamically chose real one encoder after detection, which is contrasted with patch - drm/i915:Construct all possible sdvo outputs for sdvo encoder. Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_sdvo.c | 231 +++++++++++++++++++++++++------------- 1 file changed, 153 insertions(+), 78 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 4f0c30948bc4..b68746f0380e 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -55,6 +55,12 @@ struct intel_sdvo_priv { /* Pixel clock limitations reported by the SDVO device, in kHz */ int pixel_clock_min, pixel_clock_max; + /* + * For multiple function SDVO device, + * this is for current attached outputs. + */ + uint16_t attached_output; + /** * This is set if we're going to treat the device as TV-out. * @@ -114,6 +120,9 @@ struct intel_sdvo_priv { u32 save_SDVOX; }; +static bool +intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags); + /** * Writes the SDVOB or SDVOC with the given value, but always writes both * SDVOB and SDVOC to work around apparent hardware issues (according to @@ -1435,6 +1444,39 @@ void intel_sdvo_set_hotplug(struct drm_connector *connector, int on) intel_sdvo_read_response(intel_output, &response, 2); } +static bool +intel_sdvo_multifunc_encoder(struct intel_output *intel_output) +{ + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + int caps = 0; + + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)) + caps++; + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_RGB0 | SDVO_OUTPUT_RGB1)) + caps++; + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_SVID0 | SDVO_OUTPUT_SVID0)) + caps++; + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_CVBS0 | SDVO_OUTPUT_CVBS1)) + caps++; + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_YPRPB0 | SDVO_OUTPUT_YPRPB1)) + caps++; + + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_SCART0 | SDVO_OUTPUT_SCART1)) + caps++; + + if (sdvo_priv->caps.output_flags & + (SDVO_OUTPUT_LVDS0 | SDVO_OUTPUT_LVDS1)) + caps++; + + return (caps > 1); +} + static void intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) { @@ -1453,23 +1495,31 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) { - u8 response[2]; + uint16_t response; u8 status; struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ATTACHED_DISPLAYS, NULL, 0); status = intel_sdvo_read_response(intel_output, &response, 2); - DRM_DEBUG("SDVO response %d %d\n", response[0], response[1]); + DRM_DEBUG("SDVO response %d %d\n", response & 0xff, response >> 8); if (status != SDVO_CMD_STATUS_SUCCESS) return connector_status_unknown; - if ((response[0] != 0) || (response[1] 
!= 0)) { - intel_sdvo_hdmi_sink_detect(connector); - return connector_status_connected; - } else + if (response == 0) return connector_status_disconnected; + + if (intel_sdvo_multifunc_encoder(intel_output) && + sdvo_priv->attached_output != response) { + if (sdvo_priv->controlled_output != response && + intel_sdvo_output_setup(intel_output, response) != true) + return connector_status_unknown; + sdvo_priv->attached_output = response; + } + intel_sdvo_hdmi_sink_detect(connector); + return connector_status_connected; } static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) @@ -1866,16 +1916,101 @@ intel_sdvo_get_slave_addr(struct drm_device *dev, int output_device) return 0x72; } +static bool +intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags) +{ + struct drm_connector *connector = &intel_output->base; + struct drm_encoder *encoder = &intel_output->enc; + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + bool ret = true, registered = false; + + sdvo_priv->is_tv = false; + intel_output->needs_tv_clock = false; + sdvo_priv->is_lvds = false; + + if (device_is_registered(&connector->kdev)) { + drm_sysfs_connector_remove(connector); + registered = true; + } + + if (flags & + (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)) { + if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS0) + sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS0; + else + sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1; + + encoder->encoder_type = DRM_MODE_ENCODER_TMDS; + connector->connector_type = DRM_MODE_CONNECTOR_DVID; + + if (intel_sdvo_get_supp_encode(intel_output, + &sdvo_priv->encode) && + intel_sdvo_get_digital_encoding_mode(intel_output) && + sdvo_priv->is_hdmi) { + /* enable hdmi encoding mode if supported */ + intel_sdvo_set_encode(intel_output, SDVO_ENCODE_HDMI); + intel_sdvo_set_colorimetry(intel_output, + SDVO_COLORIMETRY_RGB256); + connector->connector_type = DRM_MODE_CONNECTOR_HDMIA; + } + } else if (flags & SDVO_OUTPUT_SVID0) { + + sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0; + encoder->encoder_type = DRM_MODE_ENCODER_TVDAC; + connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO; + sdvo_priv->is_tv = true; + intel_output->needs_tv_clock = true; + } else if (flags & SDVO_OUTPUT_RGB0) { + + sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0; + encoder->encoder_type = DRM_MODE_ENCODER_DAC; + connector->connector_type = DRM_MODE_CONNECTOR_VGA; + } else if (flags & SDVO_OUTPUT_RGB1) { + + sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; + encoder->encoder_type = DRM_MODE_ENCODER_DAC; + connector->connector_type = DRM_MODE_CONNECTOR_VGA; + } else if (flags & SDVO_OUTPUT_LVDS0) { + + sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; + encoder->encoder_type = DRM_MODE_ENCODER_LVDS; + connector->connector_type = DRM_MODE_CONNECTOR_LVDS; + sdvo_priv->is_lvds = true; + } else if (flags & SDVO_OUTPUT_LVDS1) { + + sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1; + encoder->encoder_type = DRM_MODE_ENCODER_LVDS; + connector->connector_type = DRM_MODE_CONNECTOR_LVDS; + sdvo_priv->is_lvds = true; + } else { + + unsigned char bytes[2]; + + sdvo_priv->controlled_output = 0; + memcpy(bytes, &sdvo_priv->caps.output_flags, 2); + DRM_DEBUG_KMS(I915_SDVO, + "%s: Unknown SDVO output type (0x%02x%02x)\n", + SDVO_NAME(sdvo_priv), + bytes[0], bytes[1]); + ret = false; + } + + if (ret && registered) + ret = drm_sysfs_connector_add(connector) == 0 ? 
true : false; + + + return ret; + +} + bool intel_sdvo_init(struct drm_device *dev, int output_device) { struct drm_connector *connector; struct intel_output *intel_output; struct intel_sdvo_priv *sdvo_priv; - int connector_type; u8 ch[0x40]; int i; - int encoder_type; intel_output = kcalloc(sizeof(struct intel_output)+sizeof(struct intel_sdvo_priv), 1, GFP_KERNEL); if (!intel_output) { @@ -1925,88 +2060,28 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) intel_output->ddc_bus->algo = &intel_sdvo_i2c_bit_algo; /* In defaut case sdvo lvds is false */ - sdvo_priv->is_lvds = false; intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); - if (sdvo_priv->caps.output_flags & - (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)) { - if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS0) - sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS0; - else - sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1; - - encoder_type = DRM_MODE_ENCODER_TMDS; - connector_type = DRM_MODE_CONNECTOR_DVID; - - if (intel_sdvo_get_supp_encode(intel_output, - &sdvo_priv->encode) && - intel_sdvo_get_digital_encoding_mode(intel_output) && - sdvo_priv->is_hdmi) { - /* enable hdmi encoding mode if supported */ - intel_sdvo_set_encode(intel_output, SDVO_ENCODE_HDMI); - intel_sdvo_set_colorimetry(intel_output, - SDVO_COLORIMETRY_RGB256); - connector_type = DRM_MODE_CONNECTOR_HDMIA; - } - } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0) - { - sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0; - encoder_type = DRM_MODE_ENCODER_TVDAC; - connector_type = DRM_MODE_CONNECTOR_SVIDEO; - sdvo_priv->is_tv = true; - intel_output->needs_tv_clock = true; - } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0) - { - sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0; - encoder_type = DRM_MODE_ENCODER_DAC; - connector_type = DRM_MODE_CONNECTOR_VGA; - } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1) - { - sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; - encoder_type = DRM_MODE_ENCODER_DAC; - connector_type = DRM_MODE_CONNECTOR_VGA; - } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0) - { - sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; - encoder_type = DRM_MODE_ENCODER_LVDS; - connector_type = DRM_MODE_CONNECTOR_LVDS; - sdvo_priv->is_lvds = true; - } - else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1) - { - sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1; - encoder_type = DRM_MODE_ENCODER_LVDS; - connector_type = DRM_MODE_CONNECTOR_LVDS; - sdvo_priv->is_lvds = true; - } - else - { - unsigned char bytes[2]; - - sdvo_priv->controlled_output = 0; - memcpy (bytes, &sdvo_priv->caps.output_flags, 2); - DRM_DEBUG_KMS(I915_SDVO, - "%s: Unknown SDVO output type (0x%02x%02x)\n", - SDVO_NAME(sdvo_priv), - bytes[0], bytes[1]); - encoder_type = DRM_MODE_ENCODER_NONE; - connector_type = DRM_MODE_CONNECTOR_Unknown; + if (intel_sdvo_output_setup(intel_output, + sdvo_priv->caps.output_flags) != true) { + DRM_DEBUG("SDVO output failed to setup on SDVO%c\n", + output_device == SDVOB ? 
'B' : 'C'); goto err_i2c; } + connector = &intel_output->base; drm_connector_init(dev, connector, &intel_sdvo_connector_funcs, - connector_type); + connector->connector_type); + drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs); connector->interlace_allowed = 0; connector->doublescan_allowed = 0; connector->display_info.subpixel_order = SubPixelHorizontalRGB; - drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type); + drm_encoder_init(dev, &intel_output->enc, + &intel_sdvo_enc_funcs, intel_output->enc.encoder_type); + drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs); drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); -- cgit v1.2.3 From 6e900de3fff01e84c96632409359a84825c54b28 Mon Sep 17 00:00:00 2001 From: Mark Ware Date: Mon, 20 Jul 2009 21:51:03 +1000 Subject: cpm_uart: Don't use alloc_bootmem in cpm_uart_cpm2.c This is another alloc_bootmem() -> kzalloc() change, this time to fix the non-fatal badness caused when booting with a cpm2_uart console. Signed-off-by: Mark Ware Signed-off-by: Kumar Gala --- drivers/serial/cpm_uart/cpm_uart_cpm2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/serial/cpm_uart/cpm_uart_cpm2.c index 141c0a3333ad..a9802e76b5fa 100644 --- a/drivers/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/serial/cpm_uart/cpm_uart_cpm2.c @@ -132,7 +132,7 @@ int cpm_uart_allocbuf(struct uart_cpm_port *pinfo, unsigned int is_con) memsz = L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize) + L1_CACHE_ALIGN(pinfo->tx_nrfifos * pinfo->tx_fifosize); if (is_con) { - mem_addr = alloc_bootmem(memsz); + mem_addr = kzalloc(memsz, GFP_NOWAIT); dma_addr = virt_to_bus(mem_addr); } else -- cgit v1.2.3 From f294526279cda8934b0313ebd02184a16ba888c9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 19 Jul 2009 14:46:09 +0300 Subject: lguest: dereferencing freed mem in add_eventfd() "new" was freed and then dereferenced. Also the return value wasn't being used so I modified the caller as well. Compile tested only. Found by smatch (http://repo.or.cz/w/smatch.git). regards, dan carpenter Signed-off-by: Dan Carpenter Signed-off-by: Rusty Russell --- drivers/lguest/lguest_user.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 9f9a2953b383..407722a8e0c4 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -52,8 +52,9 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) new->map[new->num].addr = addr; new->map[new->num].event = eventfd_ctx_fdget(fd); if (IS_ERR(new->map[new->num].event)) { + int err = PTR_ERR(new->map[new->num].event); kfree(new); - return PTR_ERR(new->map[new->num].event); + return err; } new->num++; @@ -83,7 +84,7 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) err = add_eventfd(lg, addr, fd); mutex_unlock(&lguest_lock); - return 0; + return err; } /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt -- cgit v1.2.3 From ff52c3fc7188855ede75d87b022271f0da309e5b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 23 Jul 2009 14:57:37 +0300 Subject: virtio: fix memory leak on device removal Make vp_free_vectors do the reverse of vq_request_vectors. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index bcec78ffc765..ca40517ef9c2 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -258,7 +258,6 @@ static void vp_free_vectors(struct virtio_device *vdev) for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(vp_dev->msix_entries[i].vector, vp_dev); - vp_dev->msix_used_vectors = 0; if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ @@ -267,9 +266,16 @@ static void vp_free_vectors(struct virtio_device *vdev) /* Flush the write out to device */ ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - vp_dev->msix_enabled = 0; pci_disable_msix(vp_dev->pci_dev); + vp_dev->msix_enabled = 0; + vp_dev->msix_vectors = 0; } + + vp_dev->msix_used_vectors = 0; + kfree(vp_dev->msix_names); + vp_dev->msix_names = NULL; + kfree(vp_dev->msix_entries); + vp_dev->msix_entries = NULL; } static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, @@ -297,11 +303,11 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, GFP_KERNEL); if (!vp_dev->msix_entries) - goto error_entries; + goto error; vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, GFP_KERNEL); if (!vp_dev->msix_names) - goto error_names; + goto error; for (i = 0; i < nvectors; ++i) vp_dev->msix_entries[i].entry = i; @@ -314,7 +320,7 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, name, vp_dev); if (err) - goto error_irq; + goto error; vp_dev->intx_enabled = 1; } else { vp_dev->msix_vectors = err; @@ -328,7 +334,7 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) - goto error_irq; + goto error; ++vp_dev->msix_used_vectors; iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); @@ -336,7 +342,7 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); if (v == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; - goto error_irq; + goto error; } } @@ -349,16 +355,12 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) - goto error_irq; + goto error; ++vp_dev->msix_used_vectors; } return 0; -error_irq: +error: vp_free_vectors(vdev); - kfree(vp_dev->msix_names); -error_names: - kfree(vp_dev->msix_entries); -error_entries: return err; } -- cgit v1.2.3 From f6c82507030d61e15928d5cad946d3eac1c4a384 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 26 Jul 2009 15:48:01 +0300 Subject: virtio: delete vq from list This makes delete vq the reverse of find vq. This is required to make it possible to retry find_vqs after a failure, otherwise the list gets corrupted. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index ca40517ef9c2..a1cb1a1c6522 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; - unsigned long size; + unsigned long flags, size; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vp_dev->lock, flags); iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); -- cgit v1.2.3 From e969fed542cae08cb11d666efac4f7c5d624d09f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 26 Jul 2009 15:48:08 +0300 Subject: virtio: refactor find_vqs This refactors find_vqs, making it more readable and robust, and fixing two regressions from 2.6.30: - double free_irq causing BUG_ON on device removal - probe failure when vq can't be assigned to msi-x vector (reported on old host kernels) Tested-by: Amit Shah Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 212 +++++++++++++++++++++++++------------------- 1 file changed, 119 insertions(+), 93 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index a1cb1a1c6522..248e00ec4dc1 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -52,8 +52,10 @@ struct virtio_pci_device char (*msix_names)[256]; /* Number of available vectors */ unsigned msix_vectors; - /* Vectors allocated */ + /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; + /* Whether we have vector per vq */ + bool per_vq_vectors; }; /* Constants for MSI-X */ @@ -278,27 +280,24 @@ static void vp_free_vectors(struct virtio_device *vdev) vp_dev->msix_entries = NULL; } -static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int *options, int noptions) -{ - int i; - for (i = 0; i < noptions; ++i) - if (!pci_enable_msix(dev, entries, options[i])) - return options[i]; - return -EBUSY; -} - -static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) +static int vp_request_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned i, v; int err = -ENOMEM; - /* We want at most one vector per queue and one for config changes. - * Fallback to separate vectors for config and a shared for queues. - * Finally fall back to regular interrupts. 
*/ - int options[] = { max_vqs + 1, 2 }; - int nvectors = max(options[0], options[1]); + + if (!nvectors) { + /* Can't allocate MSI-X vectors, use regular interrupt */ + vp_dev->msix_vectors = 0; + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + IRQF_SHARED, name, vp_dev); + if (err) + return err; + vp_dev->intx_enabled = 1; + return 0; + } vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, GFP_KERNEL); @@ -312,41 +311,34 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) for (i = 0; i < nvectors; ++i) vp_dev->msix_entries[i].entry = i; - err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, - options, ARRAY_SIZE(options)); - if (err < 0) { - /* Can't allocate enough MSI-X vectors, use regular interrupt */ - vp_dev->msix_vectors = 0; - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, - IRQF_SHARED, name, vp_dev); - if (err) - goto error; - vp_dev->intx_enabled = 1; - } else { - vp_dev->msix_vectors = err; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(vp_dev->msix_entries[v].vector, - vp_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; + err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); + if (err > 0) + err = -ENOSPC; + if (err) + goto error; + vp_dev->msix_vectors = nvectors; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; - iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto error; - } + iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error; } - if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) { + if (!per_vq_vectors) { /* Shared vector for all VQs */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, @@ -366,13 +358,14 @@ error: static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), - const char *name) + const char *name, + u16 vector) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq; unsigned long flags, size; - u16 num, vector; + u16 num; int err; /* Select the queue we're interested in */ @@ -391,7 +384,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; - info->vector = VIRTIO_MSI_NO_VECTOR; + info->vector = vector; size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); @@ -415,22 +408,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, vq->priv = info; info->vq = vq; - /* allocate per-vq vector if available and necessary */ - if (callback && 
vp_dev->msix_used_vectors < vp_dev->msix_vectors) { - vector = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, - "%s-%s", dev_name(&vp_dev->vdev.dev), name); - err = request_irq(vp_dev->msix_entries[vector].vector, - vring_interrupt, 0, - vp_dev->msix_names[vector], vq); - if (err) - goto out_request_irq; - info->vector = vector; - ++vp_dev->msix_used_vectors; - } else - vector = VP_MSIX_VQ_VECTOR; - - if (callback && vp_dev->msix_enabled) { + if (vector != VIRTIO_MSI_NO_VECTOR) { iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); if (vector == VIRTIO_MSI_NO_VECTOR) { @@ -446,11 +424,6 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, return vq; out_assign: - if (info->vector != VIRTIO_MSI_NO_VECTOR) { - free_irq(vp_dev->msix_entries[info->vector].vector, vq); - --vp_dev->msix_used_vectors; - } -out_request_irq: vring_del_virtqueue(vq); out_activate_queue: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); @@ -472,9 +445,6 @@ static void vp_del_vq(struct virtqueue *vq) iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (info->vector != VIRTIO_MSI_NO_VECTOR) - free_irq(vp_dev->msix_entries[info->vector].vector, vq); - if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); @@ -495,36 +465,62 @@ static void vp_del_vq(struct virtqueue *vq) /* the config->del_vqs() implementation */ static void vp_del_vqs(struct virtio_device *vdev) { + struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq, *n; + struct virtio_pci_vq_info *info; - list_for_each_entry_safe(vq, n, &vdev->vqs, list) + list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + info = vq->priv; + if (vp_dev->per_vq_vectors) + free_irq(vp_dev->msix_entries[info->vector].vector, vq); vp_del_vq(vq); + } + vp_dev->per_vq_vectors = false; vp_free_vectors(vdev); } -/* the config->find_vqs() implementation */ -static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char *names[]) +static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[], + int nvectors, + bool per_vq_vectors) { - int vectors = 0; - int i, err; - - /* How many vectors would we like? 
*/ - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++vectors; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u16 vector; + int i, err, allocated_vectors; - err = vp_request_vectors(vdev, vectors); + err = vp_request_vectors(vdev, nvectors, per_vq_vectors); if (err) goto error_request; + vp_dev->per_vq_vectors = per_vq_vectors; + allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { - vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]); - if (IS_ERR(vqs[i])) + if (!callbacks[i] || !vp_dev->msix_enabled) + vector = VIRTIO_MSI_NO_VECTOR; + else if (vp_dev->per_vq_vectors) + vector = allocated_vectors++; + else + vector = VP_MSIX_VQ_VECTOR; + vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i], vector); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); goto error_find; + } + /* allocate per-vq irq if available and necessary */ + if (vp_dev->per_vq_vectors && vector != VIRTIO_MSI_NO_VECTOR) { + snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, + "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); + err = request_irq(vp_dev->msix_entries[vector].vector, + vring_interrupt, 0, + vp_dev->msix_names[vector], vqs[i]); + if (err) { + vp_del_vq(vqs[i]); + goto error_find; + } + } } return 0; @@ -532,7 +528,37 @@ error_find: vp_del_vqs(vdev); error_request: - return PTR_ERR(vqs[i]); + return err; +} + +/* the config->find_vqs() implementation */ +static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + int vectors = 0; + int i, uninitialized_var(err); + + /* How many vectors would we like? */ + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++vectors; + + /* We want at most one vector per queue and one for config changes. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + vectors + 1, true); + if (!err) + return 0; + /* Fallback to separate vectors for config and a shared for queues. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + 2, false); + if (!err) + return 0; + /* Finally fall back to regular interrupts. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + 0, false); + return err; } static struct virtio_config_ops virtio_pci_config_ops = { -- cgit v1.2.3 From 2e04ef76916d1e29a077ea9d0f2003c8fd86724d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 30 Jul 2009 16:03:45 -0600 Subject: lguest: fix comment style I don't really notice it (except to begrudge the extra vertical space), but Ingo does. And he pointed out that one excuse of lguest is as a teaching tool, it should set a good example. 
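For reference, a minimal editorial sketch (not part of the patch) of the two comment layouts this series is converting between; the hunks below perform exactly this transformation throughout the lguest sources.

/* Old lguest style: the opening marker shares a line with the text and
 * the closing marker rides along on the last line of text. */

/*
 * Converted style: the opening and closing markers sit on lines of their
 * own, matching the multi-line comment layout used in the rest of the
 * kernel.
 */
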
Signed-off-by: Rusty Russell Cc: Ingo Molnar --- Documentation/lguest/lguest.c | 540 ++++++++++++++++++++++------------ arch/x86/include/asm/lguest.h | 3 +- arch/x86/include/asm/lguest_hcall.h | 10 +- arch/x86/lguest/boot.c | 428 +++++++++++++++++---------- arch/x86/lguest/i386_head.S | 110 ++++--- drivers/lguest/core.c | 114 ++++--- drivers/lguest/hypercalls.c | 141 ++++++--- drivers/lguest/interrupts_and_traps.c | 288 ++++++++++++------ drivers/lguest/lg.h | 23 +- drivers/lguest/lguest_device.c | 150 ++++++---- drivers/lguest/lguest_user.c | 137 ++++++--- drivers/lguest/page_tables.c | 427 ++++++++++++++++++--------- drivers/lguest/segments.c | 106 ++++--- drivers/lguest/x86/core.c | 372 +++++++++++++++-------- drivers/lguest/x86/switcher_32.S | 18 +- include/linux/lguest.h | 36 ++- include/linux/lguest_launcher.h | 18 +- 17 files changed, 1906 insertions(+), 1015 deletions(-) (limited to 'drivers') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 45d7d6dcae7a..aa66a52b73e9 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1,7 +1,9 @@ -/*P:100 This is the Launcher code, a simple program which lays out the - * "physical" memory for the new Guest by mapping the kernel image and - * the virtual devices, then opens /dev/lguest to tell the kernel - * about the Guest and control it. :*/ +/*P:100 + * This is the Launcher code, a simple program which lays out the "physical" + * memory for the new Guest by mapping the kernel image and the virtual + * devices, then opens /dev/lguest to tell the kernel about the Guest and + * control it. +:*/ #define _LARGEFILE64_SOURCE #define _GNU_SOURCE #include @@ -46,13 +48,15 @@ #include "linux/virtio_rng.h" #include "linux/virtio_ring.h" #include "asm/bootparam.h" -/*L:110 We can ignore the 39 include files we need for this program, but I do - * want to draw attention to the use of kernel-style types. +/*L:110 + * We can ignore the 39 include files we need for this program, but I do want + * to draw attention to the use of kernel-style types. * * As Linus said, "C is a Spartan language, and so should your naming be." I * like these abbreviations, so we define them here. Note that u64 is always * unsigned long long, which works on all Linux systems: this means that we can - * use %llu in printf for any u64. */ + * use %llu in printf for any u64. + */ typedef unsigned long long u64; typedef uint32_t u32; typedef uint16_t u16; @@ -69,8 +73,10 @@ typedef uint8_t u8; /* This will occupy 3 pages: it must be a power of 2. */ #define VIRTQUEUE_NUM 256 -/*L:120 verbose is both a global flag and a macro. The C preprocessor allows - * this, and although I wouldn't recommend it, it works quite nicely here. */ +/*L:120 + * verbose is both a global flag and a macro. The C preprocessor allows + * this, and although I wouldn't recommend it, it works quite nicely here. + */ static bool verbose; #define verbose(args...) \ do { if (verbose) printf(args); } while(0) @@ -100,8 +106,7 @@ struct device_list /* A single linked list of devices. */ struct device *dev; - /* And a pointer to the last device for easy append and also for - * configuration appending. */ + /* And a pointer to the last device for easy append. */ struct device *lastdev; }; @@ -168,20 +173,24 @@ static char **main_args; /* The original tty settings to restore on exit. 
*/ static struct termios orig_term; -/* We have to be careful with barriers: our devices are all run in separate +/* + * We have to be careful with barriers: our devices are all run in separate * threads and so we need to make sure that changes visible to the Guest happen - * in precise order. */ + * in precise order. + */ #define wmb() __asm__ __volatile__("" : : : "memory") #define mb() __asm__ __volatile__("" : : : "memory") -/* Convert an iovec element to the given type. +/* + * Convert an iovec element to the given type. * * This is a fairly ugly trick: we need to know the size of the type and * alignment requirement to check the pointer is kosher. It's also nice to * have the name of the type in case we report failure. * * Typing those three things all the time is cumbersome and error prone, so we - * have a macro which sets them all up and passes to the real function. */ + * have a macro which sets them all up and passes to the real function. + */ #define convert(iov, type) \ ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) @@ -198,8 +207,10 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, /* Wrapper for the last available index. Makes it easier to change. */ #define lg_last_avail(vq) ((vq)->last_avail_idx) -/* The virtio configuration space is defined to be little-endian. x86 is - * little-endian too, but it's nice to be explicit so we have these helpers. */ +/* + * The virtio configuration space is defined to be little-endian. x86 is + * little-endian too, but it's nice to be explicit so we have these helpers. + */ #define cpu_to_le16(v16) (v16) #define cpu_to_le32(v32) (v32) #define cpu_to_le64(v64) (v64) @@ -241,11 +252,12 @@ static u8 *get_feature_bits(struct device *dev) + dev->num_vq * sizeof(struct lguest_vqconfig); } -/*L:100 The Launcher code itself takes us out into userspace, that scary place - * where pointers run wild and free! Unfortunately, like most userspace - * programs, it's quite boring (which is why everyone likes to hack on the - * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it - * will get you through this section. Or, maybe not. +/*L:100 + * The Launcher code itself takes us out into userspace, that scary place where + * pointers run wild and free! Unfortunately, like most userspace programs, + * it's quite boring (which is why everyone likes to hack on the kernel!). + * Perhaps if you make up an Lguest Drinking Game at this point, it will get + * you through this section. Or, maybe not. * * The Launcher sets up a big chunk of memory to be the Guest's "physical" * memory and stores it in "guest_base". In other words, Guest physical == @@ -253,7 +265,8 @@ static u8 *get_feature_bits(struct device *dev) * * This can be tough to get your head around, but usually it just means that we * use these trivial conversion functions when the Guest gives us it's - * "physical" addresses: */ + * "physical" addresses: + */ static void *from_guest_phys(unsigned long addr) { return guest_base + addr; @@ -268,7 +281,8 @@ static unsigned long to_guest_phys(const void *addr) * Loading the Kernel. * * We start with couple of simple helper routines. open_or_die() avoids - * error-checking code cluttering the callers: */ + * error-checking code cluttering the callers: + */ static int open_or_die(const char *name, int flags) { int fd = open(name, flags); @@ -283,8 +297,10 @@ static void *map_zeroed_pages(unsigned int num) int fd = open_or_die("/dev/zero", O_RDONLY); void *addr; - /* We use a private mapping (ie. 
if we write to the page, it will be - * copied). */ + /* + * We use a private mapping (ie. if we write to the page, it will be + * copied). + */ addr = mmap(NULL, getpagesize() * num, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) @@ -305,20 +321,24 @@ static void *get_pages(unsigned int num) return addr; } -/* This routine is used to load the kernel or initrd. It tries mmap, but if +/* + * This routine is used to load the kernel or initrd. It tries mmap, but if * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), - * it falls back to reading the memory in. */ + * it falls back to reading the memory in. + */ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) { ssize_t r; - /* We map writable even though for some segments are marked read-only. + /* + * We map writable even though for some segments are marked read-only. * The kernel really wants to be writable: it patches its own * instructions. * * MAP_PRIVATE means that the page won't be copied until a write is * done to it. This allows us to share untouched memory between - * Guests. */ + * Guests. + */ if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) return; @@ -329,7 +349,8 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); } -/* This routine takes an open vmlinux image, which is in ELF, and maps it into +/* + * This routine takes an open vmlinux image, which is in ELF, and maps it into * the Guest memory. ELF = Embedded Linking Format, which is the format used * by all modern binaries on Linux including the kernel. * @@ -337,23 +358,28 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) * address. We use the physical address; the Guest will map itself to the * virtual address. * - * We return the starting address. */ + * We return the starting address. + */ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) { Elf32_Phdr phdr[ehdr->e_phnum]; unsigned int i; - /* Sanity checks on the main ELF header: an x86 executable with a - * reasonable number of correctly-sized program headers. */ + /* + * Sanity checks on the main ELF header: an x86 executable with a + * reasonable number of correctly-sized program headers. + */ if (ehdr->e_type != ET_EXEC || ehdr->e_machine != EM_386 || ehdr->e_phentsize != sizeof(Elf32_Phdr) || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) errx(1, "Malformed elf header"); - /* An ELF executable contains an ELF header and a number of "program" + /* + * An ELF executable contains an ELF header and a number of "program" * headers which indicate which parts ("segments") of the program to - * load where. */ + * load where. + */ /* We read in all the program headers at once: */ if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) @@ -361,8 +387,10 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) err(1, "Reading program headers"); - /* Try all the headers: there are usually only three. A read-only one, - * a read-write one, and a "note" section which we don't load. */ + /* + * Try all the headers: there are usually only three. A read-only one, + * a read-write one, and a "note" section which we don't load. 
+ */ for (i = 0; i < ehdr->e_phnum; i++) { /* If this isn't a loadable segment, we ignore it */ if (phdr[i].p_type != PT_LOAD) @@ -380,13 +408,15 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) return ehdr->e_entry; } -/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're - * supposed to jump into it and it will unpack itself. We used to have to - * perform some hairy magic because the unpacking code scared me. +/*L:150 + * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed + * to jump into it and it will unpack itself. We used to have to perform some + * hairy magic because the unpacking code scared me. * * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote * a small patch to jump over the tricky bits in the Guest, so now we just read - * the funky header so we know where in the file to load, and away we go! */ + * the funky header so we know where in the file to load, and away we go! + */ static unsigned long load_bzimage(int fd) { struct boot_params boot; @@ -394,8 +424,10 @@ static unsigned long load_bzimage(int fd) /* Modern bzImages get loaded at 1M. */ void *p = from_guest_phys(0x100000); - /* Go back to the start of the file and read the header. It should be - * a Linux boot header (see Documentation/x86/i386/boot.txt) */ + /* + * Go back to the start of the file and read the header. It should be + * a Linux boot header (see Documentation/x86/i386/boot.txt) + */ lseek(fd, 0, SEEK_SET); read(fd, &boot, sizeof(boot)); @@ -414,9 +446,11 @@ static unsigned long load_bzimage(int fd) return boot.hdr.code32_start; } -/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels +/*L:140 + * Loading the kernel is easy when it's a "vmlinux", but most kernels * come wrapped up in the self-decompressing "bzImage" format. With a little - * work, we can load those, too. */ + * work, we can load those, too. + */ static unsigned long load_kernel(int fd) { Elf32_Ehdr hdr; @@ -433,24 +467,28 @@ static unsigned long load_kernel(int fd) return load_bzimage(fd); } -/* This is a trivial little helper to align pages. Andi Kleen hated it because +/* + * This is a trivial little helper to align pages. Andi Kleen hated it because * it calls getpagesize() twice: "it's dumb code." * * Kernel guys get really het up about optimization, even when it's not - * necessary. I leave this code as a reaction against that. */ + * necessary. I leave this code as a reaction against that. + */ static inline unsigned long page_align(unsigned long addr) { /* Add upwards and truncate downwards. */ return ((addr + getpagesize()-1) & ~(getpagesize()-1)); } -/*L:180 An "initial ram disk" is a disk image loaded into memory along with - * the kernel which the kernel can use to boot from without needing any - * drivers. Most distributions now use this as standard: the initrd contains - * the code to load the appropriate driver modules for the current machine. +/*L:180 + * An "initial ram disk" is a disk image loaded into memory along with the + * kernel which the kernel can use to boot from without needing any drivers. + * Most distributions now use this as standard: the initrd contains the code to + * load the appropriate driver modules for the current machine. * * Importantly, James Morris works for RedHat, and Fedora uses initrds for its - * kernels. He sent me this (and tells me when I break it). */ + * kernels. He sent me this (and tells me when I break it). 
+ */ static unsigned long load_initrd(const char *name, unsigned long mem) { int ifd; @@ -462,12 +500,16 @@ static unsigned long load_initrd(const char *name, unsigned long mem) if (fstat(ifd, &st) < 0) err(1, "fstat() on initrd '%s'", name); - /* We map the initrd at the top of memory, but mmap wants it to be - * page-aligned, so we round the size up for that. */ + /* + * We map the initrd at the top of memory, but mmap wants it to be + * page-aligned, so we round the size up for that. + */ len = page_align(st.st_size); map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); - /* Once a file is mapped, you can close the file descriptor. It's a - * little odd, but quite useful. */ + /* + * Once a file is mapped, you can close the file descriptor. It's a + * little odd, but quite useful. + */ close(ifd); verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); @@ -476,8 +518,10 @@ static unsigned long load_initrd(const char *name, unsigned long mem) } /*:*/ -/* Simple routine to roll all the commandline arguments together with spaces - * between them. */ +/* + * Simple routine to roll all the commandline arguments together with spaces + * between them. + */ static void concat(char *dst, char *args[]) { unsigned int i, len = 0; @@ -494,10 +538,12 @@ static void concat(char *dst, char *args[]) dst[len] = '\0'; } -/*L:185 This is where we actually tell the kernel to initialize the Guest. We +/*L:185 + * This is where we actually tell the kernel to initialize the Guest. We * saw the arguments it expects when we looked at initialize() in lguest_user.c: * the base of Guest "physical" memory, the top physical page to allow and the - * entry point for the Guest. */ + * entry point for the Guest. + */ static void tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, @@ -522,20 +568,26 @@ static void tell_kernel(unsigned long start) static void *_check_pointer(unsigned long addr, unsigned int size, unsigned int line) { - /* We have to separately check addr and addr+size, because size could - * be huge and addr + size might wrap around. */ + /* + * We have to separately check addr and addr+size, because size could + * be huge and addr + size might wrap around. + */ if (addr >= guest_limit || addr + size >= guest_limit) errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); - /* We return a pointer for the caller's convenience, now we know it's - * safe to use. */ + /* + * We return a pointer for the caller's convenience, now we know it's + * safe to use. + */ return from_guest_phys(addr); } /* A macro which transparently hands the line number to the real function. */ #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) -/* Each buffer in the virtqueues is actually a chain of descriptors. This +/* + * Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, or vq->vring.num if we're - * at the end. */ + * at the end. + */ static unsigned next_desc(struct vring_desc *desc, unsigned int i, unsigned int max) { @@ -576,12 +628,14 @@ static void trigger_irq(struct virtqueue *vq) err(1, "Triggering irq %i", vq->config.irq); } -/* This looks in the virtqueue and for the first available buffer, and converts +/* + * This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access. 
Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * - * This function returns the descriptor number found. */ + * This function returns the descriptor number found. + */ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct iovec iov[], unsigned int *out_num, unsigned int *in_num) @@ -599,8 +653,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, /* OK, now we need to know about added descriptors. */ vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - /* They could have slipped one in as we were doing that: make - * sure it's written, then check again. */ + /* + * They could have slipped one in as we were doing that: make + * sure it's written, then check again. + */ mb(); if (last_avail != vq->vring.avail->idx) { vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; @@ -620,8 +676,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, errx(1, "Guest moved used index from %u to %u", last_avail, vq->vring.avail->idx); - /* Grab the next descriptor number they're advertising, and increment - * the index we've seen. */ + /* + * Grab the next descriptor number they're advertising, and increment + * the index we've seen. + */ head = vq->vring.avail->ring[last_avail % vq->vring.num]; lg_last_avail(vq)++; @@ -636,8 +694,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, desc = vq->vring.desc; i = head; - /* If this is an indirect entry, then this buffer contains a descriptor - * table which we handle as if it's any normal descriptor chain. */ + /* + * If this is an indirect entry, then this buffer contains a descriptor + * table which we handle as if it's any normal descriptor chain. + */ if (desc[i].flags & VRING_DESC_F_INDIRECT) { if (desc[i].len % sizeof(struct vring_desc)) errx(1, "Invalid size for indirect buffer table"); @@ -656,8 +716,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, if (desc[i].flags & VRING_DESC_F_WRITE) (*in_num)++; else { - /* If it's an output descriptor, they're all supposed - * to come before any input descriptors. */ + /* + * If it's an output descriptor, they're all supposed + * to come before any input descriptors. + */ if (*in_num) errx(1, "Descriptor has out after in"); (*out_num)++; @@ -671,14 +733,18 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, return head; } -/* After we've used one of their buffers, we tell them about it. We'll then - * want to send them an interrupt, using trigger_irq(). */ +/* + * After we've used one of their buffers, we tell them about it. We'll then + * want to send them an interrupt, using trigger_irq(). + */ static void add_used(struct virtqueue *vq, unsigned int head, int len) { struct vring_used_elem *used; - /* The virtqueue contains a ring of used buffers. Get a pointer to the - * next entry in that used ring. */ + /* + * The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. + */ used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; used->id = head; used->len = len; @@ -698,7 +764,8 @@ static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) /* * The Console * - * We associate some data with the console for our exit hack. */ + * We associate some data with the console for our exit hack. + */ struct console_abort { /* How many times have they hit ^C? */ @@ -725,20 +792,24 @@ static void console_input(struct virtqueue *vq) if (len <= 0) { /* Ran out of input? 
*/ warnx("Failed to get console input, ignoring console."); - /* For simplicity, dying threads kill the whole Launcher. So - * just nap here. */ + /* + * For simplicity, dying threads kill the whole Launcher. So + * just nap here. + */ for (;;) pause(); } add_used_and_trigger(vq, head, len); - /* Three ^C within one second? Exit. + /* + * Three ^C within one second? Exit. * * This is such a hack, but works surprisingly well. Each ^C has to * be in a buffer by itself, so they can't be too fast. But we check * that we get three within about a second, so they can't be too - * slow. */ + * slow. + */ if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { abort->count = 0; return; @@ -809,8 +880,7 @@ static bool will_block(int fd) return select(fd+1, &fdset, NULL, NULL, &zero) != 1; } -/* This is where we handle packets coming in from the tun device to our - * Guest. */ +/* This handles packets coming in from the tun device to our Guest. */ static void net_input(struct virtqueue *vq) { int len; @@ -842,8 +912,10 @@ static int do_thread(void *_vq) return 0; } -/* When a child dies, we kill our entire process group with SIGTERM. This - * also has the side effect that the shell restores the console for us! */ +/* + * When a child dies, we kill our entire process group with SIGTERM. This + * also has the side effect that the shell restores the console for us! + */ static void kill_launcher(int signal) { kill(0, SIGTERM); @@ -880,9 +952,10 @@ static void reset_device(struct device *dev) static void create_thread(struct virtqueue *vq) { - /* Create stack for thread and run it. Since stack grows - * upwards, we point the stack pointer to the end of this - * region. */ + /* + * Create stack for thread and run it. Since the stack grows upwards, + * we point the stack pointer to the end of this region. + */ char *stack = malloc(32768); unsigned long args[] = { LHREQ_EVENTFD, vq->config.pfn*getpagesize(), 0 }; @@ -981,8 +1054,11 @@ static void handle_output(unsigned long addr) } } - /* Early console write is done using notify on a nul-terminated string - * in Guest memory. */ + /* + * Early console write is done using notify on a nul-terminated string + * in Guest memory. It's also great for hacking debugging messages + * into a Guest. + */ if (addr >= guest_limit) errx(1, "Bad NOTIFY %#lx", addr); @@ -998,10 +1074,12 @@ static void handle_output(unsigned long addr) * routines to allocate and manage them. */ -/* The layout of the device page is a "struct lguest_device_desc" followed by a +/* + * The layout of the device page is a "struct lguest_device_desc" followed by a * number of virtqueue descriptors, then two sets of feature bits, then an * array of configuration bytes. This routine returns the configuration - * pointer. */ + * pointer. + */ static u8 *device_config(const struct device *dev) { return (void *)(dev->desc + 1) @@ -1009,9 +1087,11 @@ static u8 *device_config(const struct device *dev) + dev->feature_len * 2; } -/* This routine allocates a new "struct lguest_device_desc" from descriptor +/* + * This routine allocates a new "struct lguest_device_desc" from descriptor * table page just above the Guest's normal memory. It returns a pointer to - * that descriptor. */ + * that descriptor. + */ static struct lguest_device_desc *new_dev_desc(u16 type) { struct lguest_device_desc d = { .type = type }; @@ -1032,8 +1112,10 @@ static struct lguest_device_desc *new_dev_desc(u16 type) return memcpy(p, &d, sizeof(d)); } -/* Each device descriptor is followed by the description of its virtqueues. 
We - * specify how many descriptors the virtqueue is to have. */ +/* + * Each device descriptor is followed by the description of its virtqueues. We + * specify how many descriptors the virtqueue is to have. + */ static void add_virtqueue(struct device *dev, unsigned int num_descs, void (*service)(struct virtqueue *)) { @@ -1061,10 +1143,12 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, /* Initialize the vring. */ vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); - /* Append virtqueue to this device's descriptor. We use + /* + * Append virtqueue to this device's descriptor. We use * device_config() to get the end of the device's current virtqueues; * we check that we haven't added any config or feature information - * yet, otherwise we'd be overwriting them. */ + * yet, otherwise we'd be overwriting them. + */ assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); memcpy(device_config(dev), &vq->config, sizeof(vq->config)); dev->num_vq++; @@ -1072,14 +1156,18 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, verbose("Virtqueue page %#lx\n", to_guest_phys(p)); - /* Add to tail of list, so dev->vq is first vq, dev->vq->next is - * second. */ + /* + * Add to tail of list, so dev->vq is first vq, dev->vq->next is + * second. + */ for (i = &dev->vq; *i; i = &(*i)->next); *i = vq; } -/* The first half of the feature bitmask is for us to advertise features. The - * second half is for the Guest to accept features. */ +/* + * The first half of the feature bitmask is for us to advertise features. The + * second half is for the Guest to accept features. + */ static void add_feature(struct device *dev, unsigned bit) { u8 *features = get_feature_bits(dev); @@ -1093,9 +1181,11 @@ static void add_feature(struct device *dev, unsigned bit) features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); } -/* This routine sets the configuration fields for an existing device's +/* + * This routine sets the configuration fields for an existing device's * descriptor. It only works for the last device, but that's OK because that's - * how we use it. */ + * how we use it. + */ static void set_config(struct device *dev, unsigned len, const void *conf) { /* Check we haven't overflowed our single page. */ @@ -1110,10 +1200,12 @@ static void set_config(struct device *dev, unsigned len, const void *conf) assert(dev->desc->config_len == len); } -/* This routine does all the creation and setup of a new device, including +/* + * This routine does all the creation and setup of a new device, including * calling new_dev_desc() to allocate the descriptor and device memory. * - * See what I mean about userspace being boring? */ + * See what I mean about userspace being boring? + */ static struct device *new_device(const char *name, u16 type) { struct device *dev = malloc(sizeof(*dev)); @@ -1126,10 +1218,12 @@ static struct device *new_device(const char *name, u16 type) dev->num_vq = 0; dev->running = false; - /* Append to device list. Prepending to a single-linked list is + /* + * Append to device list. Prepending to a single-linked list is * easier, but the user expects the devices to be arranged on the bus * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. */ + * is eth0, the first block device /dev/vda, etc. 
+ */ if (devices.lastdev) devices.lastdev->next = dev; else @@ -1139,8 +1233,10 @@ static struct device *new_device(const char *name, u16 type) return dev; } -/* Our first setup routine is the console. It's a fairly simple device, but - * UNIX tty handling makes it uglier than it could be. */ +/* + * Our first setup routine is the console. It's a fairly simple device, but + * UNIX tty handling makes it uglier than it could be. + */ static void setup_console(void) { struct device *dev; @@ -1148,8 +1244,10 @@ static void setup_console(void) /* If we can save the initial standard input settings... */ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { struct termios term = orig_term; - /* Then we turn off echo, line buffering and ^C etc. We want a - * raw input stream to the Guest. */ + /* + * Then we turn off echo, line buffering and ^C etc: We want a + * raw input stream to the Guest. + */ term.c_lflag &= ~(ISIG|ICANON|ECHO); tcsetattr(STDIN_FILENO, TCSANOW, &term); } @@ -1160,10 +1258,12 @@ static void setup_console(void) dev->priv = malloc(sizeof(struct console_abort)); ((struct console_abort *)dev->priv)->count = 0; - /* The console needs two virtqueues: the input then the output. When + /* + * The console needs two virtqueues: the input then the output. When * they put something the input queue, we make sure we're listening to * stdin. When they put something in the output queue, we write it to - * stdout. */ + * stdout. + */ add_virtqueue(dev, VIRTQUEUE_NUM, console_input); add_virtqueue(dev, VIRTQUEUE_NUM, console_output); @@ -1171,7 +1271,8 @@ static void setup_console(void) } /*:*/ -/*M:010 Inter-guest networking is an interesting area. Simplest is to have a +/*M:010 + * Inter-guest networking is an interesting area. Simplest is to have a * --sharenet= option which opens or creates a named pipe. This can be * used to send packets to another guest in a 1:1 manner. * @@ -1185,7 +1286,8 @@ static void setup_console(void) * multiple inter-guest channels behind one interface, although it would * require some manner of hotplugging new virtio channels. * - * Finally, we could implement a virtio network switch in the kernel. :*/ + * Finally, we could implement a virtio network switch in the kernel. +:*/ static u32 str2ip(const char *ipaddr) { @@ -1210,11 +1312,13 @@ static void str2mac(const char *macaddr, unsigned char mac[6]) mac[5] = m[5]; } -/* This code is "adapted" from libbridge: it attaches the Host end of the +/* + * This code is "adapted" from libbridge: it attaches the Host end of the * network device to the bridge device specified by the command line. * * This is yet another James Morris contribution (I'm an IP-level guy, so I - * dislike bridging), and I just try not to break it. */ + * dislike bridging), and I just try not to break it. + */ static void add_to_bridge(int fd, const char *if_name, const char *br_name) { int ifidx; @@ -1234,9 +1338,11 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) err(1, "can't add %s to bridge %s", if_name, br_name); } -/* This sets up the Host end of the network device with an IP address, brings +/* + * This sets up the Host end of the network device with an IP address, brings * it up so packets will flow, the copies the MAC address into the hwaddr - * pointer. */ + * pointer. + */ static void configure_device(int fd, const char *tapif, u32 ipaddr) { struct ifreq ifr; @@ -1263,10 +1369,12 @@ static int get_tun_device(char tapif[IFNAMSIZ]) /* Start with this zeroed. Messy but sure. 
*/ memset(&ifr, 0, sizeof(ifr)); - /* We open the /dev/net/tun device and tell it we want a tap device. A + /* + * We open the /dev/net/tun device and tell it we want a tap device. A * tap device is like a tun device, only somehow different. To tell * the truth, I completely blundered my way through this code, but it - * works now! */ + * works now! + */ netfd = open_or_die("/dev/net/tun", O_RDWR); ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; strcpy(ifr.ifr_name, "tap%d"); @@ -1277,18 +1385,22 @@ static int get_tun_device(char tapif[IFNAMSIZ]) TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) err(1, "Could not set features for tun device"); - /* We don't need checksums calculated for packets coming in this - * device: trust us! */ + /* + * We don't need checksums calculated for packets coming in this + * device: trust us! + */ ioctl(netfd, TUNSETNOCSUM, 1); memcpy(tapif, ifr.ifr_name, IFNAMSIZ); return netfd; } -/*L:195 Our network is a Host<->Guest network. This can either use bridging or +/*L:195 + * Our network is a Host<->Guest network. This can either use bridging or * routing, but the principle is the same: it uses the "tun" device to inject * packets into the Host as if they came in from a normal network card. We - * just shunt packets between the Guest and the tun device. */ + * just shunt packets between the Guest and the tun device. + */ static void setup_tun_net(char *arg) { struct device *dev; @@ -1305,13 +1417,14 @@ static void setup_tun_net(char *arg) dev = new_device("net", VIRTIO_ID_NET); dev->priv = net_info; - /* Network devices need a receive and a send queue, just like - * console. */ + /* Network devices need a recv and a send queue, just like console. */ add_virtqueue(dev, VIRTQUEUE_NUM, net_input); add_virtqueue(dev, VIRTQUEUE_NUM, net_output); - /* We need a socket to perform the magic network ioctls to bring up the - * tap interface, connect to the bridge etc. Any socket will do! */ + /* + * We need a socket to perform the magic network ioctls to bring up the + * tap interface, connect to the bridge etc. Any socket will do! + */ ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); if (ipfd < 0) err(1, "opening IP socket"); @@ -1366,7 +1479,8 @@ static void setup_tun_net(char *arg) devices.device_num, tapif, arg); } -/* Our block (disk) device should be really simple: the Guest asks for a block +/* + * Our block (disk) device should be really simple: the Guest asks for a block * number and we read or write that position in the file. Unfortunately, that * was amazingly slow: the Guest waits until the read is finished before * running anything else, even if it could have been doing useful work. @@ -1374,7 +1488,9 @@ static void setup_tun_net(char *arg) * We could use async I/O, except it's reputed to suck so hard that characters * actually go missing from your code when you try to use it. * - * So we farm the I/O out to thread, and communicate with it via a pipe. */ + * So this was one reason why lguest now does all virtqueue servicing in + * separate threads: it's more efficient and more like a real device. + */ /* This hangs off device->priv. */ struct vblk_info @@ -1412,9 +1528,11 @@ static void blk_request(struct virtqueue *vq) /* Get the next request. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - /* Every block request should contain at least one output buffer + /* + * Every block request should contain at least one output buffer * (detailing the location on disk and the type of request) and one - * input buffer (to hold the result). 
*/ + * input buffer (to hold the result). + */ if (out_num == 0 || in_num == 0) errx(1, "Bad virtblk cmd %u out=%u in=%u", head, out_num, in_num); @@ -1423,33 +1541,41 @@ static void blk_request(struct virtqueue *vq) in = convert(&iov[out_num+in_num-1], u8); off = out->sector * 512; - /* The block device implements "barriers", where the Guest indicates + /* + * The block device implements "barriers", where the Guest indicates * that it wants all previous writes to occur before this write. We * don't have a way of asking our kernel to do a barrier, so we just - * synchronize all the data in the file. Pretty poor, no? */ + * synchronize all the data in the file. Pretty poor, no? + */ if (out->type & VIRTIO_BLK_T_BARRIER) fdatasync(vblk->fd); - /* In general the virtio block driver is allowed to try SCSI commands. - * It'd be nice if we supported eject, for example, but we don't. */ + /* + * In general the virtio block driver is allowed to try SCSI commands. + * It'd be nice if we supported eject, for example, but we don't. + */ if (out->type & VIRTIO_BLK_T_SCSI_CMD) { fprintf(stderr, "Scsi commands unsupported\n"); *in = VIRTIO_BLK_S_UNSUPP; wlen = sizeof(*in); } else if (out->type & VIRTIO_BLK_T_OUT) { - /* Write */ - - /* Move to the right location in the block file. This can fail - * if they try to write past end. */ + /* + * Write + * + * Move to the right location in the block file. This can fail + * if they try to write past end. + */ if (lseek64(vblk->fd, off, SEEK_SET) != off) err(1, "Bad seek to sector %llu", out->sector); ret = writev(vblk->fd, iov+1, out_num-1); verbose("WRITE to sector %llu: %i\n", out->sector, ret); - /* Grr... Now we know how long the descriptor they sent was, we + /* + * Grr... Now we know how long the descriptor they sent was, we * make sure they didn't try to write over the end of the block - * file (possibly extending it). */ + * file (possibly extending it). + */ if (ret > 0 && off + ret > vblk->len) { /* Trim it back to the correct length */ ftruncate64(vblk->fd, vblk->len); @@ -1459,10 +1585,12 @@ static void blk_request(struct virtqueue *vq) wlen = sizeof(*in); *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); } else { - /* Read */ - - /* Move to the right location in the block file. This can fail - * if they try to read past end. */ + /* + * Read + * + * Move to the right location in the block file. This can fail + * if they try to read past end. + */ if (lseek64(vblk->fd, off, SEEK_SET) != off) err(1, "Bad seek to sector %llu", out->sector); @@ -1477,10 +1605,12 @@ static void blk_request(struct virtqueue *vq) } } - /* OK, so we noted that it was pretty poor to use an fdatasync as a + /* + * OK, so we noted that it was pretty poor to use an fdatasync as a * barrier. But Christoph Hellwig points out that we need a sync * *afterwards* as well: "Barriers specify no reordering to the front - * or the back." And Jens Axboe confirmed it, so here we are: */ + * or the back." And Jens Axboe confirmed it, so here we are: + */ if (out->type & VIRTIO_BLK_T_BARRIER) fdatasync(vblk->fd); @@ -1494,7 +1624,7 @@ static void setup_block_file(const char *filename) struct vblk_info *vblk; struct virtio_blk_config conf; - /* The device responds to return from I/O thread. */ + /* Creat the device. */ dev = new_device("block", VIRTIO_ID_BLOCK); /* The device has one virtqueue, where the Guest places requests. */ @@ -1513,8 +1643,10 @@ static void setup_block_file(const char *filename) /* Tell Guest how many sectors this device has. 
*/ conf.capacity = cpu_to_le64(vblk->len / 512); - /* Tell Guest not to put in too many descriptors at once: two are used - * for the in and out elements. */ + /* + * Tell Guest not to put in too many descriptors at once: two are used + * for the in and out elements. + */ add_feature(dev, VIRTIO_BLK_F_SEG_MAX); conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); @@ -1525,16 +1657,18 @@ static void setup_block_file(const char *filename) ++devices.device_num, le64_to_cpu(conf.capacity)); } -struct rng_info { - int rfd; -}; - -/* Our random number generator device reads from /dev/random into the Guest's +/*L:211 + * Our random number generator device reads from /dev/random into the Guest's * input buffers. The usual case is that the Guest doesn't want random numbers * and so has no buffers although /dev/random is still readable, whereas * console is the reverse. * - * The same logic applies, however. */ + * The same logic applies, however. + */ +struct rng_info { + int rfd; +}; + static void rng_input(struct virtqueue *vq) { int len; @@ -1547,9 +1681,11 @@ static void rng_input(struct virtqueue *vq) if (out_num) errx(1, "Output buffers in rng?"); - /* This is why we convert to iovecs: the readv() call uses them, and so + /* + * This is why we convert to iovecs: the readv() call uses them, and so * it reads straight into the Guest's buffer. We loop to make sure we - * fill it. */ + * fill it. + */ while (!iov_empty(iov, in_num)) { len = readv(rng_info->rfd, iov, in_num); if (len <= 0) @@ -1562,15 +1698,18 @@ static void rng_input(struct virtqueue *vq) add_used(vq, head, totlen); } -/* And this creates a "hardware" random number device for the Guest. */ +/*L:199 + * This creates a "hardware" random number device for the Guest. + */ static void setup_rng(void) { struct device *dev; struct rng_info *rng_info = malloc(sizeof(*rng_info)); + /* Our device's privat info simply contains the /dev/random fd. */ rng_info->rfd = open_or_die("/dev/random", O_RDONLY); - /* The device responds to return from I/O thread. */ + /* Create the new device. */ dev = new_device("rng", VIRTIO_ID_RNG); dev->priv = rng_info; @@ -1586,8 +1725,10 @@ static void __attribute__((noreturn)) restart_guest(void) { unsigned int i; - /* Since we don't track all open fds, we simply close everything beyond - * stderr. */ + /* + * Since we don't track all open fds, we simply close everything beyond + * stderr. + */ for (i = 3; i < FD_SETSIZE; i++) close(i); @@ -1598,8 +1739,10 @@ static void __attribute__((noreturn)) restart_guest(void) err(1, "Could not exec %s", main_args[0]); } -/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves - * its input and output, and finally, lays it to rest. */ +/*L:220 + * Finally we reach the core of the Launcher which runs the Guest, serves + * its input and output, and finally, lays it to rest. + */ static void __attribute__((noreturn)) run_guest(void) { for (;;) { @@ -1634,7 +1777,7 @@ static void __attribute__((noreturn)) run_guest(void) * * Are you ready? Take a deep breath and join me in the core of the Host, in * "make Host". - :*/ +:*/ static struct option opts[] = { { "verbose", 0, NULL, 'v' }, @@ -1655,8 +1798,7 @@ static void usage(void) /*L:105 The main routine is where the real work begins: */ int main(int argc, char *argv[]) { - /* Memory, top-level pagetable, code startpoint and size of the - * (optional) initrd. */ + /* Memory, code startpoint and size of the (optional) initrd. */ unsigned long mem = 0, start, initrd_size = 0; /* Two temporaries. 
*/ int i, c; @@ -1668,24 +1810,30 @@ int main(int argc, char *argv[]) /* Save the args: we "reboot" by execing ourselves again. */ main_args = argv; - /* First we initialize the device list. We keep a pointer to the last + /* + * First we initialize the device list. We keep a pointer to the last * device, and the next interrupt number to use for devices (1: - * remember that 0 is used by the timer). */ + * remember that 0 is used by the timer). + */ devices.lastdev = NULL; devices.next_irq = 1; cpu_id = 0; - /* We need to know how much memory so we can set up the device + /* + * We need to know how much memory so we can set up the device * descriptor and memory pages for the devices as we parse the command * line. So we quickly look through the arguments to find the amount - * of memory now. */ + * of memory now. + */ for (i = 1; i < argc; i++) { if (argv[i][0] != '-') { mem = atoi(argv[i]) * 1024 * 1024; - /* We start by mapping anonymous pages over all of + /* + * We start by mapping anonymous pages over all of * guest-physical memory range. This fills it with 0, * and ensures that the Guest won't be killed when it - * tries to access it. */ + * tries to access it. + */ guest_base = map_zeroed_pages(mem / getpagesize() + DEVICE_PAGES); guest_limit = mem; @@ -1718,8 +1866,10 @@ int main(int argc, char *argv[]) usage(); } } - /* After the other arguments we expect memory and kernel image name, - * followed by command line arguments for the kernel. */ + /* + * After the other arguments we expect memory and kernel image name, + * followed by command line arguments for the kernel. + */ if (optind + 2 > argc) usage(); @@ -1737,20 +1887,26 @@ int main(int argc, char *argv[]) /* Map the initrd image if requested (at top of physical memory) */ if (initrd_name) { initrd_size = load_initrd(initrd_name, mem); - /* These are the location in the Linux boot header where the - * start and size of the initrd are expected to be found. */ + /* + * These are the location in the Linux boot header where the + * start and size of the initrd are expected to be found. + */ boot->hdr.ramdisk_image = mem - initrd_size; boot->hdr.ramdisk_size = initrd_size; /* The bootloader type 0xFF means "unknown"; that's OK. */ boot->hdr.type_of_loader = 0xFF; } - /* The Linux boot header contains an "E820" memory map: ours is a - * simple, single region. */ + /* + * The Linux boot header contains an "E820" memory map: ours is a + * simple, single region. + */ boot->e820_entries = 1; boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); - /* The boot header contains a command line pointer: we put the command - * line after the boot header. */ + /* + * The boot header contains a command line pointer: we put the command + * line after the boot header. + */ boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); /* We use a simple helper to copy the arguments separated by spaces. */ concat((char *)(boot + 1), argv+optind+2); @@ -1764,8 +1920,10 @@ int main(int argc, char *argv[]) /* Tell the entry path not to try to reload segment registers. */ boot->hdr.loadflags |= KEEP_SEGMENTS; - /* We tell the kernel to initialize the Guest: this returns the open - * /dev/lguest file descriptor. */ + /* + * We tell the kernel to initialize the Guest: this returns the open + * /dev/lguest file descriptor. + */ tell_kernel(start); /* Ensure that we terminate if a child dies. 
*/ diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 313389cd50d2..5136dad57cbb 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -17,8 +17,7 @@ /* Pages for switcher itself, then two pages per cpu */ #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) -/* We map at -4M (-2M when PAE is activated) for ease of mapping - * into the guest (one PTE page). */ +/* We map at -4M (-2M for PAE) for ease of mapping (one PTE page). */ #ifdef CONFIG_X86_PAE #define SWITCHER_ADDR 0xFFE00000 #else diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index 33600a66755f..cceb73e12e50 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -30,7 +30,8 @@ #include #include -/*G:030 But first, how does our Guest contact the Host to ask for privileged +/*G:030 + * But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * @@ -41,16 +42,15 @@ * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's - * definition of a gentleman: "someone who is only rude intentionally". */ -/*:*/ + * definition of a gentleman: "someone who is only rude intentionally". +:*/ /* Can't use our min() macro here: needs to be a constant */ #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) #define LHCALL_RING_SIZE 64 struct hcall_args { - /* These map directly onto eax, ebx, ecx, edx and esi - * in struct lguest_regs */ + /* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */ unsigned long arg0, arg1, arg2, arg3, arg4; }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index f2bf1f73d468..025c04d18f2b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -22,7 +22,8 @@ * * So how does the kernel know it's a Guest? We'll see that later, but let's * just say that we end up here where we replace the native functions various - * "paravirt" structures with our Guest versions, then boot like normal. :*/ + * "paravirt" structures with our Guest versions, then boot like normal. +:*/ /* * Copyright (C) 2006, Rusty Russell IBM Corporation. @@ -74,7 +75,8 @@ * * The Guest in our tale is a simple creature: identical to the Host but * behaving in simplified but equivalent ways. In particular, the Guest is the - * same kernel as the Host (or at least, built from the same source code). :*/ + * same kernel as the Host (or at least, built from the same source code). +:*/ struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, @@ -85,7 +87,8 @@ struct lguest_data lguest_data = { .syscall_vec = SYSCALL_VECTOR, }; -/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a +/*G:037 + * async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall * arguments, and a "hcall_status" word which is 0 if the call is ready to go, @@ -94,7 +97,8 @@ struct lguest_data lguest_data = { * If we come around to a slot which hasn't been finished, then the table is * full and we just make the hypercall directly. This has the nice side * effect of causing the Host to run all the stored calls in the ring buffer - * which empties it for next time! 
*/ + * which empties it for next time! + */ static void async_hcall(unsigned long call, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) @@ -103,9 +107,11 @@ static void async_hcall(unsigned long call, unsigned long arg1, static unsigned int next_call; unsigned long flags; - /* Disable interrupts if not already disabled: we don't want an + /* + * Disable interrupts if not already disabled: we don't want an * interrupt handler making a hypercall while we're already doing - * one! */ + * one! + */ local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ @@ -125,8 +131,9 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_restore(flags); } -/*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first - * real optimization trick! +/*G:035 + * Notice the lazy_hcall() above, rather than hcall(). This is our first real + * optimization trick! * * When lazy_mode is set, it means we're allowed to defer all hypercalls and do * them as a batch when lazy_mode is eventually turned off. Because hypercalls @@ -136,7 +143,8 @@ static void async_hcall(unsigned long call, unsigned long arg1, * lguest_leave_lazy_mode(). * * So, when we're in lazy mode, we call async_hcall() to store the call for - * future processing: */ + * future processing: + */ static void lazy_hcall1(unsigned long call, unsigned long arg1) { @@ -208,9 +216,11 @@ static void lguest_end_context_switch(struct task_struct *next) * check there before it tries to deliver an interrupt. */ -/* save_flags() is expected to return the processor state (ie. "flags"). The +/* + * save_flags() is expected to return the processor state (ie. "flags"). The * flags word contains all kind of stuff, but in practice Linux only cares - * about the interrupt flag. Our "save_flags()" just returns that. */ + * about the interrupt flag. Our "save_flags()" just returns that. + */ static unsigned long save_fl(void) { return lguest_data.irq_enabled; @@ -222,13 +232,15 @@ static void irq_disable(void) lguest_data.irq_enabled = 0; } -/* Let's pause a moment. Remember how I said these are called so often? +/* + * Let's pause a moment. Remember how I said these are called so often? * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to * break some rules. In particular, these functions are assumed to save their * own registers if they need to: normal C functions assume they can trash the * eax register. To use normal C functions, we use * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the - * C function, then restores it. */ + * C function, then restores it. + */ PV_CALLEE_SAVE_REGS_THUNK(save_fl); PV_CALLEE_SAVE_REGS_THUNK(irq_disable); /*:*/ @@ -237,18 +249,20 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable); extern void lg_irq_enable(void); extern void lg_restore_fl(unsigned long flags); -/*M:003 Note that we don't check for outstanding interrupts when we re-enable - * them (or when we unmask an interrupt). This seems to work for the moment, - * since interrupts are rare and we'll just get the interrupt on the next timer - * tick, but now we can run with CONFIG_NO_HZ, we should revisit this. One way - * would be to put the "irq_enabled" field in a page by itself, and have the - * Host write-protect it when an interrupt comes in when irqs are disabled. - * There will then be a page fault as soon as interrupts are re-enabled. 
+/*M:003 + * Note that we don't check for outstanding interrupts when we re-enable them + * (or when we unmask an interrupt). This seems to work for the moment, since + * interrupts are rare and we'll just get the interrupt on the next timer tick, + * but now we can run with CONFIG_NO_HZ, we should revisit this. One way would + * be to put the "irq_enabled" field in a page by itself, and have the Host + * write-protect it when an interrupt comes in when irqs are disabled. There + * will then be a page fault as soon as interrupts are re-enabled. * * A better method is to implement soft interrupt disable generally for x86: * instead of disabling interrupts, we set a flag. If an interrupt does come * in, we then disable them for real. This is uncommon, so we could simply use - * a hypercall for interrupt control and not worry about efficiency. :*/ + * a hypercall for interrupt control and not worry about efficiency. +:*/ /*G:034 * The Interrupt Descriptor Table (IDT). @@ -261,10 +275,12 @@ extern void lg_restore_fl(unsigned long flags); static void lguest_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) { - /* The gate_desc structure is 8 bytes long: we hand it to the Host in + /* + * The gate_desc structure is 8 bytes long: we hand it to the Host in * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors * around like this; typesafety wasn't a big concern in Linux's early - * years. */ + * years. + */ u32 *desc = (u32 *)g; /* Keep the local copy up to date. */ native_write_idt_entry(dt, entrynum, g); @@ -272,9 +288,11 @@ static void lguest_write_idt_entry(gate_desc *dt, kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); } -/* Changing to a different IDT is very rare: we keep the IDT up-to-date every +/* + * Changing to a different IDT is very rare: we keep the IDT up-to-date every * time it is written, so we can simply loop through all entries and tell the - * Host about them. */ + * Host about them. + */ static void lguest_load_idt(const struct desc_ptr *desc) { unsigned int i; @@ -305,9 +323,11 @@ static void lguest_load_gdt(const struct desc_ptr *desc) kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); } -/* For a single GDT entry which changes, we do the lazy thing: alter our GDT, +/* + * For a single GDT entry which changes, we do the lazy thing: alter our GDT, * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. */ + * that this naive implementation is reasonable. + */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) { @@ -317,29 +337,36 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, dt[entrynum].a, dt[entrynum].b); } -/* OK, I lied. There are three "thread local storage" GDT entries which change +/* + * OK, I lied. There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. */ + * __thread variables). So we have a hypercall specifically for this case. + */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { - /* There's one problem which normal hardware doesn't have: the Host + /* + * There's one problem which normal hardware doesn't have: the Host * can't handle us removing entries we're currently using. So we clear - * the GS register here: if it's needed it'll be reloaded anyway. 
*/ + * the GS register here: if it's needed it'll be reloaded anyway. + */ lazy_load_gs(0); lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu); } -/*G:038 That's enough excitement for now, back to ploughing through each of - * the different pv_ops structures (we're about 1/3 of the way through). +/*G:038 + * That's enough excitement for now, back to ploughing through each of the + * different pv_ops structures (we're about 1/3 of the way through). * * This is the Local Descriptor Table, another weird Intel thingy. Linux only * uses this for some strange applications like Wine. We don't do anything - * here, so they'll get an informative and friendly Segmentation Fault. */ + * here, so they'll get an informative and friendly Segmentation Fault. + */ static void lguest_set_ldt(const void *addr, unsigned entries) { } -/* This loads a GDT entry into the "Task Register": that entry points to a +/* + * This loads a GDT entry into the "Task Register": that entry points to a * structure called the Task State Segment. Some comments scattered though the * kernel code indicate that this used for task switching in ages past, along * with blood sacrifice and astrology. @@ -347,19 +374,21 @@ static void lguest_set_ldt(const void *addr, unsigned entries) * Now there's nothing interesting in here that we don't get told elsewhere. * But the native version uses the "ltr" instruction, which makes the Host * complain to the Guest about a Segmentation Fault and it'll oops. So we - * override the native version with a do-nothing version. */ + * override the native version with a do-nothing version. + */ static void lguest_load_tr_desc(void) { } -/* The "cpuid" instruction is a way of querying both the CPU identity +/* + * The "cpuid" instruction is a way of querying both the CPU identity * (manufacturer, model, etc) and its features. It was introduced before the * Pentium in 1993 and keeps getting extended by both Intel, AMD and others. * As you might imagine, after a decade and a half this treatment, it is now a * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. * * This instruction even it has its own Wikipedia entry. The Wikipedia entry - * has been translated into 4 languages. I am not making this up! + * has been translated into 5 languages. I am not making this up! * * We could get funky here and identify ourselves as "GenuineLguest", but * instead we just use the real "cpuid" instruction. Then I pretty much turned @@ -371,7 +400,8 @@ static void lguest_load_tr_desc(void) * Replacing the cpuid so we can turn features off is great for the kernel, but * anyone (including userspace) can just use the raw "cpuid" instruction and * the Host won't even notice since it isn't privileged. So we try not to get - * too worked up about it. */ + * too worked up about it. + */ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { @@ -379,43 +409,63 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, native_cpuid(ax, bx, cx, dx); switch (function) { - case 0: /* ID and highest CPUID. Futureproof a little by sticking to - * older ones. */ + /* + * CPUID 0 gives the highest legal CPUID number (and the ID string). + * We futureproof our code a little by sticking to known CPUID values. + */ + case 0: if (*ax > 5) *ax = 5; break; - case 1: /* Basic feature request. */ - /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ + + /* + * CPUID 1 is a basic feature request. 
+ * + * CX: we only allow kernel to see SSE3, CMPXCHG16B and SSSE3 + * DX: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU and PAE. + */ + case 1: *cx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ *dx &= 0x07808151; - /* The Host can do a nice optimization if it knows that the + /* + * The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls * flush_tlb_user() for both user and kernel mappings unless - * the Page Global Enable (PGE) feature bit is set. */ + * the Page Global Enable (PGE) feature bit is set. + */ *dx |= 0x00002000; - /* We also lie, and say we're family id 5. 6 or greater + /* + * We also lie, and say we're family id 5. 6 or greater * leads to a rdmsr in early_init_intel which we can't handle. - * Family ID is returned as bits 8-12 in ax. */ + * Family ID is returned as bits 8-12 in ax. + */ *ax &= 0xFFFFF0FF; *ax |= 0x00000500; break; + /* + * 0x80000000 returns the highest Extended Function, so we futureproof + * like we do above by limiting it to known fields. + */ case 0x80000000: - /* Futureproof this a little: if they ask how much extended - * processor information there is, limit it to known fields. */ if (*ax > 0x80000008) *ax = 0x80000008; break; + + /* + * PAE systems can mark pages as non-executable. Linux calls this the + * NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced + * Virus Protection). We just switch turn if off here, since we don't + * support it. + */ case 0x80000001: - /* Here we should fix nx cap depending on host. */ - /* For this version of PAE, we just clear NX bit. */ *dx &= ~(1 << 20); break; } } -/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. +/* + * Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother * it. The Host needs to know when the Guest wants to change them, so we have * a whole series of functions like read_cr0() and write_cr0(). @@ -430,7 +480,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * name like "FPUTRAP bit" be a little less cryptic? * * We store cr0 locally because the Host never changes it. The Guest sometimes - * wants to read it and we'd prefer not to bother the Host unnecessarily. */ + * wants to read it and we'd prefer not to bother the Host unnecessarily. + */ static unsigned long current_cr0; static void lguest_write_cr0(unsigned long val) { @@ -443,18 +494,22 @@ static unsigned long lguest_read_cr0(void) return current_cr0; } -/* Intel provided a special instruction to clear the TS bit for people too cool +/* + * Intel provided a special instruction to clear the TS bit for people too cool * to use write_cr0() to do it. This "clts" instruction is faster, because all - * the vowels have been optimized out. */ + * the vowels have been optimized out. + */ static void lguest_clts(void) { lazy_hcall1(LHCALL_TS, 0); current_cr0 &= ~X86_CR0_TS; } -/* cr2 is the virtual address of the last page fault, which the Guest only ever +/* + * cr2 is the virtual address of the last page fault, which the Guest only ever * reads. The Host kindly writes this into our "struct lguest_data", so we - * just read it out of there. */ + * just read it out of there. + */ static unsigned long lguest_read_cr2(void) { return lguest_data.cr2; @@ -463,10 +518,12 @@ static unsigned long lguest_read_cr2(void) /* See lguest_set_pte() below. 
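The CPUID masks above are easier to see in isolation, so this is a self-contained model of the arithmetic lguest_cpuid() applies after native_cpuid(), using the constants from the hunk. The input register values are made up, and the bx register is left out of the sketch because the patch never touches it.

#include <stdint.h>
#include <stdio.h>

static void lguest_style_cpuid(uint32_t function,
			       uint32_t *ax, uint32_t *cx, uint32_t *dx)
{
	switch (function) {
	case 0:				/* cap the highest CPUID level at 5 */
		if (*ax > 5)
			*ax = 5;
		break;
	case 1:				/* basic feature request */
		*cx &= 0x00002201;	/* SSE3, CMPXCHG16B, SSSE3 */
		*dx &= 0x07808151;	/* the SSE..PAE list from the comment */
		*dx |= 0x00002000;	/* claim PGE so kernel flushes help */
		*ax = (*ax & 0xFFFFF0FF) | 0x00000500;	/* claim family 5 */
		break;
	case 0x80000000:		/* cap the extended levels too */
		if (*ax > 0x80000008)
			*ax = 0x80000008;
		break;
	case 0x80000001:
		*dx &= ~(1u << 20);	/* hide NX: not supported */
		break;
	}
}

int main(void)
{
	uint32_t ax = 0x000106A5, cx = 0xFFFFFFFF, dx = 0xFFFFFFFF;

	lguest_style_cpuid(1, &ax, &cx, &dx);
	printf("ax=0x%08x cx=0x%08x dx=0x%08x\n", ax, cx, dx);
	return 0;
}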
*/ static bool cr3_changed = false; -/* cr3 is the current toplevel pagetable page: the principle is the same as +/* + * cr3 is the current toplevel pagetable page: the principle is the same as * cr0. Keep a local copy, and tell the Host when it changes. The only * difference is that our local copy is in lguest_data because the Host needs - * to set it upon our initial hypercall. */ + * to set it upon our initial hypercall. + */ static void lguest_write_cr3(unsigned long cr3) { lguest_data.pgdir = cr3; @@ -538,10 +595,12 @@ static void lguest_write_cr4(unsigned long val) * the real page tables based on the Guests'. */ -/* The Guest calls this to set a second-level entry (pte), ie. to map a page +/* + * The Guest calls this to set a second-level entry (pte), ie. to map a page * into a process' address space. We set the entry then tell the Host the * toplevel and address this corresponds to. The Guest uses one pagetable per - * process, so we need to tell the Host which one we're changing (mm->pgd). */ + * process, so we need to tell the Host which one we're changing (mm->pgd). + */ static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -560,10 +619,13 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, lguest_pte_update(mm, addr, ptep); } -/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd +/* + * The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd * to set a middle-level entry when PAE is activated. + * * Again, we set the entry then tell the Host which page we changed, - * and the index of the entry we changed. */ + * and the index of the entry we changed. + */ #ifdef CONFIG_X86_PAE static void lguest_set_pud(pud_t *pudp, pud_t pudval) { @@ -582,8 +644,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) } #else -/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not - * activated. */ +/* The Guest calls lguest_set_pmd to set a top-level entry when !PAE. */ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) { native_set_pmd(pmdp, pmdval); @@ -592,7 +653,8 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) } #endif -/* There are a couple of legacy places where the kernel sets a PTE, but we +/* + * There are a couple of legacy places where the kernel sets a PTE, but we * don't know the top level any more. This is useless for us, since we don't * know which pagetable is changing or what address, so we just tell the Host * to forget all of them. Fortunately, this is very rare. @@ -600,7 +662,8 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * ... except in early boot when the kernel sets up the initial pagetables, * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell * the Host anything changed until we've done the first page table switch, - * which brings boot back to 0.25 seconds. */ + * which brings boot back to 0.25 seconds. + */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { native_set_pte(ptep, pteval); @@ -628,7 +691,8 @@ void lguest_pmd_clear(pmd_t *pmdp) } #endif -/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on +/* + * Unfortunately for Lguest, the pv_mmu_ops for page tables were based on * native page table operations. On native hardware you can set a new page * table entry whenever you want, but if you want to remove one you have to do * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). 
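A toy model of the boot-time trade-off described above: every PTE write still updates the local tables, but the Host is only told about changes once the first pagetable switch has set cr3_changed. The table, the notify_host_flush() stub and write_cr3() below are invented stand-ins, not the kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

static unsigned long ptes[16];
static bool cr3_changed;
static unsigned int flushes;

static void notify_host_flush(void)	/* stands in for LHCALL_FLUSH_TLB */
{
	flushes++;
}

static void set_pte(unsigned int i, unsigned long val)
{
	ptes[i] = val;			/* the "native" write always happens */
	if (cr3_changed)		/* ...but the Host only hears later */
		notify_host_flush();
}

static void write_cr3(unsigned long cr3)
{
	(void)cr3;			/* a real Guest tells the Host here */
	cr3_changed = true;
}

int main(void)
{
	for (unsigned int i = 0; i < 16; i++)
		set_pte(i, i * 0x1000 + 1);	/* early boot: free of charge */
	write_cr3(0x1234000);
	set_pte(3, 0);				/* now each change notifies */
	printf("notifications before the switch: 0, after: %u\n", flushes);
	return 0;
}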
@@ -637,24 +701,29 @@ void lguest_pmd_clear(pmd_t *pmdp) * called when a valid entry is written, not when it's removed (ie. marked not * present). Instead, this is where we come when the Guest wants to remove a * page table entry: we tell the Host to set that entry to 0 (ie. the present - * bit is zero). */ + * bit is zero). + */ static void lguest_flush_tlb_single(unsigned long addr) { /* Simply set it to zero: if it was not, it will fault back in. */ lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); } -/* This is what happens after the Guest has removed a large number of entries. +/* + * This is what happens after the Guest has removed a large number of entries. * This tells the Host that any of the page table entries for userspace might - * have changed, ie. virtual addresses below PAGE_OFFSET. */ + * have changed, ie. virtual addresses below PAGE_OFFSET. + */ static void lguest_flush_tlb_user(void) { lazy_hcall1(LHCALL_FLUSH_TLB, 0); } -/* This is called when the kernel page tables have changed. That's not very +/* + * This is called when the kernel page tables have changed. That's not very * common (unless the Guest is using highmem, which makes the Guest extremely - * slow), so it's worth separating this from the user flushing above. */ + * slow), so it's worth separating this from the user flushing above. + */ static void lguest_flush_tlb_kernel(void) { lazy_hcall1(LHCALL_FLUSH_TLB, 1); @@ -691,23 +760,27 @@ static struct irq_chip lguest_irq_controller = { .unmask = enable_lguest_irq, }; -/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware +/* + * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware * interrupt (except 128, which is used for system calls), and then tells the * Linux infrastructure that each interrupt is controlled by our level-based - * lguest interrupt controller. */ + * lguest interrupt controller. + */ static void __init lguest_init_IRQ(void) { unsigned int i; for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* Some systems map "vectors" to interrupts weirdly. Lguest has - * a straightforward 1 to 1 mapping, so force that here. */ + /* Some systems map "vectors" to interrupts weirdly. Not us! */ __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; if (i != SYSCALL_VECTOR) set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } - /* This call is required to set up for 4k stacks, where we have - * separate stacks for hard and soft interrupts. */ + + /* + * This call is required to set up for 4k stacks, where we have + * separate stacks for hard and soft interrupts. + */ irq_ctx_init(smp_processor_id()); } @@ -729,31 +802,39 @@ static unsigned long lguest_get_wallclock(void) return lguest_data.time.tv_sec; } -/* The TSC is an Intel thing called the Time Stamp Counter. The Host tells us +/* + * The TSC is an Intel thing called the Time Stamp Counter. The Host tells us * what speed it runs at, or 0 if it's unusable as a reliable clock source. * This matches what we want here: if we return 0 from this function, the x86 - * TSC clock will give up and not register itself. */ + * TSC clock will give up and not register itself. + */ static unsigned long lguest_tsc_khz(void) { return lguest_data.tsc_khz; } -/* If we can't use the TSC, the kernel falls back to our lower-priority - * "lguest_clock", where we read the time value given to us by the Host. */ +/* + * If we can't use the TSC, the kernel falls back to our lower-priority + * "lguest_clock", where we read the time value given to us by the Host. 
+ */ static cycle_t lguest_clock_read(struct clocksource *cs) { unsigned long sec, nsec; - /* Since the time is in two parts (seconds and nanoseconds), we risk + /* + * Since the time is in two parts (seconds and nanoseconds), we risk * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, * and getting 99 and 0. As Linux tends to come apart under the stress - * of time travel, we must be careful: */ + * of time travel, we must be careful: + */ do { /* First we read the seconds part. */ sec = lguest_data.time.tv_sec; - /* This read memory barrier tells the compiler and the CPU that + /* + * This read memory barrier tells the compiler and the CPU that * this can't be reordered: we have to complete the above - * before going on. */ + * before going on. + */ rmb(); /* Now we read the nanoseconds part. */ nsec = lguest_data.time.tv_nsec; @@ -777,9 +858,11 @@ static struct clocksource lguest_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -/* We also need a "struct clock_event_device": Linux asks us to set it to go +/* + * We also need a "struct clock_event_device": Linux asks us to set it to go * off some time in the future. Actually, James Morris figured all this out, I - * just applied the patch. */ + * just applied the patch. + */ static int lguest_clockevent_set_next_event(unsigned long delta, struct clock_event_device *evt) { @@ -829,8 +912,10 @@ static struct clock_event_device lguest_clockevent = { .max_delta_ns = LG_CLOCK_MAX_DELTA, }; -/* This is the Guest timer interrupt handler (hardware interrupt 0). We just - * call the clockevent infrastructure and it does whatever needs doing. */ +/* + * This is the Guest timer interrupt handler (hardware interrupt 0). We just + * call the clockevent infrastructure and it does whatever needs doing. + */ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) { unsigned long flags; @@ -841,10 +926,12 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) local_irq_restore(flags); } -/* At some point in the boot process, we get asked to set up our timing +/* + * At some point in the boot process, we get asked to set up our timing * infrastructure. The kernel doesn't expect timer interrupts before this, but * we cleverly initialized the "blocked_interrupts" field of "struct - * lguest_data" so that timer interrupts were blocked until now. */ + * lguest_data" so that timer interrupts were blocked until now. + */ static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ @@ -868,14 +955,16 @@ static void lguest_time_init(void) * to work. They're pretty simple. */ -/* The Guest needs to tell the Host what stack it expects traps to use. For +/* + * The Guest needs to tell the Host what stack it expects traps to use. For * native hardware, this is part of the Task State Segment mentioned above in * lguest_load_tr_desc(), but to help hypervisors there's this special call. * * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data * segment), the privilege level (we're privilege level 1, the Host is 0 and * will not tolerate us trying to use that), the stack pointer, and the number - * of pages in the stack. */ + * of pages in the stack. 
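The retry loop in lguest_clock_read() is worth seeing whole, so here is a minimal sketch of the torn-read avoidance it describes: read the seconds, then the nanoseconds, and start again if the seconds moved underneath us. The retry condition and the barrier stand-in are my reading of the comment rather than a copy of the elided kernel lines.

#include <stdio.h>

struct two_part_time { volatile long tv_sec; volatile long tv_nsec; };

static struct two_part_time guest_time = { 99, 999999000 };

/* Stand-in for rmb(): keep the two loads in order. */
#define read_barrier() __sync_synchronize()

static unsigned long long read_clock(const struct two_part_time *t)
{
	long sec, nsec;

	do {
		sec = t->tv_sec;	/* seconds first */
		read_barrier();
		nsec = t->tv_nsec;	/* then nanoseconds */
		read_barrier();
	} while (t->tv_sec != sec);	/* seconds moved: try again */

	return (unsigned long long)sec * 1000000000ULL
	       + (unsigned long long)nsec;
}

int main(void)
{
	printf("now = %llu ns\n", read_clock(&guest_time));
	return 0;
}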
+ */ static void lguest_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -889,7 +978,8 @@ static void lguest_set_debugreg(int regno, unsigned long value) /* FIXME: Implement */ } -/* There are times when the kernel wants to make sure that no memory writes are +/* + * There are times when the kernel wants to make sure that no memory writes are * caught in the cache (that they've all reached real hardware devices). This * doesn't matter for the Guest which has virtual hardware. * @@ -903,11 +993,13 @@ static void lguest_wbinvd(void) { } -/* If the Guest expects to have an Advanced Programmable Interrupt Controller, +/* + * If the Guest expects to have an Advanced Programmable Interrupt Controller, * we play dumb by ignoring writes and returning 0 for reads. So it's no * longer Programmable nor Controlling anything, and I don't think 8 lines of * code qualifies for Advanced. It will also never interrupt anything. It - * does, however, allow us to get through the Linux boot code. */ + * does, however, allow us to get through the Linux boot code. + */ #ifdef CONFIG_X86_LOCAL_APIC static void lguest_apic_write(u32 reg, u32 v) { @@ -956,11 +1048,13 @@ static void lguest_safe_halt(void) kvm_hypercall0(LHCALL_HALT); } -/* The SHUTDOWN hypercall takes a string to describe what's happening, and +/* + * The SHUTDOWN hypercall takes a string to describe what's happening, and * an argument which says whether this to restart (reboot) the Guest or not. * * Note that the Host always prefers that the Guest speak in physical addresses - * rather than virtual addresses, so we use __pa() here. */ + * rather than virtual addresses, so we use __pa() here. + */ static void lguest_power_off(void) { kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), @@ -991,8 +1085,10 @@ static __init char *lguest_memory_setup(void) * nice to move it back to lguest_init. Patch welcome... */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); - /* The Linux bootloader header contains an "e820" memory map: the - * Launcher populated the first entry with our memory limit. */ + /* + *The Linux bootloader header contains an "e820" memory map: the + * Launcher populated the first entry with our memory limit. + */ e820_add_region(boot_params.e820_map[0].addr, boot_params.e820_map[0].size, boot_params.e820_map[0].type); @@ -1001,16 +1097,17 @@ static __init char *lguest_memory_setup(void) return "LGUEST"; } -/* We will eventually use the virtio console device to produce console output, +/* + * We will eventually use the virtio console device to produce console output, * but before that is set up we use LHCALL_NOTIFY on normal memory to produce - * console output. */ + * console output. + */ static __init int early_put_chars(u32 vtermno, const char *buf, int count) { char scratch[17]; unsigned int len = count; - /* We use a nul-terminated string, so we have to make a copy. Icky, - * huh? */ + /* We use a nul-terminated string, so we make a copy. Icky, huh? */ if (len > sizeof(scratch) - 1) len = sizeof(scratch) - 1; scratch[len] = '\0'; @@ -1021,8 +1118,10 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) return len; } -/* Rebooting also tells the Host we're finished, but the RESTART flag tells the - * Launcher to reboot us. */ +/* + * Rebooting also tells the Host we're finished, but the RESTART flag tells the + * Launcher to reboot us. 
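A self-contained version of the scratch-buffer copy early_put_chars() performs above: at most 16 characters per call, always nul-terminated, with the caller looping over the rest. The memcpy and the printf "notify" stand-in fill in parts the hunk does not show.

#include <stdio.h>
#include <string.h>

/* The printf stands in for LHCALL_NOTIFY on the scratch buffer. */
static int early_put_chars(const char *buf, unsigned int count)
{
	char scratch[17];
	unsigned int len = count;

	if (len > sizeof(scratch) - 1)
		len = sizeof(scratch) - 1;
	memcpy(scratch, buf, len);	/* the copy the hunk doesn't show */
	scratch[len] = '\0';

	printf("NOTIFY(\"%s\")\n", scratch);
	return len;			/* the caller loops for the rest */
}

int main(void)
{
	const char *msg = "Hello from a very early console";

	for (unsigned int off = 0, n = strlen(msg); off < n; )
		off += early_put_chars(msg + off, n - off);
	return 0;
}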
+ */ static void lguest_restart(char *reason) { kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); @@ -1049,7 +1148,8 @@ static void lguest_restart(char *reason) * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, - * and these are in i386_head.S. */ + * and these are in i386_head.S. + */ /*G:060 We construct a table from the assembler templates: */ static const struct lguest_insns @@ -1060,9 +1160,11 @@ static const struct lguest_insns [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, }; -/* Now our patch routine is fairly simple (based on the native one in +/* + * Now our patch routine is fairly simple (based on the native one in * paravirt.c). If we have a replacement, we copy it in and return how much of - * the available space we used. */ + * the available space we used. + */ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, unsigned long addr, unsigned len) { @@ -1074,8 +1176,7 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, insn_len = lguest_insns[type].end - lguest_insns[type].start; - /* Similarly if we can't fit replacement (shouldn't happen, but let's - * be thorough). */ + /* Similarly if it can't fit (doesn't happen, but let's be thorough). */ if (len < insn_len) return paravirt_patch_default(type, clobber, ibuf, addr, len); @@ -1084,22 +1185,28 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } -/*G:029 Once we get to lguest_init(), we know we're a Guest. The various +/*G:029 + * Once we get to lguest_init(), we know we're a Guest. The various * pv_ops structures in the kernel provide points for (almost) every routine we - * have to override to avoid privileged instructions. */ + * have to override to avoid privileged instructions. + */ __init void lguest_init(void) { - /* We're under lguest, paravirt is enabled, and we're running at - * privilege level 1, not 0 as normal. */ + /* We're under lguest. */ pv_info.name = "lguest"; + /* Paravirt is enabled. */ pv_info.paravirt_enabled = 1; + /* We're running at privilege level 1, not 0 as normal. */ pv_info.kernel_rpl = 1; + /* Everyone except Xen runs with this set. */ pv_info.shared_kernel_pmd = 1; - /* We set up all the lguest overrides for sensitive operations. These - * are detailed with the operations themselves. */ + /* + * We set up all the lguest overrides for sensitive operations. These + * are detailed with the operations themselves. 
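The patching logic above boils down to "copy the template in if it fits, otherwise leave the default in place". This userspace sketch models that decision; the byte patterns are invented and returning 0 stands in for the paravirt_patch_default() fallback.

#include <stdio.h>
#include <string.h>

struct template { const unsigned char *start; unsigned int len; };

/* Invented bytes; the real templates come from the i386_head.S labels. */
static const unsigned char tmpl_cli[] = { 0xc7, 0x05, 0xaa, 0xbb };
static const struct template templates[] = {
	{ tmpl_cli, sizeof(tmpl_cli) },
};

static unsigned int patch_site(unsigned int type, void *ibuf, unsigned int len)
{
	unsigned int insn_len = templates[type].len;

	if (len < insn_len)	/* replacement doesn't fit: */
		return 0;	/* stand-in for the default patcher */
	memcpy(ibuf, templates[type].start, insn_len);
	return insn_len;	/* how much of the space we used */
}

int main(void)
{
	unsigned char site[8];
	unsigned int used;

	memset(site, 0x90, sizeof(site));	/* pretend NOP padding */
	used = patch_site(0, site, sizeof(site));
	printf("patched %u byte(s), site now starts %02x %02x\n",
	       used, site[0], site[1]);
	return 0;
}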
+ */ - /* interrupt-related operations */ + /* Interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); @@ -1107,11 +1214,11 @@ __init void lguest_init(void) pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; - /* init-time operations */ + /* Setup operations */ pv_init_ops.memory_setup = lguest_memory_setup; pv_init_ops.patch = lguest_patch; - /* Intercepts of various cpu instructions */ + /* Intercepts of various CPU instructions */ pv_cpu_ops.load_gdt = lguest_load_gdt; pv_cpu_ops.cpuid = lguest_cpuid; pv_cpu_ops.load_idt = lguest_load_idt; @@ -1132,7 +1239,7 @@ __init void lguest_init(void) pv_cpu_ops.start_context_switch = paravirt_start_context_switch; pv_cpu_ops.end_context_switch = lguest_end_context_switch; - /* pagetable management */ + /* Pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; @@ -1154,54 +1261,71 @@ __init void lguest_init(void) pv_mmu_ops.pte_update_defer = lguest_pte_update; #ifdef CONFIG_X86_LOCAL_APIC - /* apic read/write intercepts */ + /* APIC read/write intercepts */ set_lguest_basic_apic_ops(); #endif - /* time operations */ + /* Time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; pv_time_ops.get_tsc_khz = lguest_tsc_khz; - /* Now is a good time to look at the implementations of these functions - * before returning to the rest of lguest_init(). */ + /* + * Now is a good time to look at the implementations of these functions + * before returning to the rest of lguest_init(). + */ - /*G:070 Now we've seen all the paravirt_ops, we return to + /*G:070 + * Now we've seen all the paravirt_ops, we return to * lguest_init() where the rest of the fairly chaotic boot setup - * occurs. */ + * occurs. + */ - /* The stack protector is a weird thing where gcc places a canary + /* + * The stack protector is a weird thing where gcc places a canary * value on the stack and then checks it on return. This file is * compiled with -fno-stack-protector it, so we got this far without * problems. The value of the canary is kept at offset 20 from the * %gs register, so we need to set that up before calling C functions - * in other files. */ + * in other files. + */ setup_stack_canary_segment(0); - /* We could just call load_stack_canary_segment(), but we might as - * call switch_to_new_gdt() which loads the whole table and sets up - * the per-cpu segment descriptor register %fs as well. */ + + /* + * We could just call load_stack_canary_segment(), but we might as well + * call switch_to_new_gdt() which loads the whole table and sets up the + * per-cpu segment descriptor register %fs as well. + */ switch_to_new_gdt(0); /* As described in head_32.S, we map the first 128M of memory. */ max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; - /* The Host<->Guest Switcher lives at the top of our address space, and + /* + * The Host<->Guest Switcher lives at the top of our address space, and * the Host told us how big it is when we made LGUEST_INIT hypercall: - * it put the answer in lguest_data.reserve_mem */ + * it put the answer in lguest_data.reserve_mem + */ reserve_top_address(lguest_data.reserve_mem); - /* If we don't initialize the lock dependency checker now, it crashes - * paravirt_disable_iospace. 
*/ + /* + * If we don't initialize the lock dependency checker now, it crashes + * paravirt_disable_iospace. + */ lockdep_init(); - /* The IDE code spends about 3 seconds probing for disks: if we reserve + /* + * The IDE code spends about 3 seconds probing for disks: if we reserve * all the I/O ports up front it can't get them and so doesn't probe. * Other device drivers are similar (but less severe). This cuts the - * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */ + * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. + */ paravirt_disable_iospace(); - /* This is messy CPU setup stuff which the native boot code does before - * start_kernel, so we have to do, too: */ + /* + * This is messy CPU setup stuff which the native boot code does before + * start_kernel, so we have to do, too: + */ cpu_detect(&new_cpu_data); /* head.S usually sets up the first capability word, so do it here. */ new_cpu_data.x86_capability[0] = cpuid_edx(1); @@ -1218,22 +1342,28 @@ __init void lguest_init(void) acpi_ht = 0; #endif - /* We set the preferred console to "hvc". This is the "hypervisor + /* + * We set the preferred console to "hvc". This is the "hypervisor * virtual console" driver written by the PowerPC people, which we also - * adapted for lguest's use. */ + * adapted for lguest's use. + */ add_preferred_console("hvc", 0, NULL); /* Register our very early console. */ virtio_cons_early_init(early_put_chars); - /* Last of all, we set the power management poweroff hook to point to + /* + * Last of all, we set the power management poweroff hook to point to * the Guest routine to power off, and the reboot hook to our restart - * routine. */ + * routine. + */ pm_power_off = lguest_power_off; machine_ops.restart = lguest_restart; - /* Now we're set up, call i386_start_kernel() in head32.c and we proceed - * to boot as normal. It never returns. */ + /* + * Now we're set up, call i386_start_kernel() in head32.c and we proceed + * to boot as normal. It never returns. + */ i386_start_kernel(); } /* diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index a9c8cfe61cd4..db6aa95eb054 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -5,7 +5,8 @@ #include #include -/*G:020 Our story starts with the kernel booting into startup_32 in +/*G:020 + * Our story starts with the kernel booting into startup_32 in * arch/x86/kernel/head_32.S. It expects a boot header, which is created by * the bootloader (the Launcher in our case). * @@ -21,11 +22,14 @@ * data without remembering to subtract __PAGE_OFFSET! * * The .section line puts this code in .init.text so it will be discarded after - * boot. */ + * boot. + */ .section .init.text, "ax", @progbits ENTRY(lguest_entry) - /* We make the "initialization" hypercall now to tell the Host about - * us, and also find out where it put our page tables. */ + /* + * We make the "initialization" hypercall now to tell the Host about + * us, and also find out where it put our page tables. + */ movl $LHCALL_LGUEST_INIT, %eax movl $lguest_data - __PAGE_OFFSET, %ebx .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ @@ -33,13 +37,14 @@ ENTRY(lguest_entry) /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp - /* Jumps are relative, and we're running __PAGE_OFFSET too low at the - * moment. */ + /* Jumps are relative: we're running __PAGE_OFFSET too low. */ jmp lguest_init+__PAGE_OFFSET -/*G:055 We create a macro which puts the assembler code between lgstart_ and - * lgend_ markers. 
These templates are put in the .text section: they can't be - * discarded after boot as we may need to patch modules, too. */ +/*G:055 + * We create a macro which puts the assembler code between lgstart_ and lgend_ + * markers. These templates are put in the .text section: they can't be + * discarded after boot as we may need to patch modules, too. + */ .text #define LGUEST_PATCH(name, insns...) \ lgstart_##name: insns; lgend_##name:; \ @@ -48,58 +53,74 @@ ENTRY(lguest_entry) LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) -/*G:033 But using those wrappers is inefficient (we'll see why that doesn't - * matter for save_fl and irq_disable later). If we write our routines - * carefully in assembler, we can avoid clobbering any registers and avoid - * jumping through the wrapper functions. +/*G:033 + * But using those wrappers is inefficient (we'll see why that doesn't matter + * for save_fl and irq_disable later). If we write our routines carefully in + * assembler, we can avoid clobbering any registers and avoid jumping through + * the wrapper functions. * * I skipped over our first piece of assembler, but this one is worth studying - * in a bit more detail so I'll describe in easy stages. First, the routine - * to enable interrupts: */ + * in a bit more detail so I'll describe in easy stages. First, the routine to + * enable interrupts: + */ ENTRY(lg_irq_enable) - /* The reverse of irq_disable, this sets lguest_data.irq_enabled to - * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ + /* + * The reverse of irq_disable, this sets lguest_data.irq_enabled to + * X86_EFLAGS_IF (ie. "Interrupts enabled"). + */ movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled - /* But now we need to check if the Host wants to know: there might have + /* + * But now we need to check if the Host wants to know: there might have * been interrupts waiting to be delivered, in which case it will have * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we - * jump to send_interrupts, otherwise we're done. */ + * jump to send_interrupts, otherwise we're done. + */ testl $0, lguest_data+LGUEST_DATA_irq_pending jnz send_interrupts - /* One cool thing about x86 is that you can do many things without using + /* + * One cool thing about x86 is that you can do many things without using * a register. In this case, the normal path hasn't needed to save or - * restore any registers at all! */ + * restore any registers at all! + */ ret send_interrupts: - /* OK, now we need a register: eax is used for the hypercall number, + /* + * OK, now we need a register: eax is used for the hypercall number, * which is LHCALL_SEND_INTERRUPTS. * * We used not to bother with this pending detection at all, which was * much simpler. Sooner or later the Host would realize it had to * send us an interrupt. But that turns out to make performance 7 * times worse on a simple tcp benchmark. So now we do this the hard - * way. */ + * way. + */ pushl %eax movl $LHCALL_SEND_INTERRUPTS, %eax - /* This is a vmcall instruction (same thing that KVM uses). Older + /* + * This is a vmcall instruction (same thing that KVM uses). Older * assembler versions might not know the "vmcall" instruction, so we - * create one manually here. */ + * create one manually here. + */ .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ popl %eax ret -/* Finally, the "popf" or "restore flags" routine. The %eax register holds the +/* + * Finally, the "popf" or "restore flags" routine. 
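Expressed in C, the fast path of lg_irq_enable above (and of the lg_restore_fl routine that follows) is simply: flip our own irq_enabled word, and only pay for a hypercall when the Host has flagged something pending. The struct and the *_model names are invented for this sketch; the real code is in assembler precisely so the common case touches no registers at all.

#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL

struct lguest_data_model {
	unsigned long irq_enabled;
	unsigned long irq_pending;	/* set by the Host */
};

static struct lguest_data_model lgd = { 0, X86_EFLAGS_IF };

static void send_interrupts_hypercall(void)
{
	printf("LHCALL_SEND_INTERRUPTS\n");	/* stand-in */
	lgd.irq_pending = 0;
}

static void irq_enable_model(void)
{
	lgd.irq_enabled = X86_EFLAGS_IF;
	/* Common case: nothing pending, so no hypercall and no extra work. */
	if (lgd.irq_pending & lgd.irq_enabled)
		send_interrupts_hypercall();
}

static void restore_fl_model(unsigned long flags)
{
	lgd.irq_enabled = flags;		/* X86_EFLAGS_IF or 0 */
	if (lgd.irq_pending & flags)		/* the "testl ... %eax" AND */
		send_interrupts_hypercall();
}

int main(void)
{
	irq_enable_model();			/* pending was set: hypercall */
	restore_fl_model(0);			/* stays disabled: nothing */
	restore_fl_model(X86_EFLAGS_IF);	/* nothing pending: nothing */
	return 0;
}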
The %eax register holds the * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're - * enabling interrupts again, if it's 0 we're leaving them off. */ + * enabling interrupts again, if it's 0 we're leaving them off. + */ ENTRY(lg_restore_fl) /* This is just "lguest_data.irq_enabled = flags;" */ movl %eax, lguest_data+LGUEST_DATA_irq_enabled - /* Now, if the %eax value has enabled interrupts and + /* + * Now, if the %eax value has enabled interrupts and * lguest_data.irq_pending is set, we want to tell the Host so it can * deliver any outstanding interrupts. Fortunately, both values will * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" * instruction will AND them together for us. If both are set, we - * jump to send_interrupts. */ + * jump to send_interrupts. + */ testl lguest_data+LGUEST_DATA_irq_pending, %eax jnz send_interrupts /* Again, the normal path has used no extra registers. Clever, huh? */ @@ -109,22 +130,24 @@ ENTRY(lg_restore_fl) .global lguest_noirq_start .global lguest_noirq_end -/*M:004 When the Host reflects a trap or injects an interrupt into the Guest, - * it sets the eflags interrupt bit on the stack based on - * lguest_data.irq_enabled, so the Guest iret logic does the right thing when - * restoring it. However, when the Host sets the Guest up for direct traps, - * such as system calls, the processor is the one to push eflags onto the - * stack, and the interrupt bit will be 1 (in reality, interrupts are always - * enabled in the Guest). +/*M:004 + * When the Host reflects a trap or injects an interrupt into the Guest, it + * sets the eflags interrupt bit on the stack based on lguest_data.irq_enabled, + * so the Guest iret logic does the right thing when restoring it. However, + * when the Host sets the Guest up for direct traps, such as system calls, the + * processor is the one to push eflags onto the stack, and the interrupt bit + * will be 1 (in reality, interrupts are always enabled in the Guest). * * This turns out to be harmless: the only trap which should happen under Linux * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc * regions), which has to be reflected through the Host anyway. If another * trap *does* go off when interrupts are disabled, the Guest will panic, and - * we'll never get to this iret! :*/ + * we'll never get to this iret! +:*/ -/*G:045 There is one final paravirt_op that the Guest implements, and glancing - * at it you can see why I left it to last. It's *cool*! It's in *assembler*! +/*G:045 + * There is one final paravirt_op that the Guest implements, and glancing at it + * you can see why I left it to last. It's *cool*! It's in *assembler*! * * The "iret" instruction is used to return from an interrupt or trap. The * stack looks like this: @@ -148,15 +171,18 @@ ENTRY(lg_restore_fl) * return to userspace or wherever. Our solution to this is to surround the * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be - * enabled. */ + * enabled. + */ ENTRY(lguest_iret) pushl %eax movl 12(%esp), %eax lguest_noirq_start: - /* Note the %ss: segment prefix here. Normal data accesses use the + /* + * Note the %ss: segment prefix here. Normal data accesses use the * "ds" segment, but that will have already been restored for whatever * we're returning to (such as userspace): we can't trust it. The %ss: - * prefix makes sure we use the stack segment, which is still valid. 
*/ + * prefix makes sure we use the stack segment, which is still valid. + */ movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled popl %eax iret diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index a6974e9b8ebf..cd058bc903ff 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -1,6 +1,8 @@ -/*P:400 This contains run_guest() which actually calls into the Host<->Guest +/*P:400 + * This contains run_guest() which actually calls into the Host<->Guest * Switcher and analyzes the return, such as determining if the Guest wants the - * Host to do something. This file also contains useful helper routines. :*/ + * Host to do something. This file also contains useful helper routines. +:*/ #include #include #include @@ -24,7 +26,8 @@ static struct page **switcher_page; /* This One Big lock protects all inter-guest data structures. */ DEFINE_MUTEX(lguest_lock); -/*H:010 We need to set up the Switcher at a high virtual address. Remember the +/*H:010 + * We need to set up the Switcher at a high virtual address. Remember the * Switcher is a few hundred bytes of assembler code which actually changes the * CPU to run the Guest, and then changes back to the Host when a trap or * interrupt happens. @@ -33,7 +36,8 @@ DEFINE_MUTEX(lguest_lock); * Host since it will be running as the switchover occurs. * * Trying to map memory at a particular address is an unusual thing to do, so - * it's not a simple one-liner. */ + * it's not a simple one-liner. + */ static __init int map_switcher(void) { int i, err; @@ -47,8 +51,10 @@ static __init int map_switcher(void) * easy. */ - /* We allocate an array of struct page pointers. map_vm_area() wants - * this, rather than just an array of pages. */ + /* + * We allocate an array of struct page pointers. map_vm_area() wants + * this, rather than just an array of pages. + */ switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, GFP_KERNEL); if (!switcher_page) { @@ -56,8 +62,10 @@ static __init int map_switcher(void) goto out; } - /* Now we actually allocate the pages. The Guest will see these pages, - * so we make sure they're zeroed. */ + /* + * Now we actually allocate the pages. The Guest will see these pages, + * so we make sure they're zeroed. + */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { unsigned long addr = get_zeroed_page(GFP_KERNEL); if (!addr) { @@ -67,19 +75,23 @@ static __init int map_switcher(void) switcher_page[i] = virt_to_page(addr); } - /* First we check that the Switcher won't overlap the fixmap area at + /* + * First we check that the Switcher won't overlap the fixmap area at * the top of memory. It's currently nowhere near, but it could have - * very strange effects if it ever happened. */ + * very strange effects if it ever happened. + */ if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ err = -ENOMEM; printk("lguest: mapping switcher would thwack fixmap\n"); goto free_pages; } - /* Now we reserve the "virtual memory area" we want: 0xFFC00000 + /* + * Now we reserve the "virtual memory area" we want: 0xFFC00000 * (SWITCHER_ADDR). We might not get it in theory, but in practice * it's worked so far. The end address needs +1 because __get_vm_area - * allocates an extra guard page, so we need space for that. */ + * allocates an extra guard page, so we need space for that. 
+ */ switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); @@ -89,11 +101,13 @@ static __init int map_switcher(void) goto free_pages; } - /* This code actually sets up the pages we've allocated to appear at + /* + * This code actually sets up the pages we've allocated to appear at * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our * array of struct pages. It increments that pointer, but we don't - * care. */ + * care. + */ pagep = switcher_page; err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); if (err) { @@ -101,8 +115,10 @@ static __init int map_switcher(void) goto free_vma; } - /* Now the Switcher is mapped at the right address, we can't fail! - * Copy in the compiled-in Switcher code (from _switcher.S). */ + /* + * Now the Switcher is mapped at the right address, we can't fail! + * Copy in the compiled-in Switcher code (from _switcher.S). + */ memcpy(switcher_vma->addr, start_switcher_text, end_switcher_text - start_switcher_text); @@ -124,8 +140,7 @@ out: } /*:*/ -/* Cleaning up the mapping when the module is unloaded is almost... - * too easy. */ +/* Cleaning up the mapping when the module is unloaded is almost... too easy. */ static void unmap_switcher(void) { unsigned int i; @@ -151,16 +166,19 @@ static void unmap_switcher(void) * But we can't trust the Guest: it might be trying to access the Launcher * code. We have to check that the range is below the pfn_limit the Launcher * gave us. We have to make sure that addr + len doesn't give us a false - * positive by overflowing, too. */ + * positive by overflowing, too. + */ bool lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len) { return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); } -/* This routine copies memory from the Guest. Here we can see how useful the +/* + * This routine copies memory from the Guest. Here we can see how useful the * kill_lguest() routine we met in the Launcher can be: we return a random - * value (all zeroes) instead of needing to return an error. */ + * value (all zeroes) instead of needing to return an error. + */ void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) { if (!lguest_address_ok(cpu->lg, addr, bytes) @@ -181,9 +199,11 @@ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, } /*:*/ -/*H:030 Let's jump straight to the the main loop which runs the Guest. +/*H:030 + * Let's jump straight to the the main loop which runs the Guest. * Remember, this is called by the Launcher reading /dev/lguest, and we keep - * going around and around until something interesting happens. */ + * going around and around until something interesting happens. + */ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) { /* We stop running once the Guest is dead. */ @@ -195,8 +215,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (cpu->hcall) do_hypercalls(cpu); - /* It's possible the Guest did a NOTIFY hypercall to the - * Launcher, in which case we return from the read() now. */ + /* + * It's possible the Guest did a NOTIFY hypercall to the + * Launcher, in which case we return from the read() now. 
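The lguest_address_ok() check earlier in this hunk is small enough to demonstrate standalone, including why the "addr + len >= addr" clause matters: without it a wrapping sum would sail under pfn_limit. The page size and limit below are illustrative values.

#include <stdio.h>

#define PAGE_SIZE 4096UL

static int address_ok(unsigned long pfn_limit,
		      unsigned long addr, unsigned long len)
{
	return (addr + len) / PAGE_SIZE < pfn_limit && (addr + len >= addr);
}

int main(void)
{
	unsigned long pfn_limit = 0x8000;	/* a 128MB Guest, say */

	printf("%d\n", address_ok(pfn_limit, 0x100000, 64));	/* 1: fine */
	printf("%d\n", address_ok(pfn_limit, 0x10000000, 64));	/* 0: too high */
	printf("%d\n", address_ok(pfn_limit, ~0UL - 8, 64));	/* 0: wraps */
	return 0;
}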
+ */ if (cpu->pending_notify) { if (!send_notify_to_eventfd(cpu)) { if (put_user(cpu->pending_notify, user)) @@ -209,29 +231,39 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (signal_pending(current)) return -ERESTARTSYS; - /* Check if there are any interrupts which can be delivered now: + /* + * Check if there are any interrupts which can be delivered now: * if so, this sets up the hander to be executed when we next - * run the Guest. */ + * run the Guest. + */ irq = interrupt_pending(cpu, &more); if (irq < LGUEST_IRQS) try_deliver_interrupt(cpu, irq, more); - /* All long-lived kernel loops need to check with this horrible + /* + * All long-lived kernel loops need to check with this horrible * thing called the freezer. If the Host is trying to suspend, - * it stops us. */ + * it stops us. + */ try_to_freeze(); - /* Just make absolutely sure the Guest is still alive. One of - * those hypercalls could have been fatal, for example. */ + /* + * Just make absolutely sure the Guest is still alive. One of + * those hypercalls could have been fatal, for example. + */ if (cpu->lg->dead) break; - /* If the Guest asked to be stopped, we sleep. The Guest's - * clock timer will wake us. */ + /* + * If the Guest asked to be stopped, we sleep. The Guest's + * clock timer will wake us. + */ if (cpu->halted) { set_current_state(TASK_INTERRUPTIBLE); - /* Just before we sleep, make sure no interrupt snuck in - * which we should be doing. */ + /* + * Just before we sleep, make sure no interrupt snuck in + * which we should be doing. + */ if (interrupt_pending(cpu, &more) < LGUEST_IRQS) set_current_state(TASK_RUNNING); else @@ -239,8 +271,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) continue; } - /* OK, now we're ready to jump into the Guest. First we put up - * the "Do Not Disturb" sign: */ + /* + * OK, now we're ready to jump into the Guest. First we put up + * the "Do Not Disturb" sign: + */ local_irq_disable(); /* Actually run the Guest until something happens. */ @@ -327,8 +361,10 @@ static void __exit fini(void) } /*:*/ -/* The Host side of lguest can be a module. This is a nice way for people to - * play with it. */ +/* + * The Host side of lguest can be a module. This is a nice way for people to + * play with it. + */ module_init(init); module_exit(fini); MODULE_LICENSE("GPL"); diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index c29ffa19cb74..787ab4bc09f0 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -1,8 +1,10 @@ -/*P:500 Just as userspace programs request kernel operations through a system +/*P:500 + * Just as userspace programs request kernel operations through a system * call, the Guest requests Host operations through a "hypercall". You might * notice this nomenclature doesn't really follow any logic, but the name has * been around for long enough that we're stuck with it. As you'd expect, this - * code is basically a one big switch statement. :*/ + * code is basically a one big switch statement. +:*/ /* Copyright (C) 2006 Rusty Russell IBM Corporation @@ -28,30 +30,41 @@ #include #include "lg.h" -/*H:120 This is the core hypercall routine: where the Guest gets what it wants. - * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */ +/*H:120 + * This is the core hypercall routine: where the Guest gets what it wants. + * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. 
+ */ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { switch (args->arg0) { case LHCALL_FLUSH_ASYNC: - /* This call does nothing, except by breaking out of the Guest - * it makes us process all the asynchronous hypercalls. */ + /* + * This call does nothing, except by breaking out of the Guest + * it makes us process all the asynchronous hypercalls. + */ break; case LHCALL_SEND_INTERRUPTS: - /* This call does nothing too, but by breaking out of the Guest - * it makes us process any pending interrupts. */ + /* + * This call does nothing too, but by breaking out of the Guest + * it makes us process any pending interrupts. + */ break; case LHCALL_LGUEST_INIT: - /* You can't get here unless you're already initialized. Don't - * do that. */ + /* + * You can't get here unless you're already initialized. Don't + * do that. + */ kill_guest(cpu, "already have lguest_data"); break; case LHCALL_SHUTDOWN: { - /* Shutdown is such a trivial hypercall that we do it in four - * lines right here. */ char msg[128]; - /* If the lgread fails, it will call kill_guest() itself; the - * kill_guest() with the message will be ignored. */ + /* + * Shutdown is such a trivial hypercall that we do it in four + * lines right here. + * + * If the lgread fails, it will call kill_guest() itself; the + * kill_guest() with the message will be ignored. + */ __lgread(cpu, msg, args->arg1, sizeof(msg)); msg[sizeof(msg)-1] = '\0'; kill_guest(cpu, "CRASH: %s", msg); @@ -60,16 +73,17 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) break; } case LHCALL_FLUSH_TLB: - /* FLUSH_TLB comes in two flavors, depending on the - * argument: */ + /* FLUSH_TLB comes in two flavors, depending on the argument: */ if (args->arg1) guest_pagetable_clear_all(cpu); else guest_pagetable_flush_user(cpu); break; - /* All these calls simply pass the arguments through to the right - * routines. */ + /* + * All these calls simply pass the arguments through to the right + * routines. + */ case LHCALL_NEW_PGTABLE: guest_new_pagetable(cpu, args->arg1); break; @@ -112,15 +126,16 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) kill_guest(cpu, "Bad hypercall %li\n", args->arg0); } } -/*:*/ -/*H:124 Asynchronous hypercalls are easy: we just look in the array in the +/*H:124 + * Asynchronous hypercalls are easy: we just look in the array in the * Guest's "struct lguest_data" to see if any new ones are marked "ready". * * We are careful to do these in order: obviously we respect the order the * Guest put them in the ring, but we also promise the Guest that they will * happen before any normal hypercall (which is why we check this before - * checking for a normal hcall). */ + * checking for a normal hcall). + */ static void do_async_hcalls(struct lg_cpu *cpu) { unsigned int i; @@ -133,22 +148,28 @@ static void do_async_hcalls(struct lg_cpu *cpu) /* We process "struct lguest_data"s hcalls[] ring once. */ for (i = 0; i < ARRAY_SIZE(st); i++) { struct hcall_args args; - /* We remember where we were up to from last time. This makes + /* + * We remember where we were up to from last time. This makes * sure that the hypercalls are done in the order the Guest - * places them in the ring. */ + * places them in the ring. + */ unsigned int n = cpu->next_hcall; /* 0xFF means there's no call here (yet). */ if (st[n] == 0xFF) break; - /* OK, we have hypercall. Increment the "next_hcall" cursor, - * and wrap back to 0 if we reach the end. */ + /* + * OK, we have hypercall. 
Increment the "next_hcall" cursor, + * and wrap back to 0 if we reach the end. + */ if (++cpu->next_hcall == LHCALL_RING_SIZE) cpu->next_hcall = 0; - /* Copy the hypercall arguments into a local copy of - * the hcall_args struct. */ + /* + * Copy the hypercall arguments into a local copy of the + * hcall_args struct. + */ if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], sizeof(struct hcall_args))) { kill_guest(cpu, "Fetching async hypercalls"); @@ -164,19 +185,25 @@ static void do_async_hcalls(struct lg_cpu *cpu) break; } - /* Stop doing hypercalls if they want to notify the Launcher: - * it needs to service this first. */ + /* + * Stop doing hypercalls if they want to notify the Launcher: + * it needs to service this first. + */ if (cpu->pending_notify) break; } } -/* Last of all, we look at what happens first of all. The very first time the - * Guest makes a hypercall, we end up here to set things up: */ +/* + * Last of all, we look at what happens first of all. The very first time the + * Guest makes a hypercall, we end up here to set things up: + */ static void initialize(struct lg_cpu *cpu) { - /* You can't do anything until you're initialized. The Guest knows the - * rules, so we're unforgiving here. */ + /* + * You can't do anything until you're initialized. The Guest knows the + * rules, so we're unforgiving here. + */ if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); return; @@ -185,32 +212,40 @@ static void initialize(struct lg_cpu *cpu) if (lguest_arch_init_hypercalls(cpu)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); - /* The Guest tells us where we're not to deliver interrupts by putting - * the range of addresses into "struct lguest_data". */ + /* + * The Guest tells us where we're not to deliver interrupts by putting + * the range of addresses into "struct lguest_data". + */ if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); - /* We write the current time into the Guest's data page once so it can - * set its clock. */ + /* + * We write the current time into the Guest's data page once so it can + * set its clock. + */ write_timestamp(cpu); /* page_tables.c will also do some setup. */ page_table_guest_data_init(cpu); - /* This is the one case where the above accesses might have been the + /* + * This is the one case where the above accesses might have been the * first write to a Guest page. This may have caused a copy-on-write * fault, but the old page might be (read-only) in the Guest - * pagetable. */ + * pagetable. + */ guest_pagetable_clear_all(cpu); } /*:*/ -/*M:013 If a Guest reads from a page (so creates a mapping) that it has never +/*M:013 + * If a Guest reads from a page (so creates a mapping) that it has never * written to, and then the Launcher writes to it (ie. the output of a virtual * device), the Guest will still see the old page. In practice, this never * happens: why would the Guest read a page which it has never written to? But - * a similar scenario might one day bite us, so it's worth mentioning. :*/ + * a similar scenario might one day bite us, so it's worth mentioning. +:*/ /*H:100 * Hypercalls @@ -229,17 +264,22 @@ void do_hypercalls(struct lg_cpu *cpu) return; } - /* The Guest has initialized. + /* + * The Guest has initialized. 
* - * Look in the hypercall ring for the async hypercalls: */ + * Look in the hypercall ring for the async hypercalls: + */ do_async_hcalls(cpu); - /* If we stopped reading the hypercall ring because the Guest did a + /* + * If we stopped reading the hypercall ring because the Guest did a * NOTIFY to the Launcher, we want to return now. Otherwise we do - * the hypercall. */ + * the hypercall. + */ if (!cpu->pending_notify) { do_hcall(cpu, cpu->hcall); - /* Tricky point: we reset the hcall pointer to mark the + /* + * Tricky point: we reset the hcall pointer to mark the * hypercall as "done". We use the hcall pointer rather than * the trap number to indicate a hypercall is pending. * Normally it doesn't matter: the Guest will run again and @@ -248,13 +288,16 @@ void do_hypercalls(struct lg_cpu *cpu) * However, if we are signalled or the Guest sends I/O to the * Launcher, the run_guest() loop will exit without running the * Guest. When it comes back it would try to re-run the - * hypercall. Finding that bug sucked. */ + * hypercall. Finding that bug sucked. + */ cpu->hcall = NULL; } } -/* This routine supplies the Guest with time: it's used for wallclock time at - * initial boot and as a rough time source if the TSC isn't available. */ +/* + * This routine supplies the Guest with time: it's used for wallclock time at + * initial boot and as a rough time source if the TSC isn't available. + */ void write_timestamp(struct lg_cpu *cpu) { struct timespec now; diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 0e9067b0d507..18648180db02 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -1,4 +1,5 @@ -/*P:800 Interrupts (traps) are complicated enough to earn their own file. +/*P:800 + * Interrupts (traps) are complicated enough to earn their own file. * There are three classes of interrupts: * * 1) Real hardware interrupts which occur while we're running the Guest, @@ -10,7 +11,8 @@ * just like real hardware would deliver them. Traps from the Guest can be set * up to go directly back into the Guest, but sometimes the Host wants to see * them first, so we also have a way of "reflecting" them into the Guest as if - * they had been delivered to it directly. :*/ + * they had been delivered to it directly. +:*/ #include #include #include @@ -26,8 +28,10 @@ static unsigned long idt_address(u32 lo, u32 hi) return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); } -/* The "type" of the interrupt handler is a 4 bit field: we only support a - * couple of types. */ +/* + * The "type" of the interrupt handler is a 4 bit field: we only support a + * couple of types. + */ static int idt_type(u32 lo, u32 hi) { return (hi >> 8) & 0xF; @@ -39,8 +43,10 @@ static bool idt_present(u32 lo, u32 hi) return (hi & 0x8000); } -/* We need a helper to "push" a value onto the Guest's stack, since that's a - * big part of what delivering an interrupt does. */ +/* + * We need a helper to "push" a value onto the Guest's stack, since that's a + * big part of what delivering an interrupt does. + */ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) { /* Stack grows upwards: move stack then write value. */ @@ -48,7 +54,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) lgwrite(cpu, *gstack, u32, val); } -/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or +/*H:210 + * The set_guest_interrupt() routine actually delivers the interrupt or * trap. 
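The three helpers at the top of interrupts_and_traps.c are pure bit-twiddling, so here they are as a standalone demo: an x86 gate keeps the handler address split across its low and high words, with the type and present bits packed into the high word. The sample descriptor value is made up.

#include <stdint.h>
#include <stdio.h>

static uint32_t idt_address(uint32_t lo, uint32_t hi)
{
	return (lo & 0x0000FFFF) | (hi & 0xFFFF0000);
}

static unsigned int idt_type(uint32_t lo, uint32_t hi)
{
	(void)lo;
	return (hi >> 8) & 0xF;
}

static int idt_present(uint32_t lo, uint32_t hi)
{
	(void)lo;
	return !!(hi & 0x8000);
}

int main(void)
{
	/* Handler at 0xc01234ab, type 0xE (interrupt gate), present. */
	uint32_t lo = 0x006034ab, hi = 0xc0128e00;

	printf("addr=0x%08x type=0x%x present=%d\n",
	       idt_address(lo, hi), idt_type(lo, hi), idt_present(lo, hi));
	return 0;
}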
The mechanics of delivering traps and interrupts to the Guest are the * same, except some traps have an "error code" which gets pushed onto the * stack as well: the caller tells us if this is one. @@ -59,7 +66,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) * * We set up the stack just like the CPU does for a real interrupt, so it's * identical for the Guest (and the standard "iret" instruction will undo - * it). */ + * it). + */ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, bool has_err) { @@ -67,20 +75,26 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, u32 eflags, ss, irq_enable; unsigned long virtstack; - /* There are two cases for interrupts: one where the Guest is already + /* + * There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in - * userspace. We check the privilege level to find out. */ + * userspace. We check the privilege level to find out. + */ if ((cpu->regs->ss&0x3) != GUEST_PL) { - /* The Guest told us their kernel stack with the SET_STACK - * hypercall: both the virtual address and the segment */ + /* + * The Guest told us their kernel stack with the SET_STACK + * hypercall: both the virtual address and the segment. + */ virtstack = cpu->esp1; ss = cpu->ss1; origstack = gstack = guest_pa(cpu, virtstack); - /* We push the old stack segment and pointer onto the new + /* + * We push the old stack segment and pointer onto the new * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege - * levels and expect these here. */ + * levels and expect these here. + */ push_guest_stack(cpu, &gstack, cpu->regs->ss); push_guest_stack(cpu, &gstack, cpu->regs->esp); } else { @@ -91,18 +105,22 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, origstack = gstack = guest_pa(cpu, virtstack); } - /* Remember that we never let the Guest actually disable interrupts, so + /* + * Remember that we never let the Guest actually disable interrupts, so * the "Interrupt Flag" bit is always set. We copy that bit from the * Guest's "irq_enabled" field into the eflags word: we saw the Guest - * copy it back in "lguest_iret". */ + * copy it back in "lguest_iret". + */ eflags = cpu->regs->eflags; if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 && !(irq_enable & X86_EFLAGS_IF)) eflags &= ~X86_EFLAGS_IF; - /* An interrupt is expected to push three things on the stack: the old + /* + * An interrupt is expected to push three things on the stack: the old * "eflags" word, the old code segment, and the old instruction - * pointer. */ + * pointer. + */ push_guest_stack(cpu, &gstack, eflags); push_guest_stack(cpu, &gstack, cpu->regs->cs); push_guest_stack(cpu, &gstack, cpu->regs->eip); @@ -111,15 +129,19 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, if (has_err) push_guest_stack(cpu, &gstack, cpu->regs->errcode); - /* Now we've pushed all the old state, we change the stack, the code - * segment and the address to execute. */ + /* + * Now we've pushed all the old state, we change the stack, the code + * segment and the address to execute. + */ cpu->regs->ss = ss; cpu->regs->esp = virtstack + (gstack - origstack); cpu->regs->cs = (__KERNEL_CS|GUEST_PL); cpu->regs->eip = idt_address(lo, hi); - /* There are two kinds of interrupt handlers: 0xE is an "interrupt - * gate" which expects interrupts to be disabled on entry. 
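Conceptually, set_guest_interrupt() just builds a small frame on the Guest's kernel stack before redirecting eip. A simplified userspace model of that push sequence, operating on an ordinary buffer rather than Guest memory (all names here are placeholders):

#include <stdint.h>
#include <string.h>

/* Push one 32-bit value: move the stack pointer down, then store. */
static void push32(uint8_t *stack_base, uint32_t *sp, uint32_t val)
{
	*sp -= 4;
	memcpy(stack_base + *sp, &val, sizeof(val));
}

/* Build the frame an interrupt leaves behind: eflags, cs, eip and,
 * for some traps, an error code.  'sp' is an offset into 'stack_base'. */
static void push_trap_frame(uint8_t *stack_base, uint32_t *sp,
			    uint32_t eflags, uint32_t cs, uint32_t eip,
			    int has_err, uint32_t errcode)
{
	push32(stack_base, sp, eflags);
	push32(stack_base, sp, cs);
	push32(stack_base, sp, eip);
	if (has_err)
		push32(stack_base, sp, errcode);
}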
*/ + /* + * There are two kinds of interrupt handlers: 0xE is an "interrupt + * gate" which expects interrupts to be disabled on entry. + */ if (idt_type(lo, hi) == 0xE) if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) kill_guest(cpu, "Disabling interrupts"); @@ -130,7 +152,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, * * interrupt_pending() returns the first pending interrupt which isn't blocked * by the Guest. It is called before every entry to the Guest, and just before - * we go to sleep when the Guest has halted itself. */ + * we go to sleep when the Guest has halted itself. + */ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) { unsigned int irq; @@ -140,8 +163,10 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) if (!cpu->lg->lguest_data) return LGUEST_IRQS; - /* Take our "irqs_pending" array and remove any interrupts the Guest - * wants blocked: the result ends up in "blk". */ + /* + * Take our "irqs_pending" array and remove any interrupts the Guest + * wants blocked: the result ends up in "blk". + */ if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, sizeof(blk))) return LGUEST_IRQS; @@ -154,16 +179,20 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) return irq; } -/* This actually diverts the Guest to running an interrupt handler, once an - * interrupt has been identified by interrupt_pending(). */ +/* + * This actually diverts the Guest to running an interrupt handler, once an + * interrupt has been identified by interrupt_pending(). + */ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) { struct desc_struct *idt; BUG_ON(irq >= LGUEST_IRQS); - /* They may be in the middle of an iret, where they asked us never to - * deliver interrupts. */ + /* + * They may be in the middle of an iret, where they asked us never to + * deliver interrupts. + */ if (cpu->regs->eip >= cpu->lg->noirq_start && (cpu->regs->eip < cpu->lg->noirq_end)) return; @@ -187,29 +216,37 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) } } - /* Look at the IDT entry the Guest gave us for this interrupt. The + /* + * Look at the IDT entry the Guest gave us for this interrupt. The * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip - * over them. */ + * over them. + */ idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; /* If they don't have a handler (yet?), we just ignore it */ if (idt_present(idt->a, idt->b)) { /* OK, mark it no longer pending and deliver it. */ clear_bit(irq, cpu->irqs_pending); - /* set_guest_interrupt() takes the interrupt descriptor and a + /* + * set_guest_interrupt() takes the interrupt descriptor and a * flag to say whether this interrupt pushes an error code onto - * the stack as well: virtual interrupts never do. */ + * the stack as well: virtual interrupts never do. + */ set_guest_interrupt(cpu, idt->a, idt->b, false); } - /* Every time we deliver an interrupt, we update the timestamp in the + /* + * Every time we deliver an interrupt, we update the timestamp in the * Guest's lguest_data struct. It would be better for the Guest if we * did this more often, but it can actually be quite slow: doing it * here is a compromise which means at least it gets updated every - * timer interrupt. */ + * timer interrupt. + */ write_timestamp(cpu); - /* If there are no other interrupts we want to deliver, clear - * the pending flag. */ + /* + * If there are no other interrupts we want to deliver, clear + * the pending flag. 
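interrupt_pending()'s core calculation is just "pending AND NOT blocked, then take the lowest set bit". A small single-word sketch (the real code works on a multi-word bitmap of LGUEST_IRQS entries; 32 is used here only for illustration):

#include <stdint.h>

#define NO_IRQ 32	/* illustrative "nothing deliverable" value */

/* Return the lowest set bit of (pending & ~blocked), or NO_IRQ. */
static unsigned int first_deliverable_irq(uint32_t pending, uint32_t blocked)
{
	uint32_t deliverable = pending & ~blocked;
	unsigned int irq;

	for (irq = 0; irq < 32; irq++)
		if (deliverable & (1u << irq))
			return irq;
	return NO_IRQ;
}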
+ */ if (!more) put_user(0, &cpu->lg->lguest_data->irq_pending); } @@ -217,24 +254,29 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) /* And this is the routine when we want to set an interrupt for the Guest. */ void set_interrupt(struct lg_cpu *cpu, unsigned int irq) { - /* Next time the Guest runs, the core code will see if it can deliver - * this interrupt. */ + /* + * Next time the Guest runs, the core code will see if it can deliver + * this interrupt. + */ set_bit(irq, cpu->irqs_pending); - /* Make sure it sees it; it might be asleep (eg. halted), or - * running the Guest right now, in which case kick_process() - * will knock it out. */ + /* + * Make sure it sees it; it might be asleep (eg. halted), or running + * the Guest right now, in which case kick_process() will knock it out. + */ if (!wake_up_process(cpu->tsk)) kick_process(cpu->tsk); } /*:*/ -/* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent +/* + * Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent * me a patch, so we support that too. It'd be a big step for lguest if half * the Plan 9 user base were to start using it. * * Actually now I think of it, it's possible that Ron *is* half the Plan 9 - * userbase. Oh well. */ + * userbase. Oh well. + */ static bool could_be_syscall(unsigned int num) { /* Normal Linux SYSCALL_VECTOR or reserved vector? */ @@ -274,9 +316,11 @@ void free_interrupts(void) clear_bit(syscall_vector, used_vectors); } -/*H:220 Now we've got the routines to deliver interrupts, delivering traps like +/*H:220 + * Now we've got the routines to deliver interrupts, delivering traps like * page fault is easy. The only trick is that Intel decided that some traps - * should have error codes: */ + * should have error codes: + */ static bool has_err(unsigned int trap) { return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); @@ -285,13 +329,17 @@ static bool has_err(unsigned int trap) /* deliver_trap() returns true if it could deliver the trap. */ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) { - /* Trap numbers are always 8 bit, but we set an impossible trap number - * for traps inside the Switcher, so check that here. */ + /* + * Trap numbers are always 8 bit, but we set an impossible trap number + * for traps inside the Switcher, so check that here. + */ if (num >= ARRAY_SIZE(cpu->arch.idt)) return false; - /* Early on the Guest hasn't set the IDT entries (or maybe it put a - * bogus one in): if we fail here, the Guest will be killed. */ + /* + * Early on the Guest hasn't set the IDT entries (or maybe it put a + * bogus one in): if we fail here, the Guest will be killed. + */ if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) return false; set_guest_interrupt(cpu, cpu->arch.idt[num].a, @@ -299,7 +347,8 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) return true; } -/*H:250 Here's the hard part: returning to the Host every time a trap happens +/*H:250 + * Here's the hard part: returning to the Host every time a trap happens * and then calling deliver_trap() and re-entering the Guest is slow. * Particularly because Guest userspace system calls are traps (usually trap * 128). @@ -311,69 +360,87 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) * the other hypervisors would beat it up at lunchtime. * * This routine indicates if a particular trap number could be delivered - * directly. */ + * directly. 
+ */ static bool direct_trap(unsigned int num) { - /* Hardware interrupts don't go to the Guest at all (except system - * call). */ + /* + * Hardware interrupts don't go to the Guest at all (except system + * call). + */ if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) return false; - /* The Host needs to see page faults (for shadow paging and to save the + /* + * The Host needs to see page faults (for shadow paging and to save the * fault address), general protection faults (in/out emulation) and * device not available (TS handling), invalid opcode fault (kvm hcall), - * and of course, the hypercall trap. */ + * and of course, the hypercall trap. + */ return num != 14 && num != 13 && num != 7 && num != 6 && num != LGUEST_TRAP_ENTRY; } /*:*/ -/*M:005 The Guest has the ability to turn its interrupt gates into trap gates, +/*M:005 + * The Guest has the ability to turn its interrupt gates into trap gates, * if it is careful. The Host will let trap gates can go directly to the * Guest, but the Guest needs the interrupts atomically disabled for an * interrupt gate. It can do this by pointing the trap gate at instructions - * within noirq_start and noirq_end, where it can safely disable interrupts. */ + * within noirq_start and noirq_end, where it can safely disable interrupts. + */ -/*M:006 The Guests do not use the sysenter (fast system call) instruction, +/*M:006 + * The Guests do not use the sysenter (fast system call) instruction, * because it's hardcoded to enter privilege level 0 and so can't go direct. * It's about twice as fast as the older "int 0x80" system call, so it might * still be worthwhile to handle it in the Switcher and lcall down to the * Guest. The sysenter semantics are hairy tho: search for that keyword in - * entry.S :*/ + * entry.S +:*/ -/*H:260 When we make traps go directly into the Guest, we need to make sure +/*H:260 + * When we make traps go directly into the Guest, we need to make sure * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the * CPU trying to deliver the trap will fault while trying to push the interrupt * words on the stack: this is called a double fault, and it forces us to kill * the Guest. * - * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ + * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. + */ void pin_stack_pages(struct lg_cpu *cpu) { unsigned int i; - /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or - * two pages of stack space. */ + /* + * Depending on the CONFIG_4KSTACKS option, the Guest can have one or + * two pages of stack space. + */ for (i = 0; i < cpu->lg->stack_pages; i++) - /* The stack grows *upwards*, so the address we're given is the + /* + * The stack grows *upwards*, so the address we're given is the * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to - * get to the rest of the stack pages. */ + * get to the rest of the stack pages. + */ pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); } -/* Direct traps also mean that we need to know whenever the Guest wants to use +/* + * Direct traps also mean that we need to know whenever the Guest wants to use * a different kernel stack, so we can change the IDT entries to use that * stack. The IDT entries expect a virtual address, so unlike most addresses * the Guest gives us, the "esp" (stack pointer) value here is virtual, not * physical. 
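The direct_trap() policy above can be restated as a standalone predicate. The constants below are stand-ins chosen for illustration, not values taken from the kernel headers:

#include <stdbool.h>

#define FIRST_EXTERNAL_VECTOR	32	/* stand-in values for illustration */
#define LGUEST_TRAP_ENTRY	0x1F
#define SYSCALL_VECTOR		0x80

static bool could_be_syscall(unsigned int num)
{
	return num == SYSCALL_VECTOR;	/* ignoring the Plan 9 vector here */
}

/* May this trap be delivered to the Guest without bouncing via the Host? */
static bool trap_goes_direct(unsigned int num)
{
	/* Hardware interrupts never go direct (except the syscall vector). */
	if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num))
		return false;

	/* The Host must see page faults (14), GPFs (13), device-not-available
	 * (7), invalid opcode (6) and the hypercall trap itself. */
	return num != 14 && num != 13 && num != 7 && num != 6 &&
	       num != LGUEST_TRAP_ENTRY;
}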
* * In Linux each process has its own kernel stack, so this happens a lot: we - * change stacks on each context switch. */ + * change stacks on each context switch. + */ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) { - /* You are not allowed have a stack segment with privilege level 0: bad - * Guest! */ + /* + * You're not allowed a stack segment with privilege level 0: bad Guest! + */ if ((seg & 0x3) != GUEST_PL) kill_guest(cpu, "bad stack segment %i", seg); /* We only expect one or two stack pages. */ @@ -387,11 +454,15 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) pin_stack_pages(cpu); } -/* All this reference to mapping stacks leads us neatly into the other complex - * part of the Host: page table handling. */ +/* + * All this reference to mapping stacks leads us neatly into the other complex + * part of the Host: page table handling. + */ -/*H:235 This is the routine which actually checks the Guest's IDT entry and - * transfers it into the entry in "struct lguest": */ +/*H:235 + * This is the routine which actually checks the Guest's IDT entry and + * transfers it into the entry in "struct lguest": + */ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, unsigned int num, u32 lo, u32 hi) { @@ -407,30 +478,38 @@ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, if (type != 0xE && type != 0xF) kill_guest(cpu, "bad IDT type %i", type); - /* We only copy the handler address, present bit, privilege level and + /* + * We only copy the handler address, present bit, privilege level and * type. The privilege level controls where the trap can be triggered * manually with an "int" instruction. This is usually GUEST_PL, - * except for system calls which userspace can use. */ + * except for system calls which userspace can use. + */ trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); trap->b = (hi&0xFFFFEF00); } -/*H:230 While we're here, dealing with delivering traps and interrupts to the +/*H:230 + * While we're here, dealing with delivering traps and interrupts to the * Guest, we might as well complete the picture: how the Guest tells us where * it wants them to go. This would be simple, except making traps fast * requires some tricks. * * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the - * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ + * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. + */ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) { - /* Guest never handles: NMI, doublefault, spurious interrupt or - * hypercall. We ignore when it tries to set them. */ + /* + * Guest never handles: NMI, doublefault, spurious interrupt or + * hypercall. We ignore when it tries to set them. + */ if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) return; - /* Mark the IDT as changed: next time the Guest runs we'll know we have - * to copy this again. */ + /* + * Mark the IDT as changed: next time the Guest runs we'll know we have + * to copy this again. + */ cpu->changed |= CHANGED_IDT; /* Check that the Guest doesn't try to step outside the bounds. */ @@ -440,9 +519,11 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); } -/* The default entry for each interrupt points into the Switcher routines which +/* + * The default entry for each interrupt points into the Switcher routines which * simply return to the Host. 
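set_trap() keeps only the fields the Host is willing to honour — the handler offset, the present bit, the privilege level and the type — and forces the code segment selector to its own. A sketch of that packing (the selector and privilege values are placeholders, not __KERNEL_CS or GUEST_PL):

#include <stdint.h>

#define PL_GUEST	1	/* placeholder privilege level */
#define CS_KERNEL	0x10	/* placeholder code segment selector */

struct idt_words { uint32_t a, b; };

/* Copy a Guest-supplied gate, keeping offset, present, DPL and type,
 * while forcing the selector to (CS_KERNEL | PL_GUEST). */
static struct idt_words sanitize_gate(uint32_t lo, uint32_t hi)
{
	struct idt_words out;

	out.a = ((CS_KERNEL | PL_GUEST) << 16) | (lo & 0x0000FFFF);
	/* Keep offset high half, present bit, DPL and type; clear the rest. */
	out.b = hi & 0xFFFFEF00;
	return out;
}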
The run_guest() loop will then call - * deliver_trap() to bounce it back into the Guest. */ + * deliver_trap() to bounce it back into the Guest. + */ static void default_idt_entry(struct desc_struct *idt, int trap, const unsigned long handler, @@ -451,13 +532,17 @@ static void default_idt_entry(struct desc_struct *idt, /* A present interrupt gate. */ u32 flags = 0x8e00; - /* Set the privilege level on the entry for the hypercall: this allows - * the Guest to use the "int" instruction to trigger it. */ + /* + * Set the privilege level on the entry for the hypercall: this allows + * the Guest to use the "int" instruction to trigger it. + */ if (trap == LGUEST_TRAP_ENTRY) flags |= (GUEST_PL << 13); else if (base) - /* Copy priv. level from what Guest asked for. This allows - * debug (int 3) traps from Guest userspace, for example. */ + /* + * Copy privilege level from what Guest asked for. This allows + * debug (int 3) traps from Guest userspace, for example. + */ flags |= (base->b & 0x6000); /* Now pack it into the IDT entry in its weird format. */ @@ -475,16 +560,20 @@ void setup_default_idt_entries(struct lguest_ro_state *state, default_idt_entry(&state->guest_idt[i], i, def[i], NULL); } -/*H:240 We don't use the IDT entries in the "struct lguest" directly, instead +/*H:240 + * We don't use the IDT entries in the "struct lguest" directly, instead * we copy them into the IDT which we've set up for Guests on this CPU, just - * before we run the Guest. This routine does that copy. */ + * before we run the Guest. This routine does that copy. + */ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, const unsigned long *def) { unsigned int i; - /* We can simply copy the direct traps, otherwise we use the default - * ones in the Switcher: they will return to the Host. */ + /* + * We can simply copy the direct traps, otherwise we use the default + * ones in the Switcher: they will return to the Host. + */ for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { const struct desc_struct *gidt = &cpu->arch.idt[i]; @@ -492,14 +581,16 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, if (!direct_trap(i)) continue; - /* Only trap gates (type 15) can go direct to the Guest. + /* + * Only trap gates (type 15) can go direct to the Guest. * Interrupt gates (type 14) disable interrupts as they are * entered, which we never let the Guest do. Not present * entries (type 0x0) also can't go direct, of course. * * If it can't go direct, we still need to copy the priv. level: * they might want to give userspace access to a software - * interrupt. */ + * interrupt. + */ if (idt_type(gidt->a, gidt->b) == 0xF) idt[i] = *gidt; else @@ -518,7 +609,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, * the next timer interrupt (in nanoseconds). We use the high-resolution timer * infrastructure to set a callback at that time. * - * 0 means "turn off the clock". */ + * 0 means "turn off the clock". + */ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) { ktime_t expires; @@ -529,9 +621,11 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) return; } - /* We use wallclock time here, so the Guest might not be running for + /* + * We use wallclock time here, so the Guest might not be running for * all the time between now and the timer interrupt it asked for. This - * is almost always the right thing to do. */ + * is almost always the right thing to do. 
+ */ expires = ktime_add_ns(ktime_get_real(), delta); hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); } diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 01c591923793..74c0db691b53 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -54,13 +54,13 @@ struct lg_cpu { unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ - /* At end of a page shared mapped over lguest_pages in guest. */ + /* At end of a page shared mapped over lguest_pages in guest. */ unsigned long regs_page; struct lguest_regs *regs; struct lguest_pages *last_pages; - int cpu_pgd; /* which pgd this cpu is currently using */ + int cpu_pgd; /* Which pgd this cpu is currently using */ /* If a hypercall was asked for, this points to the arguments. */ struct hcall_args *hcall; @@ -96,8 +96,11 @@ struct lguest unsigned int nr_cpus; u32 pfn_limit; - /* This provides the offset to the base of guest-physical - * memory in the Launcher. */ + + /* + * This provides the offset to the base of guest-physical memory in the + * Launcher. + */ void __user *mem_base; unsigned long kernel_address; @@ -122,11 +125,13 @@ bool lguest_address_ok(const struct lguest *lg, void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); -/*H:035 Using memory-copy operations like that is usually inconvient, so we +/*H:035 + * Using memory-copy operations like that is usually inconvient, so we * have the following helper macros which read and write a specific type (often * an unsigned long). * - * This reads into a variable of the given type then returns that. */ + * This reads into a variable of the given type then returns that. + */ #define lgread(cpu, addr, type) \ ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) @@ -140,9 +145,11 @@ void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); int run_guest(struct lg_cpu *cpu, unsigned long __user *user); -/* Helper macros to obtain the first 12 or the last 20 bits, this is only the +/* + * Helper macros to obtain the first 12 or the last 20 bits, this is only the * first step in the migration to the kernel types. pte_pfn is already defined - * in the kernel. */ + * in the kernel. + */ #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) #define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index e082cdac88b4..cc000e79c3d1 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -1,10 +1,12 @@ -/*P:050 Lguest guests use a very simple method to describe devices. It's a +/*P:050 + * Lguest guests use a very simple method to describe devices. It's a * series of device descriptors contained just above the top of normal Guest * memory. * * We use the standard "virtio" device infrastructure, which provides us with a * console, a network and a block driver. Each one expects some configuration - * information and a "virtqueue" or two to send and receive data. :*/ + * information and a "virtqueue" or two to send and receive data. +:*/ #include #include #include @@ -20,8 +22,10 @@ /* The pointer to our (page) of device descriptions. */ static void *lguest_devices; -/* For Guests, device memory can be used as normal memory, so we cast away the - * __iomem to quieten sparse. */ +/* + * For Guests, device memory can be used as normal memory, so we cast away the + * __iomem to quieten sparse. 
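The pgd_flags()/pgd_pfn() helper macros in the lg.h hunk above split a 32-bit table entry into its low flag bits and its page frame number. The same split as freestanding C, assuming 4096-byte pages:

#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1u << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* The low 12 bits of an entry are flags (present, writable, user, ...). */
static uint32_t entry_flags(uint32_t entry)
{
	return entry & ~PAGE_MASK;
}

/* The remaining bits are the page frame number. */
static uint32_t entry_pfn(uint32_t entry)
{
	return entry >> PAGE_SHIFT;
}

/* Recombining the two halves gives the original entry back. */
static uint32_t make_entry(uint32_t pfn, uint32_t flags)
{
	return (pfn << PAGE_SHIFT) | flags;
}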
+ */ static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) { return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); @@ -32,8 +36,10 @@ static inline void lguest_unmap(void *addr) iounmap((__force void __iomem *)addr); } -/*D:100 Each lguest device is just a virtio device plus a pointer to its entry - * in the lguest_devices page. */ +/*D:100 + * Each lguest device is just a virtio device plus a pointer to its entry + * in the lguest_devices page. + */ struct lguest_device { struct virtio_device vdev; @@ -41,9 +47,11 @@ struct lguest_device { struct lguest_device_desc *desc; }; -/* Since the virtio infrastructure hands us a pointer to the virtio_device all +/* + * Since the virtio infrastructure hands us a pointer to the virtio_device all * the time, it helps to have a curt macro to get a pointer to the struct - * lguest_device it's enclosed in. */ + * lguest_device it's enclosed in. + */ #define to_lgdev(vd) container_of(vd, struct lguest_device, vdev) /*D:130 @@ -55,7 +63,8 @@ struct lguest_device { * the driver will look at them during setup. * * A convenient routine to return the device's virtqueue config array: - * immediately after the descriptor. */ + * immediately after the descriptor. + */ static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) { return (void *)(desc + 1); @@ -98,10 +107,12 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -/* The virtio core takes the features the Host offers, and copies the - * ones supported by the driver into the vdev->features array. Once - * that's all sorted out, this routine is called so we can tell the - * Host which features we understand and accept. */ +/* + * The virtio core takes the features the Host offers, and copies the ones + * supported by the driver into the vdev->features array. Once that's all + * sorted out, this routine is called so we can tell the Host which features we + * understand and accept. + */ static void lg_finalize_features(struct virtio_device *vdev) { unsigned int i, bits; @@ -112,10 +123,11 @@ static void lg_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - /* The vdev->feature array is a Linux bitmask: this isn't the - * same as a the simple array of bits used by lguest devices - * for features. So we do this slow, manual conversion which is - * completely general. */ + /* + * The vdev->feature array is a Linux bitmask: this isn't the same as a + * the simple array of bits used by lguest devices for features. So we + * do this slow, manual conversion which is completely general. + */ memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { @@ -146,15 +158,19 @@ static void lg_set(struct virtio_device *vdev, unsigned int offset, memcpy(lg_config(desc) + offset, buf, len); } -/* The operations to get and set the status word just access the status field - * of the device descriptor. */ +/* + * The operations to get and set the status word just access the status field + * of the device descriptor. + */ static u8 lg_get_status(struct virtio_device *vdev) { return to_lgdev(vdev)->desc->status; } -/* To notify on status updates, we (ab)use the NOTIFY hypercall, with the - * descriptor address of the device. A zero status means "reset". */ +/* + * To notify on status updates, we (ab)use the NOTIFY hypercall, with the + * descriptor address of the device. 
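The "slow, manual conversion" in lg_finalize_features() turns a word-based Linux bitmap into the plain byte array used by the device descriptor. A freestanding version of that loop, with the kernel bitops replaced by explicit shifts:

#include <stdint.h>
#include <string.h>

/* Convert a bitmap stored in 32-bit words into a byte array in which
 * bit i of the whole map lives at out[i / 8], bit (i % 8). */
static void features_to_bytes(const uint32_t *in_words, unsigned int nbits,
			      uint8_t *out, unsigned int out_len)
{
	unsigned int i;

	memset(out, 0, out_len);
	for (i = 0; i < nbits && i / 8 < out_len; i++)
		if (in_words[i / 32] & (1u << (i % 32)))
			out[i / 8] |= (uint8_t)(1u << (i % 8));
}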
A zero status means "reset". + */ static void set_status(struct virtio_device *vdev, u8 status) { unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; @@ -200,13 +216,17 @@ struct lguest_vq_info void *pages; }; -/* When the virtio_ring code wants to prod the Host, it calls us here and we +/* + * When the virtio_ring code wants to prod the Host, it calls us here and we * make a hypercall. We hand the physical address of the virtqueue so the Host - * knows which virtqueue we're talking about. */ + * knows which virtqueue we're talking about. + */ static void lg_notify(struct virtqueue *vq) { - /* We store our virtqueue information in the "priv" pointer of the - * virtqueue structure. */ + /* + * We store our virtqueue information in the "priv" pointer of the + * virtqueue structure. + */ struct lguest_vq_info *lvq = vq->priv; kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); @@ -215,7 +235,8 @@ static void lg_notify(struct virtqueue *vq) /* An extern declaration inside a C file is bad form. Don't do it. */ extern void lguest_setup_irq(unsigned int irq); -/* This routine finds the first virtqueue described in the configuration of +/* + * This routine finds the first virtqueue described in the configuration of * this device and sets it up. * * This is kind of an ugly duckling. It'd be nicer to have a standard @@ -225,7 +246,8 @@ extern void lguest_setup_irq(unsigned int irq); * simpler for the Host to simply tell us where the pages are. * * So we provide drivers with a "find the Nth virtqueue and set it up" - * function. */ + * function. + */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), @@ -244,9 +266,11 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, if (!lvq) return ERR_PTR(-ENOMEM); - /* Make a copy of the "struct lguest_vqconfig" entry, which sits after + /* + * Make a copy of the "struct lguest_vqconfig" entry, which sits after * the descriptor. We need a copy because the config space might not - * be aligned correctly. */ + * be aligned correctly. + */ memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); printk("Mapping virtqueue %i addr %lx\n", index, @@ -261,8 +285,10 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, goto free_lvq; } - /* OK, tell virtio_ring.c to set up a virtqueue now we know its size - * and we've got a pointer to its pages. */ + /* + * OK, tell virtio_ring.c to set up a virtqueue now we know its size + * and we've got a pointer to its pages. + */ vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev, lvq->pages, lg_notify, callback, name); if (!vq) { @@ -273,18 +299,23 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* Make sure the interrupt is allocated. */ lguest_setup_irq(lvq->config.irq); - /* Tell the interrupt for this virtqueue to go to the virtio_ring - * interrupt handler. */ - /* FIXME: We used to have a flag for the Host to tell us we could use + /* + * Tell the interrupt for this virtqueue to go to the virtio_ring + * interrupt handler. + * + * FIXME: We used to have a flag for the Host to tell us we could use * the interrupt as a source of randomness: it'd be nice to have that - * back.. */ + * back. + */ err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vq); if (err) goto destroy_vring; - /* Last of all we hook up our 'struct lguest_vq_info" to the - * virtqueue's priv pointer. 
*/ + /* + * Last of all we hook up our 'struct lguest_vq_info" to the + * virtqueue's priv pointer. + */ vq->priv = lvq; return vq; @@ -358,11 +389,14 @@ static struct virtio_config_ops lguest_config_ops = { .del_vqs = lg_del_vqs, }; -/* The root device for the lguest virtio devices. This makes them appear as - * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */ +/* + * The root device for the lguest virtio devices. This makes them appear as + * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. + */ static struct device *lguest_root; -/*D:120 This is the core of the lguest bus: actually adding a new device. +/*D:120 + * This is the core of the lguest bus: actually adding a new device. * It's a separate function because it's neater that way, and because an * earlier version of the code supported hotplug and unplug. They were removed * early on because they were never used. @@ -371,14 +405,14 @@ static struct device *lguest_root; * * It's worth reading this carefully: we start with a pointer to the new device * descriptor in the "lguest_devices" page, and the offset into the device - * descriptor page so we can uniquely identify it if things go badly wrong. */ + * descriptor page so we can uniquely identify it if things go badly wrong. + */ static void add_lguest_device(struct lguest_device_desc *d, unsigned int offset) { struct lguest_device *ldev; - /* Start with zeroed memory; Linux's device layer seems to count on - * it. */ + /* Start with zeroed memory; Linux's device layer counts on it. */ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); if (!ldev) { printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", @@ -390,15 +424,19 @@ static void add_lguest_device(struct lguest_device_desc *d, ldev->vdev.dev.parent = lguest_root; /* We have a unique device index thanks to the dev_index counter. */ ldev->vdev.id.device = d->type; - /* We have a simple set of routines for querying the device's - * configuration information and setting its status. */ + /* + * We have a simple set of routines for querying the device's + * configuration information and setting its status. + */ ldev->vdev.config = &lguest_config_ops; /* And we remember the device's descriptor for lguest_config_ops. */ ldev->desc = d; - /* register_virtio_device() sets up the generic fields for the struct + /* + * register_virtio_device() sets up the generic fields for the struct * virtio_device and calls device_register(). This makes the bus - * infrastructure look for a matching driver. */ + * infrastructure look for a matching driver. + */ if (register_virtio_device(&ldev->vdev) != 0) { printk(KERN_ERR "Failed to register lguest dev %u type %u\n", offset, d->type); @@ -406,8 +444,10 @@ static void add_lguest_device(struct lguest_device_desc *d, } } -/*D:110 scan_devices() simply iterates through the device page. The type 0 is - * reserved to mean "end of devices". */ +/*D:110 + * scan_devices() simply iterates through the device page. The type 0 is + * reserved to mean "end of devices". + */ static void scan_devices(void) { unsigned int i; @@ -426,7 +466,8 @@ static void scan_devices(void) } } -/*D:105 Fairly early in boot, lguest_devices_init() is called to set up the +/*D:105 + * Fairly early in boot, lguest_devices_init() is called to set up the * lguest device infrastructure. We check that we are a Guest by checking * pv_info.name: there are other ways of checking, but this seems most * obvious to me. @@ -437,7 +478,8 @@ static void scan_devices(void) * correct sysfs incantation). 
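scan_devices() walks one page of variable-length descriptors and stops at the first type-0 entry. A sketch of that walk; the two-field descriptor here is a made-up stand-in for struct lguest_device_desc, which also carries virtqueue and feature lengths:

#include <stdint.h>
#include <stddef.h>

#define DEV_PAGE_SIZE 4096

/* Minimal, made-up stand-in for the real descriptor header. */
struct dev_desc {
	uint8_t type;		/* 0 terminates the list */
	uint8_t config_len;	/* bytes of config data following the header */
};

/* Visit each descriptor in the page until we hit type 0 or run out. */
static void scan_device_page(const uint8_t *page,
			     void (*add)(const struct dev_desc *, unsigned int))
{
	unsigned int offset = 0;

	while (offset + sizeof(struct dev_desc) <= DEV_PAGE_SIZE) {
		const struct dev_desc *d = (const void *)(page + offset);

		if (d->type == 0)
			break;
		add(d, offset);
		offset += sizeof(*d) + d->config_len;
	}
}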
* * Finally we call scan_devices() which adds all the devices found in the - * lguest_devices page. */ + * lguest_devices page. + */ static int __init lguest_devices_init(void) { if (strcmp(pv_info.name, "lguest") != 0) @@ -456,11 +498,13 @@ static int __init lguest_devices_init(void) /* We do this after core stuff, but before the drivers. */ postcore_initcall(lguest_devices_init); -/*D:150 At this point in the journey we used to now wade through the lguest +/*D:150 + * At this point in the journey we used to now wade through the lguest * devices themselves: net, block and console. Since they're all now virtio * devices rather than lguest-specific, I've decided to ignore them. Mostly, * they're kind of boring. But this does mean you'll never experience the * thrill of reading the forbidden love scene buried deep in the block driver. * * "make Launcher" beckons, where we answer questions like "Where do Guests - * come from?", and "What do you do when someone asks for optimization?". */ + * come from?", and "What do you do when someone asks for optimization?". + */ diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 407722a8e0c4..7e92017103dc 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -1,8 +1,10 @@ -/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher +/*P:200 + * This contains all the /dev/lguest code, whereby the userspace launcher * controls and communicates with the Guest. For example, the first write will * tell us the Guest's memory layout, pagetable, entry point and kernel address * offset. A read will run the Guest until something happens, such as a signal - * or the Guest doing a NOTIFY out to the Launcher. :*/ + * or the Guest doing a NOTIFY out to the Launcher. +:*/ #include #include #include @@ -37,8 +39,10 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) if (!addr) return -EINVAL; - /* Replace the old array with the new one, carefully: others can - * be accessing it at the same time */ + /* + * Replace the old array with the new one, carefully: others can + * be accessing it at the same time. + */ new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), GFP_KERNEL); if (!new) @@ -61,8 +65,10 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) /* Now put new one in place. */ rcu_assign_pointer(lg->eventfds, new); - /* We're not in a big hurry. Wait until noone's looking at old - * version, then delete it. */ + /* + * We're not in a big hurry. Wait until noone's looking at old + * version, then delete it. + */ synchronize_rcu(); kfree(old); @@ -87,8 +93,10 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) return err; } -/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt - * number to /dev/lguest. */ +/*L:050 + * Sending an interrupt is done by writing LHREQ_IRQ and an interrupt + * number to /dev/lguest. + */ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) { unsigned long irq; @@ -102,8 +110,10 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) return 0; } -/*L:040 Once our Guest is initialized, the Launcher makes it run by reading - * from /dev/lguest. */ +/*L:040 + * Once our Guest is initialized, the Launcher makes it run by reading + * from /dev/lguest. 
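Earlier in this file's hunks, add_eventfd() swaps in a copy of the eventfd array with one extra slot. A userspace sketch of just the copy-and-grow step; the RCU publish and the synchronize_rcu()/kfree() of the old array are what make the swap safe against concurrent readers in the real code:

#include <stdlib.h>
#include <string.h>

struct eventfd_entry { unsigned long addr; int fd; };

struct eventfd_map {
	unsigned int num;
	struct eventfd_entry map[];	/* flexible array, as in the original */
};

/* Build a copy of 'old' with one extra entry appended.  The caller
 * publishes the new pointer and frees the old map once no reader can
 * still be looking at it. */
static struct eventfd_map *map_grow(const struct eventfd_map *old,
				    unsigned long addr, int fd)
{
	struct eventfd_map *new_map;

	new_map = malloc(sizeof(*new_map) +
			 sizeof(new_map->map[0]) * (old->num + 1));
	if (!new_map)
		return NULL;

	memcpy(new_map->map, old->map, sizeof(old->map[0]) * old->num);
	new_map->map[old->num].addr = addr;
	new_map->map[old->num].fd = fd;
	new_map->num = old->num + 1;
	return new_map;
}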
+ */ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) { struct lguest *lg = file->private_data; @@ -139,8 +149,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return len; } - /* If we returned from read() last time because the Guest sent I/O, - * clear the flag. */ + /* + * If we returned from read() last time because the Guest sent I/O, + * clear the flag. + */ if (cpu->pending_notify) cpu->pending_notify = 0; @@ -148,8 +160,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return run_guest(cpu, (unsigned long __user *)user); } -/*L:025 This actually initializes a CPU. For the moment, a Guest is only - * uniprocessor, so "id" is always 0. */ +/*L:025 + * This actually initializes a CPU. For the moment, a Guest is only + * uniprocessor, so "id" is always 0. + */ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) { /* We have a limited number the number of CPUs in the lguest struct. */ @@ -164,8 +178,10 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) /* Each CPU has a timer it can set. */ init_clockdev(cpu); - /* We need a complete page for the Guest registers: they are accessible - * to the Guest and we can only grant it access to whole pages. */ + /* + * We need a complete page for the Guest registers: they are accessible + * to the Guest and we can only grant it access to whole pages. + */ cpu->regs_page = get_zeroed_page(GFP_KERNEL); if (!cpu->regs_page) return -ENOMEM; @@ -173,29 +189,38 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) /* We actually put the registers at the bottom of the page. */ cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); - /* Now we initialize the Guest's registers, handing it the start - * address. */ + /* + * Now we initialize the Guest's registers, handing it the start + * address. + */ lguest_arch_setup_regs(cpu, start_ip); - /* We keep a pointer to the Launcher task (ie. current task) for when - * other Guests want to wake this one (eg. console input). */ + /* + * We keep a pointer to the Launcher task (ie. current task) for when + * other Guests want to wake this one (eg. console input). + */ cpu->tsk = current; - /* We need to keep a pointer to the Launcher's memory map, because if + /* + * We need to keep a pointer to the Launcher's memory map, because if * the Launcher dies we need to clean it up. If we don't keep a - * reference, it is destroyed before close() is called. */ + * reference, it is destroyed before close() is called. + */ cpu->mm = get_task_mm(cpu->tsk); - /* We remember which CPU's pages this Guest used last, for optimization - * when the same Guest runs on the same CPU twice. */ + /* + * We remember which CPU's pages this Guest used last, for optimization + * when the same Guest runs on the same CPU twice. + */ cpu->last_pages = NULL; /* No error == success. */ return 0; } -/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) - * values (in addition to the LHREQ_INITIALIZE value). These are: +/*L:020 + * The initialization write supplies 3 pointer sized (32 or 64 bit) values (in + * addition to the LHREQ_INITIALIZE value). These are: * * base: The start of the Guest-physical memory inside the Launcher memory. 
* @@ -207,14 +232,15 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) */ static int initialize(struct file *file, const unsigned long __user *input) { - /* "struct lguest" contains everything we (the Host) know about a - * Guest. */ + /* "struct lguest" contains all we (the Host) know about a Guest. */ struct lguest *lg; int err; unsigned long args[3]; - /* We grab the Big Lguest lock, which protects against multiple - * simultaneous initializations. */ + /* + * We grab the Big Lguest lock, which protects against multiple + * simultaneous initializations. + */ mutex_lock(&lguest_lock); /* You can't initialize twice! Close the device and start again... */ if (file->private_data) { @@ -249,8 +275,10 @@ static int initialize(struct file *file, const unsigned long __user *input) if (err) goto free_eventfds; - /* Initialize the Guest's shadow page tables, using the toplevel - * address the Launcher gave us. This allocates memory, so can fail. */ + /* + * Initialize the Guest's shadow page tables, using the toplevel + * address the Launcher gave us. This allocates memory, so can fail. + */ err = init_guest_pagetable(lg); if (err) goto free_regs; @@ -275,7 +303,8 @@ unlock: return err; } -/*L:010 The first operation the Launcher does must be a write. All writes +/*L:010 + * The first operation the Launcher does must be a write. All writes * start with an unsigned long number: for the first write this must be * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use * writes of other values to send interrupts. @@ -283,12 +312,15 @@ unlock: * Note that we overload the "offset" in the /dev/lguest file to indicate what * CPU number we're dealing with. Currently this is always 0, since we only * support uniprocessor Guests, but you can see the beginnings of SMP support - * here. */ + * here. + */ static ssize_t write(struct file *file, const char __user *in, size_t size, loff_t *off) { - /* Once the Guest is initialized, we hold the "struct lguest" in the - * file private data. */ + /* + * Once the Guest is initialized, we hold the "struct lguest" in the + * file private data. + */ struct lguest *lg = file->private_data; const unsigned long __user *input = (const unsigned long __user *)in; unsigned long req; @@ -323,13 +355,15 @@ static ssize_t write(struct file *file, const char __user *in, } } -/*L:060 The final piece of interface code is the close() routine. It reverses +/*L:060 + * The final piece of interface code is the close() routine. It reverses * everything done in initialize(). This is usually called because the * Launcher exited. * * Note that the close routine returns 0 or a negative error number: it can't * really fail, but it can whine. I blame Sun for this wart, and K&R C for - * letting them do it. :*/ + * letting them do it. +:*/ static int close(struct inode *inode, struct file *file) { struct lguest *lg = file->private_data; @@ -339,8 +373,10 @@ static int close(struct inode *inode, struct file *file) if (!lg) return 0; - /* We need the big lock, to protect from inter-guest I/O and other - * Launchers initializing guests. */ + /* + * We need the big lock, to protect from inter-guest I/O and other + * Launchers initializing guests. + */ mutex_lock(&lguest_lock); /* Free up the shadow page tables for the Guest. */ @@ -351,8 +387,10 @@ static int close(struct inode *inode, struct file *file) hrtimer_cancel(&lg->cpus[i].hrt); /* We can free up the register page we allocated. 
*/ free_page(lg->cpus[i].regs_page); - /* Now all the memory cleanups are done, it's safe to release - * the Launcher's memory management structure. */ + /* + * Now all the memory cleanups are done, it's safe to release + * the Launcher's memory management structure. + */ mmput(lg->cpus[i].mm); } @@ -361,8 +399,10 @@ static int close(struct inode *inode, struct file *file) eventfd_ctx_put(lg->eventfds->map[i].event); kfree(lg->eventfds); - /* If lg->dead doesn't contain an error code it will be NULL or a - * kmalloc()ed string, either of which is ok to hand to kfree(). */ + /* + * If lg->dead doesn't contain an error code it will be NULL or a + * kmalloc()ed string, either of which is ok to hand to kfree(). + */ if (!IS_ERR(lg->dead)) kfree(lg->dead); /* Free the memory allocated to the lguest_struct */ @@ -386,7 +426,8 @@ static int close(struct inode *inode, struct file *file) * * We begin our understanding with the Host kernel interface which the Launcher * uses: reading and writing a character device called /dev/lguest. All the - * work happens in the read(), write() and close() routines: */ + * work happens in the read(), write() and close() routines: + */ static struct file_operations lguest_fops = { .owner = THIS_MODULE, .release = close, @@ -394,8 +435,10 @@ static struct file_operations lguest_fops = { .read = read, }; -/* This is a textbook example of a "misc" character device. Populate a "struct - * miscdevice" and register it with misc_register(). */ +/* + * This is a textbook example of a "misc" character device. Populate a "struct + * miscdevice" and register it with misc_register(). + */ static struct miscdevice lguest_dev = { .minor = MISC_DYNAMIC_MINOR, .name = "lguest", diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a6fe1abda240..3da902e4b4cb 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -1,9 +1,11 @@ -/*P:700 The pagetable code, on the other hand, still shows the scars of +/*P:700 + * The pagetable code, on the other hand, still shows the scars of * previous encounters. It's functional, and as neat as it can be in the * circumstances, but be wary, for these things are subtle and break easily. * The Guest provides a virtual to physical mapping, but we can neither trust * it nor use it: we verify and convert it here then point the CPU to the - * converted Guest pages when running the Guest. :*/ + * converted Guest pages when running the Guest. +:*/ /* Copyright (C) Rusty Russell IBM Corporation 2006. * GPL v2 and any later version */ @@ -17,10 +19,12 @@ #include #include "lg.h" -/*M:008 We hold reference to pages, which prevents them from being swapped. +/*M:008 + * We hold reference to pages, which prevents them from being swapped. * It'd be nice to have a callback in the "struct mm_struct" when Linux wants * to swap out. If we had this, and a shrinker callback to trim PTE pages, we - * could probably consider launching Guests as non-root. :*/ + * could probably consider launching Guests as non-root. +:*/ /*H:300 * The Page Table Code @@ -45,16 +49,19 @@ * (v) Flushing (throwing away) page tables, * (vi) Mapping the Switcher when the Guest is about to run, * (vii) Setting up the page tables initially. - :*/ +:*/ - -/* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is +/* + * 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is * conveniently placed at the top 4MB, so it uses a separate, complete PTE - * page. */ + * page. 
+ */ #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) -/* For PAE we need the PMD index as well. We use the last 2MB, so we - * will need the last pmd entry of the last pmd page. */ +/* + * For PAE we need the PMD index as well. We use the last 2MB, so we + * will need the last pmd entry of the last pmd page. + */ #ifdef CONFIG_X86_PAE #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) #define RESERVE_MEM 2U @@ -64,13 +71,16 @@ #define CHECK_GPGD_MASK _PAGE_TABLE #endif -/* We actually need a separate PTE page for each CPU. Remember that after the +/* + * We actually need a separate PTE page for each CPU. Remember that after the * Switcher code itself comes two pages for each CPU, and we don't want this - * CPU's guest to see the pages of any other CPU. */ + * CPU's guest to see the pages of any other CPU. + */ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) -/*H:320 The page table code is curly enough to need helper functions to keep it +/*H:320 + * The page table code is curly enough to need helper functions to keep it * clear and clean. * * There are two functions which return pointers to the shadow (aka "real") @@ -79,7 +89,8 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); * spgd_addr() takes the virtual address and returns a pointer to the top-level * page directory entry (PGD) for that address. Since we keep track of several * page tables, the "i" argument tells us which one we're interested in (it's - * usually the current one). */ + * usually the current one). + */ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) { unsigned int index = pgd_index(vaddr); @@ -96,9 +107,11 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) } #ifdef CONFIG_X86_PAE -/* This routine then takes the PGD entry given above, which contains the +/* + * This routine then takes the PGD entry given above, which contains the * address of the PMD page. It then returns a pointer to the PMD entry for the - * given address. */ + * given address. + */ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) { unsigned int index = pmd_index(vaddr); @@ -119,9 +132,11 @@ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) } #endif -/* This routine then takes the page directory entry returned above, which +/* + * This routine then takes the page directory entry returned above, which * contains the address of the page table entry (PTE) page. It then returns a - * pointer to the PTE entry for the given address. */ + * pointer to the PTE entry for the given address. + */ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) { #ifdef CONFIG_X86_PAE @@ -139,8 +154,10 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) return &page[pte_index(vaddr)]; } -/* These two functions just like the above two, except they access the Guest - * page tables. Hence they return a Guest address. */ +/* + * These two functions just like the above two, except they access the Guest + * page tables. Hence they return a Guest address. + */ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) { unsigned int index = vaddr >> (PGDIR_SHIFT); @@ -175,17 +192,21 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, #endif /*:*/ -/*M:014 get_pfn is slow: we could probably try to grab batches of pages here as - * an optimization (ie. pre-faulting). 
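All of the *_addr() helpers above perform the same two-level split of a 32-bit virtual address (the PAE variant adds a middle level). The index arithmetic on its own, assuming 4KB pages:

#include <stdint.h>

#define PAGE_SHIFT	12
#define PGDIR_SHIFT	22	/* two-level, non-PAE layout */
#define PTRS_PER_PTE	1024

/* Top 10 bits: index into the page directory. */
static unsigned int vaddr_pgd_index(uint32_t vaddr)
{
	return vaddr >> PGDIR_SHIFT;
}

/* Middle 10 bits: index into the page table. */
static unsigned int vaddr_pte_index(uint32_t vaddr)
{
	return (vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

/* Low 12 bits: byte offset within the page. */
static unsigned int vaddr_page_offset(uint32_t vaddr)
{
	return vaddr & ((1u << PAGE_SHIFT) - 1);
}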
:*/ +/*M:014 + * get_pfn is slow: we could probably try to grab batches of pages here as + * an optimization (ie. pre-faulting). +:*/ -/*H:350 This routine takes a page number given by the Guest and converts it to +/*H:350 + * This routine takes a page number given by the Guest and converts it to * an actual, physical page number. It can fail for several reasons: the * virtual address might not be mapped by the Launcher, the write flag is set * and the page is read-only, or the write flag was set and the page was * shared so had to be copied, but we ran out of memory. * * This holds a reference to the page, so release_pte() is careful to put that - * back. */ + * back. + */ static unsigned long get_pfn(unsigned long virtpfn, int write) { struct page *page; @@ -198,33 +219,41 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) return -1UL; } -/*H:340 Converting a Guest page table entry to a shadow (ie. real) page table +/*H:340 + * Converting a Guest page table entry to a shadow (ie. real) page table * entry can be a little tricky. The flags are (almost) the same, but the * Guest PTE contains a virtual page number: the CPU needs the real page - * number. */ + * number. + */ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) { unsigned long pfn, base, flags; - /* The Guest sets the global flag, because it thinks that it is using + /* + * The Guest sets the global flag, because it thinks that it is using * PGE. We only told it to use PGE so it would tell us whether it was * flushing a kernel mapping or a userspace mapping. We don't actually - * use the global bit, so throw it away. */ + * use the global bit, so throw it away. + */ flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); /* The Guest's pages are offset inside the Launcher. */ base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; - /* We need a temporary "unsigned long" variable to hold the answer from + /* + * We need a temporary "unsigned long" variable to hold the answer from * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't * fit in spte.pfn. get_pfn() finds the real physical number of the - * page, given the virtual number. */ + * page, given the virtual number. + */ pfn = get_pfn(base + pte_pfn(gpte), write); if (pfn == -1UL) { kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); - /* When we destroy the Guest, we'll go through the shadow page + /* + * When we destroy the Guest, we'll go through the shadow page * tables and release_pte() them. Make sure we don't think - * this one is valid! */ + * this one is valid! + */ flags = 0; } /* Now we assemble our shadow PTE from the page number and flags. */ @@ -234,8 +263,10 @@ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) /*H:460 And to complete the chain, release_pte() looks like this: */ static void release_pte(pte_t pte) { - /* Remember that get_user_pages_fast() took a reference to the page, in - * get_pfn()? We have to put it back now. */ + /* + * Remember that get_user_pages_fast() took a reference to the page, in + * get_pfn()? We have to put it back now. + */ if (pte_flags(pte) & _PAGE_PRESENT) put_page(pte_page(pte)); } @@ -273,7 +304,8 @@ static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) * and return to the Guest without it knowing. * * If we fixed up the fault (ie. we mapped the address), this routine returns - * true. Otherwise, it was a real fault and we need to tell the Guest. */ + * true. Otherwise, it was a real fault and we need to tell the Guest. 
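gpte_to_spte() boils down to: drop the global bit, offset the Guest's page number into the Launcher's memory, look up the real frame, then glue frame and flags back together. A sketch of that assembly, with the page lookup modelled as a callback since the real get_pfn() pins a user page:

#include <stdint.h>

#define PAGE_SHIFT	12
#define PTE_FLAGS_MASK	0xFFFu
#define PTE_GLOBAL	0x100u	/* illustrative value for _PAGE_GLOBAL */

typedef unsigned long (*pfn_lookup_fn)(unsigned long virtpfn, int write);

/* Build a shadow PTE from a Guest PTE: same flags minus the global bit,
 * and the real frame number instead of the Guest's virtual one. */
static uint32_t guest_pte_to_shadow(uint32_t gpte, unsigned long mem_base_pfn,
				    int write, pfn_lookup_fn get_pfn)
{
	uint32_t flags = (gpte & PTE_FLAGS_MASK) & ~PTE_GLOBAL;
	unsigned long pfn = get_pfn(mem_base_pfn + (gpte >> PAGE_SHIFT), write);

	if (pfn == (unsigned long)-1)
		flags = 0;	/* a failed lookup must never look present */

	return ((uint32_t)pfn << PAGE_SHIFT) | flags;
}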
+ */ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) { pgd_t gpgd; @@ -298,22 +330,26 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { /* No shadow entry: allocate a new shadow PTE page. */ unsigned long ptepage = get_zeroed_page(GFP_KERNEL); - /* This is not really the Guest's fault, but killing it is - * simple for this corner case. */ + /* + * This is not really the Guest's fault, but killing it is + * simple for this corner case. + */ if (!ptepage) { kill_guest(cpu, "out of memory allocating pte page"); return false; } /* We check that the Guest pgd is OK. */ check_gpgd(cpu, gpgd); - /* And we copy the flags to the shadow PGD entry. The page - * number in the shadow PGD is the page we just allocated. */ + /* + * And we copy the flags to the shadow PGD entry. The page + * number in the shadow PGD is the page we just allocated. + */ set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); } #ifdef CONFIG_X86_PAE gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); - /* middle level not present? We can't map it in. */ + /* Middle level not present? We can't map it in. */ if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) return false; @@ -324,8 +360,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* No shadow entry: allocate a new shadow PTE page. */ unsigned long ptepage = get_zeroed_page(GFP_KERNEL); - /* This is not really the Guest's fault, but killing it is - * simple for this corner case. */ + /* + * This is not really the Guest's fault, but killing it is + * simple for this corner case. + */ if (!ptepage) { kill_guest(cpu, "out of memory allocating pte page"); return false; @@ -334,17 +372,23 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* We check that the Guest pmd is OK. */ check_gpmd(cpu, gpmd); - /* And we copy the flags to the shadow PMD entry. The page - * number in the shadow PMD is the page we just allocated. */ + /* + * And we copy the flags to the shadow PMD entry. The page + * number in the shadow PMD is the page we just allocated. + */ native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); } - /* OK, now we look at the lower level in the Guest page table: keep its - * address, because we might update it later. */ + /* + * OK, now we look at the lower level in the Guest page table: keep its + * address, because we might update it later. + */ gpte_ptr = gpte_addr(cpu, gpmd, vaddr); #else - /* OK, now we look at the lower level in the Guest page table: keep its - * address, because we might update it later. */ + /* + * OK, now we look at the lower level in the Guest page table: keep its + * address, because we might update it later. + */ gpte_ptr = gpte_addr(cpu, gpgd, vaddr); #endif gpte = lgread(cpu, gpte_ptr, pte_t); @@ -353,8 +397,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if (!(pte_flags(gpte) & _PAGE_PRESENT)) return false; - /* Check they're not trying to write to a page the Guest wants - * read-only (bit 2 of errcode == write). */ + /* + * Check they're not trying to write to a page the Guest wants + * read-only (bit 2 of errcode == write). + */ if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW)) return false; @@ -362,8 +408,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) return false; - /* Check that the Guest PTE flags are OK, and the page number is below - * the pfn_limit (ie. 
not mapping the Launcher binary). */ + /* + * Check that the Guest PTE flags are OK, and the page number is below + * the pfn_limit (ie. not mapping the Launcher binary). + */ check_gpte(cpu, gpte); /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ @@ -373,29 +421,40 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* Get the pointer to the shadow PTE entry we're going to set. */ spte = spte_addr(cpu, *spgd, vaddr); - /* If there was a valid shadow PTE entry here before, we release it. - * This can happen with a write to a previously read-only entry. */ + + /* + * If there was a valid shadow PTE entry here before, we release it. + * This can happen with a write to a previously read-only entry. + */ release_pte(*spte); - /* If this is a write, we insist that the Guest page is writable (the - * final arg to gpte_to_spte()). */ + /* + * If this is a write, we insist that the Guest page is writable (the + * final arg to gpte_to_spte()). + */ if (pte_dirty(gpte)) *spte = gpte_to_spte(cpu, gpte, 1); else - /* If this is a read, don't set the "writable" bit in the page + /* + * If this is a read, don't set the "writable" bit in the page * table entry, even if the Guest says it's writable. That way * we will come back here when a write does actually occur, so - * we can update the Guest's _PAGE_DIRTY flag. */ + * we can update the Guest's _PAGE_DIRTY flag. + */ native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); - /* Finally, we write the Guest PTE entry back: we've set the - * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ + /* + * Finally, we write the Guest PTE entry back: we've set the + * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. + */ lgwrite(cpu, gpte_ptr, pte_t, gpte); - /* The fault is fixed, the page table is populated, the mapping + /* + * The fault is fixed, the page table is populated, the mapping * manipulated, the result returned and the code complete. A small * delay and a trace of alliteration are the only indications the Guest - * has that a page fault occurred at all. */ + * has that a page fault occurred at all. + */ return true; } @@ -408,7 +467,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) * mapped, so it's overkill. * * This is a quick version which answers the question: is this virtual address - * mapped by the shadow page tables, and is it writable? */ + * mapped by the shadow page tables, and is it writable? + */ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) { pgd_t *spgd; @@ -428,16 +488,20 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) return false; #endif - /* Check the flags on the pte entry itself: it must be present and - * writable. */ + /* + * Check the flags on the pte entry itself: it must be present and + * writable. + */ flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); } -/* So, when pin_stack_pages() asks us to pin a page, we check if it's already +/* + * So, when pin_stack_pages() asks us to pin a page, we check if it's already * in the page tables, and if not, we call demand_page() with error code 2 - * (meaning "write"). */ + * (meaning "write"). + */ void pin_page(struct lg_cpu *cpu, unsigned long vaddr) { if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) @@ -485,9 +549,11 @@ static void release_pgd(pgd_t *spgd) /* If the entry's not present, there's nothing to release. 
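
The error-code tests in demand_page(), and the literal 2 that pin_page() passes, both follow the architectural x86 page-fault error code. Here is a self-contained decoder for just the bits the driver cares about; nothing in it is lguest-specific and the function name is made up:

#include <stdio.h>

/* x86 page-fault error code bits used above. */
#define PF_PRESENT 0x1UL  /* 0: page not present, 1: protection violation */
#define PF_WRITE   0x2UL  /* the faulting access was a write */
#define PF_USER    0x4UL  /* the fault happened in user mode */

static void decode_pf_errcode(unsigned long errcode)
{
        printf("%s %s access, page %s\n",
               (errcode & PF_USER) ? "user" : "kernel",
               (errcode & PF_WRITE) ? "write" : "read",
               (errcode & PF_PRESENT) ? "present (protection)" : "not present");
}

int main(void)
{
        decode_pf_errcode(2);   /* what pin_page() asks for: a kernel write */
        decode_pf_errcode(7);   /* a user write to a present page */
        return 0;
}

So "errcode & 2" asks whether the access was a write and "errcode & 4" whether it came from userspace, which is exactly what gets compared against _PAGE_RW and _PAGE_USER.
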
*/ if (pgd_flags(*spgd) & _PAGE_PRESENT) { unsigned int i; - /* Converting the pfn to find the actual PTE page is easy: turn + /* + * Converting the pfn to find the actual PTE page is easy: turn * the page number into a physical address, then convert to a - * virtual address (easy for kernel pages like this one). */ + * virtual address (easy for kernel pages like this one). + */ pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); /* For each entry in the page, we might need to release it. */ for (i = 0; i < PTRS_PER_PTE; i++) @@ -499,9 +565,12 @@ static void release_pgd(pgd_t *spgd) } } #endif -/*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings() + +/*H:445 + * We saw flush_user_mappings() twice: once from the flush_user_mappings() * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. - * It simply releases every PTE page from 0 up to the Guest's kernel address. */ + * It simply releases every PTE page from 0 up to the Guest's kernel address. + */ static void flush_user_mappings(struct lguest *lg, int idx) { unsigned int i; @@ -510,10 +579,12 @@ static void flush_user_mappings(struct lguest *lg, int idx) release_pgd(lg->pgdirs[idx].pgdir + i); } -/*H:440 (v) Flushing (throwing away) page tables, +/*H:440 + * (v) Flushing (throwing away) page tables, * * The Guest has a hypercall to throw away the page tables: it's used when a - * large number of mappings have been changed. */ + * large number of mappings have been changed. + */ void guest_pagetable_flush_user(struct lg_cpu *cpu) { /* Drop the userspace part of the current page table. */ @@ -551,9 +622,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); } -/* We keep several page tables. This is a simple routine to find the page +/* + * We keep several page tables. This is a simple routine to find the page * table (if any) corresponding to this top-level address the Guest has given - * us. */ + * us. + */ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) { unsigned int i; @@ -563,9 +636,11 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) return i; } -/*H:435 And this is us, creating the new page directory. If we really do +/*H:435 + * And this is us, creating the new page directory. If we really do * allocate a new one (and so the kernel parts are not there), we set - * blank_pgdir. */ + * blank_pgdir. + */ static unsigned int new_pgdir(struct lg_cpu *cpu, unsigned long gpgdir, int *blank_pgdir) @@ -575,8 +650,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, pmd_t *pmd_table; #endif - /* We pick one entry at random to throw out. Choosing the Least - * Recently Used might be better, but this is easy. */ + /* + * We pick one entry at random to throw out. Choosing the Least + * Recently Used might be better, but this is easy. + */ next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); /* If it's never been allocated at all before, try now. */ if (!cpu->lg->pgdirs[next].pgdir) { @@ -587,8 +664,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, next = cpu->cpu_pgd; else { #ifdef CONFIG_X86_PAE - /* In PAE mode, allocate a pmd page and populate the - * last pgd entry. */ + /* + * In PAE mode, allocate a pmd page and populate the + * last pgd entry. 
+ */ pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd_table) { free_page((long)cpu->lg->pgdirs[next].pgdir); @@ -598,8 +677,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, set_pgd(cpu->lg->pgdirs[next].pgdir + SWITCHER_PGD_INDEX, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - /* This is a blank page, so there are no kernel - * mappings: caller must map the stack! */ + /* + * This is a blank page, so there are no kernel + * mappings: caller must map the stack! + */ *blank_pgdir = 1; } #else @@ -615,19 +696,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, return next; } -/*H:430 (iv) Switching page tables +/*H:430 + * (iv) Switching page tables * * Now we've seen all the page table setting and manipulation, let's see * what happens when the Guest changes page tables (ie. changes the top-level - * pgdir). This occurs on almost every context switch. */ + * pgdir). This occurs on almost every context switch. + */ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) { int newpgdir, repin = 0; /* Look to see if we have this one already. */ newpgdir = find_pgdir(cpu->lg, pgtable); - /* If not, we allocate or mug an existing one: if it's a fresh one, - * repin gets set to 1. */ + /* + * If not, we allocate or mug an existing one: if it's a fresh one, + * repin gets set to 1. + */ if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) newpgdir = new_pgdir(cpu, pgtable, &repin); /* Change the current pgd index to the new one. */ @@ -637,9 +722,11 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) pin_stack_pages(cpu); } -/*H:470 Finally, a routine which throws away everything: all PGD entries in all +/*H:470 + * Finally, a routine which throws away everything: all PGD entries in all * the shadow page tables, including the Guest's kernel mappings. This is used - * when we destroy the Guest. */ + * when we destroy the Guest. + */ static void release_all_pagetables(struct lguest *lg) { unsigned int i, j; @@ -656,8 +743,10 @@ static void release_all_pagetables(struct lguest *lg) spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); - /* And release the pmd entries of that pmd page, - * except for the switcher pmd. */ + /* + * And release the pmd entries of that pmd page, + * except for the switcher pmd. + */ for (k = 0; k < SWITCHER_PMD_INDEX; k++) release_pmd(&pmdpage[k]); #endif @@ -667,10 +756,12 @@ static void release_all_pagetables(struct lguest *lg) } } -/* We also throw away everything when a Guest tells us it's changed a kernel +/* + * We also throw away everything when a Guest tells us it's changed a kernel * mapping. Since kernel mappings are in every page table, it's easiest to * throw them all away. This traps the Guest in amber for a while as - * everything faults back in, but it's rare. */ + * everything faults back in, but it's rare. + */ void guest_pagetable_clear_all(struct lg_cpu *cpu) { release_all_pagetables(cpu->lg); @@ -678,15 +769,19 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) pin_stack_pages(cpu); } /*:*/ -/*M:009 Since we throw away all mappings when a kernel mapping changes, our + +/*M:009 + * Since we throw away all mappings when a kernel mapping changes, our * performance sucks for guests using highmem. In fact, a guest with * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is * usually slower than a Guest with less memory. * * This, of course, cannot be fixed. It would take some kind of... well, I - * don't know, but the term "puissant code-fu" comes to mind. 
:*/ + * don't know, but the term "puissant code-fu" comes to mind. +:*/ -/*H:420 This is the routine which actually sets the page table entry for then +/*H:420 + * This is the routine which actually sets the page table entry for then * "idx"'th shadow page table. * * Normally, we can just throw out the old entry and replace it with 0: if they @@ -715,31 +810,36 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, spmd = spmd_addr(cpu, *spgd, vaddr); if (pmd_flags(*spmd) & _PAGE_PRESENT) { #endif - /* Otherwise, we start by releasing - * the existing entry. */ + /* Otherwise, start by releasing the existing entry. */ pte_t *spte = spte_addr(cpu, *spgd, vaddr); release_pte(*spte); - /* If they're setting this entry as dirty or accessed, - * we might as well put that entry they've given us - * in now. This shaves 10% off a - * copy-on-write micro-benchmark. */ + /* + * If they're setting this entry as dirty or accessed, + * we might as well put that entry they've given us in + * now. This shaves 10% off a copy-on-write + * micro-benchmark. + */ if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { check_gpte(cpu, gpte); native_set_pte(spte, gpte_to_spte(cpu, gpte, pte_flags(gpte) & _PAGE_DIRTY)); - } else - /* Otherwise kill it and we can demand_page() - * it in later. */ + } else { + /* + * Otherwise kill it and we can demand_page() + * it in later. + */ native_set_pte(spte, __pte(0)); + } #ifdef CONFIG_X86_PAE } #endif } } -/*H:410 Updating a PTE entry is a little trickier. +/*H:410 + * Updating a PTE entry is a little trickier. * * We keep track of several different page tables (the Guest uses one for each * process, so it makes sense to cache at least a few). Each of these have @@ -748,12 +848,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, * all the page tables, not just the current one. This is rare. * * The benefit is that when we have to track a new page table, we can keep all - * the kernel mappings. This speeds up context switch immensely. */ + * the kernel mappings. This speeds up context switch immensely. + */ void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t gpte) { - /* Kernel mappings must be changed on all top levels. Slow, but doesn't - * happen often. */ + /* + * Kernel mappings must be changed on all top levels. Slow, but doesn't + * happen often. + */ if (vaddr >= cpu->lg->kernel_address) { unsigned int i; for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) @@ -802,12 +905,14 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) } #endif -/* Once we know how much memory we have we can construct simple identity - * (which set virtual == physical) and linear mappings - * which will get the Guest far enough into the boot to create its own. +/* + * Once we know how much memory we have we can construct simple identity (which + * set virtual == physical) and linear mappings which will get the Guest far + * enough into the boot to create its own. * * We lay them out of the way, just below the initrd (which is why we need to - * know its size here). */ + * know its size here). + */ static unsigned long setup_pagetables(struct lguest *lg, unsigned long mem, unsigned long initrd_size) @@ -825,8 +930,10 @@ static unsigned long setup_pagetables(struct lguest *lg, unsigned int phys_linear; #endif - /* We have mapped_pages frames to map, so we need - * linear_pages page tables to map them. */ + /* + * We have mapped_pages frames to map, so we need linear_pages page + * tables to map them. 
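
The sizing arithmetic that follows is a plain round-up division: one page of PTEs maps PTRS_PER_PTE pages. A throwaway sketch of the same sum, assuming the classic two-level x86 layout (1024 entries per table, 4 KiB pages) and an invented 128 MB Guest:

#include <stdio.h>

#define PAGE_SIZE     4096UL
#define PTRS_PER_PTE  1024UL   /* non-PAE x86; PAE halves this to 512 */

int main(void)
{
        unsigned long mem = 128UL << 20;        /* a 128 MB Guest */
        unsigned long mapped_pages = mem / PAGE_SIZE;
        unsigned long linear_pages =
                (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;

        printf("%lu pages to map, %lu PTE pages needed\n",
               mapped_pages, linear_pages);
        return 0;
}

For this example that is 32768 pages and only 32 PTE pages, which is why the Guest's initial linear mapping stays pleasantly small.
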
+ */ mapped_pages = mem / PAGE_SIZE; linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; @@ -839,8 +946,10 @@ static unsigned long setup_pagetables(struct lguest *lg, #ifdef CONFIG_X86_PAE pmds = (void *)linear - PAGE_SIZE; #endif - /* Linear mapping is easy: put every page's address into the - * mapping in order. */ + /* + * Linear mapping is easy: put every page's address into the + * mapping in order. + */ for (i = 0; i < mapped_pages; i++) { pte_t pte; pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); @@ -848,8 +957,10 @@ static unsigned long setup_pagetables(struct lguest *lg, return -EFAULT; } - /* The top level points to the linear page table pages above. - * We setup the identity and linear mappings here. */ + /* + * The top level points to the linear page table pages above. + * We setup the identity and linear mappings here. + */ #ifdef CONFIG_X86_PAE for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; i += PTRS_PER_PTE, j++) { @@ -880,15 +991,19 @@ static unsigned long setup_pagetables(struct lguest *lg, } #endif - /* We return the top level (guest-physical) address: remember where - * this is. */ + /* + * We return the top level (guest-physical) address: remember where + * this is. + */ return (unsigned long)pgdir - mem_base; } -/*H:500 (vii) Setting up the page tables initially. +/*H:500 + * (vii) Setting up the page tables initially. * * When a Guest is first created, the Launcher tells us where the toplevel of - * its first page table is. We set some things up here: */ + * its first page table is. We set some things up here: + */ int init_guest_pagetable(struct lguest *lg) { u64 mem; @@ -898,14 +1013,18 @@ int init_guest_pagetable(struct lguest *lg) pgd_t *pgd; pmd_t *pmd_table; #endif - /* Get the Guest memory size and the ramdisk size from the boot header - * located at lg->mem_base (Guest address 0). */ + /* + * Get the Guest memory size and the ramdisk size from the boot header + * located at lg->mem_base (Guest address 0). + */ if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) || get_user(initrd_size, &boot->hdr.ramdisk_size)) return -EFAULT; - /* We start on the first shadow page table, and give it a blank PGD - * page. */ + /* + * We start on the first shadow page table, and give it a blank PGD + * page. + */ lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) return lg->pgdirs[0].gpgdir; @@ -931,17 +1050,21 @@ void page_table_guest_data_init(struct lg_cpu *cpu) /* We get the kernel address: above this is all kernel memory. */ if (get_user(cpu->lg->kernel_address, &cpu->lg->lguest_data->kernel_address) - /* We tell the Guest that it can't use the top 2 or 4 MB - * of virtual addresses used by the Switcher. */ + /* + * We tell the Guest that it can't use the top 2 or 4 MB + * of virtual addresses used by the Switcher. + */ || put_user(RESERVE_MEM * 1024 * 1024, &cpu->lg->lguest_data->reserve_mem) || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); - /* In flush_user_mappings() we loop from 0 to + /* + * In flush_user_mappings() we loop from 0 to * "pgd_index(lg->kernel_address)". This assumes it won't hit the - * Switcher mappings, so check that now. */ + * Switcher mappings, so check that now. 
+ */ #ifdef CONFIG_X86_PAE if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) @@ -964,12 +1087,14 @@ void free_guest_pagetable(struct lguest *lg) free_page((long)lg->pgdirs[i].pgdir); } -/*H:480 (vi) Mapping the Switcher when the Guest is about to run. +/*H:480 + * (vi) Mapping the Switcher when the Guest is about to run. * * The Switcher and the two pages for this CPU need to be visible in the * Guest (and not the pages for other CPUs). We have the appropriate PTE pages * for each CPU already set up, we just need to hook them in now we know which - * Guest is about to run on this CPU. */ + * Guest is about to run on this CPU. + */ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); @@ -990,20 +1115,24 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) #else pgd_t switcher_pgd; - /* Make the last PGD entry for this Guest point to the Switcher's PTE - * page for this CPU (with appropriate flags). */ + /* + * Make the last PGD entry for this Guest point to the Switcher's PTE + * page for this CPU (with appropriate flags). + */ switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; #endif - /* We also change the Switcher PTE page. When we're running the Guest, + /* + * We also change the Switcher PTE page. When we're running the Guest, * we want the Guest's "regs" page to appear where the first Switcher * page for this CPU is. This is an optimization: when the Switcher * saves the Guest registers, it saves them into the first page of this * CPU's "struct lguest_pages": if we make sure the Guest's register * page is already mapped there, we don't have to copy them out - * again. */ + * again. + */ pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; native_set_pte(®s_pte, pfn_pte(pfn, PAGE_KERNEL)); native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], @@ -1019,10 +1148,12 @@ static void free_switcher_pte_pages(void) free_page((long)switcher_pte_page(i)); } -/*H:520 Setting up the Switcher PTE page for given CPU is fairly easy, given +/*H:520 + * Setting up the Switcher PTE page for given CPU is fairly easy, given * the CPU number and the "struct page"s for the Switcher code itself. * - * Currently the Switcher is less than a page long, so "pages" is always 1. */ + * Currently the Switcher is less than a page long, so "pages" is always 1. + */ static __init void populate_switcher_pte_page(unsigned int cpu, struct page *switcher_page[], unsigned int pages) @@ -1043,13 +1174,16 @@ static __init void populate_switcher_pte_page(unsigned int cpu, native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); - /* The second page contains the "struct lguest_ro_state", and is - * read-only. */ + /* + * The second page contains the "struct lguest_ro_state", and is + * read-only. + */ native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); } -/* We've made it through the page table code. Perhaps our tired brains are +/* + * We've made it through the page table code. Perhaps our tired brains are * still processing the details, or perhaps we're simply glad it's over. 
* * If nothing else, note that all this complexity in juggling shadow page tables @@ -1058,10 +1192,13 @@ static __init void populate_switcher_pte_page(unsigned int cpu, * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD * have implemented shadow page table support directly into hardware. * - * There is just one file remaining in the Host. */ + * There is just one file remaining in the Host. + */ -/*H:510 At boot or module load time, init_pagetables() allocates and populates - * the Switcher PTE page for each CPU. */ +/*H:510 + * At boot or module load time, init_pagetables() allocates and populates + * the Switcher PTE page for each CPU. + */ __init int init_pagetables(struct page **switcher_page, unsigned int pages) { unsigned int i; diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 482ed5a18750..951c57b0a7e0 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -1,4 +1,5 @@ -/*P:600 The x86 architecture has segments, which involve a table of descriptors +/*P:600 + * The x86 architecture has segments, which involve a table of descriptors * which can be used to do funky things with virtual address interpretation. * We originally used to use segments so the Guest couldn't alter the * Guest<->Host Switcher, and then we had to trim Guest segments, and restore @@ -8,7 +9,8 @@ * * In these modern times, the segment handling code consists of simple sanity * checks, and the worst you'll experience reading this code is butterfly-rash - * from frolicking through its parklike serenity. :*/ + * from frolicking through its parklike serenity. +:*/ #include "lg.h" /*H:600 @@ -41,10 +43,12 @@ * begin. */ -/* There are several entries we don't let the Guest set. The TSS entry is the +/* + * There are several entries we don't let the Guest set. The TSS entry is the * "Task State Segment" which controls all kinds of delicate things. The * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the - * the Guest can't be trusted to deal with double faults. */ + * the Guest can't be trusted to deal with double faults. + */ static bool ignored_gdt(unsigned int num) { return (num == GDT_ENTRY_TSS @@ -53,42 +57,52 @@ static bool ignored_gdt(unsigned int num) || num == GDT_ENTRY_DOUBLEFAULT_TSS); } -/*H:630 Once the Guest gave us new GDT entries, we fix them up a little. We +/*H:630 + * Once the Guest gave us new GDT entries, we fix them up a little. We * don't care if they're invalid: the worst that can happen is a General * Protection Fault in the Switcher when it restores a Guest segment register * which tries to use that entry. Then we kill the Guest for causing such a - * mess: the message will be "unhandled trap 256". */ + * mess: the message will be "unhandled trap 256". + */ static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) { unsigned int i; for (i = start; i < end; i++) { - /* We never copy these ones to real GDT, so we don't care what - * they say */ + /* + * We never copy these ones to real GDT, so we don't care what + * they say + */ if (ignored_gdt(i)) continue; - /* Segment descriptors contain a privilege level: the Guest is + /* + * Segment descriptors contain a privilege level: the Guest is * sometimes careless and leaves this as 0, even though it's - * running at privilege level 1. If so, we fix it here. */ + * running at privilege level 1. If so, we fix it here. 
+ */ if ((cpu->arch.gdt[i].b & 0x00006000) == 0) cpu->arch.gdt[i].b |= (GUEST_PL << 13); - /* Each descriptor has an "accessed" bit. If we don't set it + /* + * Each descriptor has an "accessed" bit. If we don't set it * now, the CPU will try to set it when the Guest first loads * that entry into a segment register. But the GDT isn't - * writable by the Guest, so bad things can happen. */ + * writable by the Guest, so bad things can happen. + */ cpu->arch.gdt[i].b |= 0x00000100; } } -/*H:610 Like the IDT, we never simply use the GDT the Guest gives us. We keep +/*H:610 + * Like the IDT, we never simply use the GDT the Guest gives us. We keep * a GDT for each CPU, and copy across the Guest's entries each time we want to * run the Guest on that CPU. * * This routine is called at boot or modprobe time for each CPU to set up the * constant GDT entries: the ones which are the same no matter what Guest we're - * running. */ + * running. + */ void setup_default_gdt_entries(struct lguest_ro_state *state) { struct desc_struct *gdt = state->guest_gdt; @@ -98,30 +112,37 @@ void setup_default_gdt_entries(struct lguest_ro_state *state) gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; - /* The TSS segment refers to the TSS entry for this particular CPU. + /* + * The TSS segment refers to the TSS entry for this particular CPU. * Forgive the magic flags: the 0x8900 means the entry is Present, it's * privilege level 0 Available 386 TSS system segment, and the 0x67 - * means Saturn is eclipsed by Mercury in the twelfth house. */ + * means Saturn is eclipsed by Mercury in the twelfth house. + */ gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) | ((tss >> 16) & 0x000000FF); } -/* This routine sets up the initial Guest GDT for booting. All entries start - * as 0 (unusable). */ +/* + * This routine sets up the initial Guest GDT for booting. All entries start + * as 0 (unusable). + */ void setup_guest_gdt(struct lg_cpu *cpu) { - /* Start with full 0-4G segments... */ + /* + * Start with full 0-4G segments...except the Guest is allowed to use + * them, so set the privilege level appropriately in the flags. + */ cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; - /* ...except the Guest is allowed to use them, so set the privilege - * level appropriately in the flags. */ cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); } -/*H:650 An optimization of copy_gdt(), for just the three "thead-local storage" - * entries. */ +/*H:650 + * An optimization of copy_gdt(), for just the three "thead-local storage" + * entries. + */ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) { unsigned int i; @@ -130,26 +151,34 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) gdt[i] = cpu->arch.gdt[i]; } -/*H:640 When the Guest is run on a different CPU, or the GDT entries have - * changed, copy_gdt() is called to copy the Guest's GDT entries across to this - * CPU's GDT. */ +/*H:640 + * When the Guest is run on a different CPU, or the GDT entries have changed, + * copy_gdt() is called to copy the Guest's GDT entries across to this CPU's + * GDT. + */ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) { unsigned int i; - /* The default entries from setup_default_gdt_entries() are not - * replaced. See ignored_gdt() above. 
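
The magic masks in fixup_gdt_table() and setup_default_gdt_entries() become less magical once the descriptor's high word is unpacked. The sketch below defines the architectural field positions itself and assumes GUEST_PL is 1, as lguest uses; it is an illustration, not the driver's code:

#include <stdint.h>
#include <stdio.h>

/* Fields in the high 32 bits ("b") of an x86 segment descriptor. */
#define DESC_ACCESSED 0x00000100u  /* bit 8: the type's "accessed" bit */
#define DESC_DPL_MASK 0x00006000u  /* bits 13-14: descriptor privilege */
#define DESC_PRESENT  0x00008000u  /* bit 15: segment present          */
#define GUEST_PL      1u           /* the Guest runs in ring 1         */

int main(void)
{
        uint32_t b = 0;  /* a careless Guest entry: DPL 0, never accessed */

        /* The two fixup_gdt_table() adjustments: */
        if ((b & DESC_DPL_MASK) == 0)
                b |= GUEST_PL << 13;    /* the 0x00006000 test above    */
        b |= DESC_ACCESSED;             /* pre-set so the CPU needn't   */

        printf("fixed-up b = %#010x (DPL %u)\n", b, (b & DESC_DPL_MASK) >> 13);

        /* The TSS magic is the same game: present (0x8000) plus type 9
         * (an available 32-bit TSS) shifted into bits 8-11 (0x0900). */
        printf("TSS flags   = %#010x\n", DESC_PRESENT | 0x0900u);
        return 0;
}

(And the 0x67 in the low word is merely the limit: the hardware TSS is 0x68 bytes and descriptor limits are inclusive, so Saturn and Mercury are off the hook.)
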
*/ + /* + * The default entries from setup_default_gdt_entries() are not + * replaced. See ignored_gdt() above. + */ for (i = 0; i < GDT_ENTRIES; i++) if (!ignored_gdt(i)) gdt[i] = cpu->arch.gdt[i]; } -/*H:620 This is where the Guest asks us to load a new GDT entry - * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. */ +/*H:620 + * This is where the Guest asks us to load a new GDT entry + * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. + */ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) { - /* We assume the Guest has the same number of GDT entries as the - * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ + /* + * We assume the Guest has the same number of GDT entries as the + * Host, otherwise we'd have to dynamically allocate the Guest GDT. + */ if (num >= ARRAY_SIZE(cpu->arch.gdt)) kill_guest(cpu, "too many gdt entries %i", num); @@ -157,15 +186,19 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) cpu->arch.gdt[num].a = lo; cpu->arch.gdt[num].b = hi; fixup_gdt_table(cpu, num, num+1); - /* Mark that the GDT changed so the core knows it has to copy it again, - * even if the Guest is run on the same CPU. */ + /* + * Mark that the GDT changed so the core knows it has to copy it again, + * even if the Guest is run on the same CPU. + */ cpu->changed |= CHANGED_GDT; } -/* This is the fast-track version for just changing the three TLS entries. +/* + * This is the fast-track version for just changing the three TLS entries. * Remember that this happens on every context switch, so it's worth * optimizing. But wouldn't it be neater to have a single hypercall to cover - * both cases? */ + * both cases? + */ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) { struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; @@ -175,7 +208,6 @@ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) /* Note that just the TLS entries have changed. */ cpu->changed |= CHANGED_GDT_TLS; } -/*:*/ /*H:660 * With this, we have finished the Host. diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index eaf722fe309a..96f7d88ec7f8 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -17,13 +17,15 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/*P:450 This file contains the x86-specific lguest code. It used to be all +/*P:450 + * This file contains the x86-specific lguest code. It used to be all * mixed in with drivers/lguest/core.c but several foolhardy code slashers * wrestled most of the dependencies out to here in preparation for porting * lguest to other architectures (see what I mean by foolhardy?). * * This also contains a couple of non-obvious setup and teardown pieces which - * were implemented after days of debugging pain. :*/ + * were implemented after days of debugging pain. +:*/ #include #include #include @@ -82,25 +84,33 @@ static DEFINE_PER_CPU(struct lg_cpu *, last_cpu); */ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) { - /* Copying all this data can be quite expensive. We usually run the + /* + * Copying all this data can be quite expensive. We usually run the * same Guest we ran last time (and that Guest hasn't run anywhere else * meanwhile). If that's not the case, we pretend everything in the - * Guest has changed. */ + * Guest has changed. 
+ */ if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { __get_cpu_var(last_cpu) = cpu; cpu->last_pages = pages; cpu->changed = CHANGED_ALL; } - /* These copies are pretty cheap, so we do them unconditionally: */ - /* Save the current Host top-level page directory. */ + /* + * These copies are pretty cheap, so we do them unconditionally: */ + /* Save the current Host top-level page directory. + */ pages->state.host_cr3 = __pa(current->mm->pgd); - /* Set up the Guest's page tables to see this CPU's pages (and no - * other CPU's pages). */ + /* + * Set up the Guest's page tables to see this CPU's pages (and no + * other CPU's pages). + */ map_switcher_in_guest(cpu, pages); - /* Set up the two "TSS" members which tell the CPU what stack to use + /* + * Set up the two "TSS" members which tell the CPU what stack to use * for traps which do directly into the Guest (ie. traps at privilege - * level 1). */ + * level 1). + */ pages->state.guest_tss.sp1 = cpu->esp1; pages->state.guest_tss.ss1 = cpu->ss1; @@ -125,40 +135,53 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) /* This is a dummy value we need for GCC's sake. */ unsigned int clobber; - /* Copy the guest-specific information into this CPU's "struct - * lguest_pages". */ + /* + * Copy the guest-specific information into this CPU's "struct + * lguest_pages". + */ copy_in_guest_info(cpu, pages); - /* Set the trap number to 256 (impossible value). If we fault while + /* + * Set the trap number to 256 (impossible value). If we fault while * switching to the Guest (bad segment registers or bug), this will - * cause us to abort the Guest. */ + * cause us to abort the Guest. + */ cpu->regs->trapnum = 256; - /* Now: we push the "eflags" register on the stack, then do an "lcall". + /* + * Now: we push the "eflags" register on the stack, then do an "lcall". * This is how we change from using the kernel code segment to using * the dedicated lguest code segment, as well as jumping into the * Switcher. * * The lcall also pushes the old code segment (KERNEL_CS) onto the * stack, then the address of this call. This stack layout happens to - * exactly match the stack layout created by an interrupt... */ + * exactly match the stack layout created by an interrupt... + */ asm volatile("pushf; lcall *lguest_entry" - /* This is how we tell GCC that %eax ("a") and %ebx ("b") - * are changed by this routine. The "=" means output. */ + /* + * This is how we tell GCC that %eax ("a") and %ebx ("b") + * are changed by this routine. The "=" means output. + */ : "=a"(clobber), "=b"(clobber) - /* %eax contains the pages pointer. ("0" refers to the + /* + * %eax contains the pages pointer. ("0" refers to the * 0-th argument above, ie "a"). %ebx contains the * physical address of the Guest's top-level page - * directory. */ + * directory. + */ : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) - /* We tell gcc that all these registers could change, + /* + * We tell gcc that all these registers could change, * which means we don't have to save and restore them in - * the Switcher. */ + * the Switcher. + */ : "memory", "%edx", "%ecx", "%edi", "%esi"); } /*:*/ -/*M:002 There are hooks in the scheduler which we can register to tell when we +/*M:002 + * There are hooks in the scheduler which we can register to tell when we * get kicked off the CPU (preempt_notifier_register()). This would allow us * to lazily disable SYSENTER which would regain some performance, and should * also simplify copy_in_guest_info(). 
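
The constraint strings in that asm block are ordinary GCC extended-asm syntax rather than anything lguest invented. Here is a minimal, compilable x86 illustration of the three parts (outputs, inputs, clobbers), using a made-up helper:

#include <stdio.h>

/* Add two numbers via inline asm, purely to show the constraint syntax. */
static unsigned int asm_add(unsigned int a, unsigned int b)
{
        unsigned int out;

        asm("addl %2, %1\n\t"
            "movl %1, %0"
            : "=r"(out), "+r"(a)  /* outputs: "=" write-only, "+" read/write */
            : "r"(b)              /* input                                   */
            : "cc");              /* clobber: we dirty the flags register    */
        return out;
}

int main(void)
{
        printf("%u\n", asm_add(40, 2));
        return 0;
}

run_guest_once() uses the same machinery with more interesting contents: the "0" and "1" constraints tie its inputs to whatever registers the outputs were given, and the long clobber list is what lets it avoid saving those registers around the Switcher call.
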
Note that we'd still need to restore @@ -166,56 +189,72 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * * We could also try using this hooks for PGE, but that might be too expensive. * - * The hooks were designed for KVM, but we can also put them to good use. :*/ + * The hooks were designed for KVM, but we can also put them to good use. +:*/ -/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts - * are disabled: we own the CPU. */ +/*H:040 + * This is the i386-specific code to setup and run the Guest. Interrupts + * are disabled: we own the CPU. + */ void lguest_arch_run_guest(struct lg_cpu *cpu) { - /* Remember the awfully-named TS bit? If the Guest has asked to set it + /* + * Remember the awfully-named TS bit? If the Guest has asked to set it * we set it now, so we can trap and pass that trap to the Guest if it - * uses the FPU. */ + * uses the FPU. + */ if (cpu->ts) unlazy_fpu(current); - /* SYSENTER is an optimized way of doing system calls. We can't allow + /* + * SYSENTER is an optimized way of doing system calls. We can't allow * it because it always jumps to privilege level 0. A normal Guest * won't try it because we don't advertise it in CPUID, but a malicious * Guest (or malicious Guest userspace program) could, so we tell the - * CPU to disable it before running the Guest. */ + * CPU to disable it before running the Guest. + */ if (boot_cpu_has(X86_FEATURE_SEP)) wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); - /* Now we actually run the Guest. It will return when something + /* + * Now we actually run the Guest. It will return when something * interesting happens, and we can examine its registers to see what it - * was doing. */ + * was doing. + */ run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); - /* Note that the "regs" structure contains two extra entries which are + /* + * Note that the "regs" structure contains two extra entries which are * not really registers: a trap number which says what interrupt or * trap made the switcher code come back, and an error code which some - * traps set. */ + * traps set. + */ /* Restore SYSENTER if it's supposed to be on. */ if (boot_cpu_has(X86_FEATURE_SEP)) wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); - /* If the Guest page faulted, then the cr2 register will tell us the + /* + * If the Guest page faulted, then the cr2 register will tell us the * bad virtual address. We have to grab this now, because once we * re-enable interrupts an interrupt could fault and thus overwrite - * cr2, or we could even move off to a different CPU. */ + * cr2, or we could even move off to a different CPU. + */ if (cpu->regs->trapnum == 14) cpu->arch.last_pagefault = read_cr2(); - /* Similarly, if we took a trap because the Guest used the FPU, + /* + * Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. * math_state_restore() may sleep and we may even move off to * a different CPU. So all the critical stuff should be done - * before this. */ + * before this. + */ else if (cpu->regs->trapnum == 7) math_state_restore(); } -/*H:130 Now we've examined the hypercall code; our Guest can make requests. +/*H:130 + * Now we've examined the hypercall code; our Guest can make requests. * Our Guest is usually so well behaved; it never tries to do things it isn't * allowed to, and uses hypercalls instead. 
Unfortunately, Linux's paravirtual * infrastructure isn't quite complete, because it doesn't contain replacements @@ -225,26 +264,33 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * * When the Guest uses one of these instructions, we get a trap (General * Protection Fault) and come here. We see if it's one of those troublesome - * instructions and skip over it. We return true if we did. */ + * instructions and skip over it. We return true if we did. + */ static int emulate_insn(struct lg_cpu *cpu) { u8 insn; unsigned int insnlen = 0, in = 0, shift = 0; - /* The eip contains the *virtual* address of the Guest's instruction: - * guest_pa just subtracts the Guest's page_offset. */ + /* + * The eip contains the *virtual* address of the Guest's instruction: + * guest_pa just subtracts the Guest's page_offset. + */ unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); - /* This must be the Guest kernel trying to do something, not userspace! + /* + * This must be the Guest kernel trying to do something, not userspace! * The bottom two bits of the CS segment register are the privilege - * level. */ + * level. + */ if ((cpu->regs->cs & 3) != GUEST_PL) return 0; /* Decoding x86 instructions is icky. */ insn = lgread(cpu, physaddr, u8); - /* 0x66 is an "operand prefix". It means it's using the upper 16 bits - of the eax register. */ + /* + * 0x66 is an "operand prefix". It means it's using the upper 16 bits + * of the eax register. + */ if (insn == 0x66) { shift = 16; /* The instruction is 1 byte so far, read the next byte. */ @@ -252,8 +298,10 @@ static int emulate_insn(struct lg_cpu *cpu) insn = lgread(cpu, physaddr + insnlen, u8); } - /* We can ignore the lower bit for the moment and decode the 4 opcodes - * we need to emulate. */ + /* + * We can ignore the lower bit for the moment and decode the 4 opcodes + * we need to emulate. + */ switch (insn & 0xFE) { case 0xE4: /* in ,%al */ insnlen += 2; @@ -274,9 +322,11 @@ static int emulate_insn(struct lg_cpu *cpu) return 0; } - /* If it was an "IN" instruction, they expect the result to be read + /* + * If it was an "IN" instruction, they expect the result to be read * into %eax, so we change %eax. We always return all-ones, which - * traditionally means "there's nothing there". */ + * traditionally means "there's nothing there". + */ if (in) { /* Lower bit tells is whether it's a 16 or 32 bit access */ if (insn & 0x1) @@ -290,7 +340,8 @@ static int emulate_insn(struct lg_cpu *cpu) return 1; } -/* Our hypercalls mechanism used to be based on direct software interrupts. +/* + * Our hypercalls mechanism used to be based on direct software interrupts. * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to * change over to using kvm hypercalls. * @@ -318,16 +369,20 @@ static int emulate_insn(struct lg_cpu *cpu) */ static void rewrite_hypercall(struct lg_cpu *cpu) { - /* This are the opcodes we use to patch the Guest. The opcode for "int + /* + * This are the opcodes we use to patch the Guest. The opcode for "int * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we - * complete the sequence with a NOP (0x90). */ + * complete the sequence with a NOP (0x90). + */ u8 insn[3] = {0xcd, 0x1f, 0x90}; __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); - /* The above write might have caused a copy of that page to be made + /* + * The above write might have caused a copy of that page to be made * (if it was read-only). We need to make sure the Guest has * up-to-date pagetables. 
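
For reference, the opcode arithmetic in emulate_insn() lines up with the x86 encoding as follows. The decoder below is a bare-bones standalone sketch over the same four opcode pairs; the real routine additionally fetches the bytes from Guest memory and fakes up %eax for IN:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Recognize the I/O instructions lguest skips: IN/OUT, imm8 or %dx forms. */
static bool decode_io(const uint8_t *insn, unsigned int *len, bool *is_in)
{
        unsigned int i = 0;

        if (insn[i] == 0x66)            /* operand-size prefix: 16-bit form */
                i++;

        switch (insn[i] & 0xFE) {       /* the low bit only picks 8 vs 16/32 bit */
        case 0xE4:                      /* in  $imm8, %al/%ax/%eax */
                *is_in = true;  *len = i + 2; return true;
        case 0xE6:                      /* out %al/%ax/%eax, $imm8 */
                *is_in = false; *len = i + 2; return true;
        case 0xEC:                      /* in  (%dx), ...          */
                *is_in = true;  *len = i + 1; return true;
        case 0xEE:                      /* out ..., (%dx)          */
                *is_in = false; *len = i + 1; return true;
        default:
                return false;           /* not something we emulate */
        }
}

int main(void)
{
        const uint8_t example[] = { 0xE4, 0x70 };       /* in $0x70, %al */
        unsigned int len;
        bool is_in;

        if (decode_io(example, &len, &is_in))
                printf("%s, %u bytes\n", is_in ? "IN" : "OUT", len);
        return 0;
}

The {0xcd, 0x1f, 0x90} patch in rewrite_hypercall() is equally literal: "int $0x1f" is two bytes, so a one-byte nop pads it to exactly overwrite the three-byte vmcall.
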
As this doesn't happen often, we can just - * drop them all. */ + * drop them all. + */ guest_pagetable_clear_all(cpu); } @@ -335,9 +390,11 @@ static bool is_hypercall(struct lg_cpu *cpu) { u8 insn[3]; - /* This must be the Guest kernel trying to do something. + /* + * This must be the Guest kernel trying to do something. * The bottom two bits of the CS segment register are the privilege - * level. */ + * level. + */ if ((cpu->regs->cs & 3) != GUEST_PL) return false; @@ -351,86 +408,105 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) { switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ - /* Check if this was one of those annoying IN or OUT + /* + * Check if this was one of those annoying IN or OUT * instructions which we need to emulate. If so, we just go - * back into the Guest after we've done it. */ + * back into the Guest after we've done it. + */ if (cpu->regs->errcode == 0) { if (emulate_insn(cpu)) return; } - /* If KVM is active, the vmcall instruction triggers a - * General Protection Fault. Normally it triggers an - * invalid opcode fault (6): */ + /* + * If KVM is active, the vmcall instruction triggers a General + * Protection Fault. Normally it triggers an invalid opcode + * fault (6): + */ case 6: - /* We need to check if ring == GUEST_PL and - * faulting instruction == vmcall. */ + /* + * We need to check if ring == GUEST_PL and faulting + * instruction == vmcall. + */ if (is_hypercall(cpu)) { rewrite_hypercall(cpu); return; } break; case 14: /* We've intercepted a Page Fault. */ - /* The Guest accessed a virtual address that wasn't mapped. + /* + * The Guest accessed a virtual address that wasn't mapped. * This happens a lot: we don't actually set up most of the page * tables for the Guest at all when we start: as it runs it asks * for more and more, and we set them up as required. In this * case, we don't even tell the Guest that the fault happened. * * The errcode tells whether this was a read or a write, and - * whether kernel or userspace code. */ + * whether kernel or userspace code. + */ if (demand_page(cpu, cpu->arch.last_pagefault, cpu->regs->errcode)) return; - /* OK, it's really not there (or not OK): the Guest needs to + /* + * OK, it's really not there (or not OK): the Guest needs to * know. We write out the cr2 value so it knows where the * fault occurred. * * Note that if the Guest were really messed up, this could * happen before it's done the LHCALL_LGUEST_INIT hypercall, so - * lg->lguest_data could be NULL */ + * lg->lguest_data could be NULL + */ if (cpu->lg->lguest_data && put_user(cpu->arch.last_pagefault, &cpu->lg->lguest_data->cr2)) kill_guest(cpu, "Writing cr2"); break; case 7: /* We've intercepted a Device Not Available fault. */ - /* If the Guest doesn't want to know, we already restored the - * Floating Point Unit, so we just continue without telling - * it. */ + /* + * If the Guest doesn't want to know, we already restored the + * Floating Point Unit, so we just continue without telling it. + */ if (!cpu->ts) return; break; case 32 ... 255: - /* These values mean a real interrupt occurred, in which case + /* + * These values mean a real interrupt occurred, in which case * the Host handler has already been run. 
We just do a * friendly check if another process should now be run, then - * return to run the Guest again */ + * return to run the Guest again + */ cond_resched(); return; case LGUEST_TRAP_ENTRY: - /* Our 'struct hcall_args' maps directly over our regs: we set - * up the pointer now to indicate a hypercall is pending. */ + /* + * Our 'struct hcall_args' maps directly over our regs: we set + * up the pointer now to indicate a hypercall is pending. + */ cpu->hcall = (struct hcall_args *)cpu->regs; return; } /* We didn't handle the trap, so it needs to go to the Guest. */ if (!deliver_trap(cpu, cpu->regs->trapnum)) - /* If the Guest doesn't have a handler (either it hasn't + /* + * If the Guest doesn't have a handler (either it hasn't * registered any yet, or it's one of the faults we don't let - * it handle), it dies with this cryptic error message. */ + * it handle), it dies with this cryptic error message. + */ kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", cpu->regs->trapnum, cpu->regs->eip, cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault : cpu->regs->errcode); } -/* Now we can look at each of the routines this calls, in increasing order of +/* + * Now we can look at each of the routines this calls, in increasing order of * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(), * deliver_trap() and demand_page(). After all those, we'll be ready to * examine the Switcher, and our philosophical understanding of the Host/Guest - * duality will be complete. :*/ + * duality will be complete. +:*/ static void adjust_pge(void *on) { if (on) @@ -439,13 +515,16 @@ static void adjust_pge(void *on) write_cr4(read_cr4() & ~X86_CR4_PGE); } -/*H:020 Now the Switcher is mapped and every thing else is ready, we need to do - * some more i386-specific initialization. */ +/*H:020 + * Now the Switcher is mapped and every thing else is ready, we need to do + * some more i386-specific initialization. + */ void __init lguest_arch_host_init(void) { int i; - /* Most of the i386/switcher.S doesn't care that it's been moved; on + /* + * Most of the i386/switcher.S doesn't care that it's been moved; on * Intel, jumps are relative, and it doesn't access any references to * external code or data. * @@ -453,7 +532,8 @@ void __init lguest_arch_host_init(void) * addresses are placed in a table (default_idt_entries), so we need to * update the table with the new addresses. switcher_offset() is a * convenience function which returns the distance between the - * compiled-in switcher code and the high-mapped copy we just made. */ + * compiled-in switcher code and the high-mapped copy we just made. + */ for (i = 0; i < IDT_ENTRIES; i++) default_idt_entries[i] += switcher_offset(); @@ -468,63 +548,81 @@ void __init lguest_arch_host_init(void) for_each_possible_cpu(i) { /* lguest_pages() returns this CPU's two pages. */ struct lguest_pages *pages = lguest_pages(i); - /* This is a convenience pointer to make the code fit one - * statement to a line. */ + /* This is a convenience pointer to make the code neater. */ struct lguest_ro_state *state = &pages->state; - /* The Global Descriptor Table: the Host has a different one + /* + * The Global Descriptor Table: the Host has a different one * for each CPU. We keep a descriptor for the GDT which says * where it is and how big it is (the size is actually the last - * byte, not the size, hence the "-1"). */ + * byte, not the size, hence the "-1"). 
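
That "-1" is the usual descriptor-table convention: the pseudo-descriptor loaded by lgdt holds an inclusive limit (the last valid byte offset), not a byte count. A small sketch, with an example entry count and a 32-bit base standing in for the real pointer type:

#include <stdint.h>
#include <stdio.h>

/* Shape of an x86 descriptor-table pseudo-descriptor (32-bit flavour). */
struct dt_ptr {
        uint16_t limit;         /* last valid byte offset, hence the -1 */
        uint32_t base;
} __attribute__((packed));

int main(void)
{
        unsigned int entries = 32;              /* example GDT size */
        struct dt_ptr gdtr = {
                .limit = entries * 8 - 1,       /* 8 bytes per descriptor */
                .base  = 0,                     /* would be the table's address */
        };

        printf("limit = %#x for %u entries\n", (unsigned)gdtr.limit, entries);
        return 0;
}

The same convention explains the sizeof(...)-1 values given to guest_idt_desc and guest_gdt_desc a few lines later.
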
+ */ state->host_gdt_desc.size = GDT_SIZE-1; state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); - /* All CPUs on the Host use the same Interrupt Descriptor + /* + * All CPUs on the Host use the same Interrupt Descriptor * Table, so we just use store_idt(), which gets this CPU's IDT - * descriptor. */ + * descriptor. + */ store_idt(&state->host_idt_desc); - /* The descriptors for the Guest's GDT and IDT can be filled + /* + * The descriptors for the Guest's GDT and IDT can be filled * out now, too. We copy the GDT & IDT into ->guest_gdt and - * ->guest_idt before actually running the Guest. */ + * ->guest_idt before actually running the Guest. + */ state->guest_idt_desc.size = sizeof(state->guest_idt)-1; state->guest_idt_desc.address = (long)&state->guest_idt; state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; state->guest_gdt_desc.address = (long)&state->guest_gdt; - /* We know where we want the stack to be when the Guest enters + /* + * We know where we want the stack to be when the Guest enters * the Switcher: in pages->regs. The stack grows upwards, so - * we start it at the end of that structure. */ + * we start it at the end of that structure. + */ state->guest_tss.sp0 = (long)(&pages->regs + 1); - /* And this is the GDT entry to use for the stack: we keep a - * couple of special LGUEST entries. */ + /* + * And this is the GDT entry to use for the stack: we keep a + * couple of special LGUEST entries. + */ state->guest_tss.ss0 = LGUEST_DS; - /* x86 can have a finegrained bitmap which indicates what I/O + /* + * x86 can have a finegrained bitmap which indicates what I/O * ports the process can use. We set it to the end of our - * structure, meaning "none". */ + * structure, meaning "none". + */ state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); - /* Some GDT entries are the same across all Guests, so we can - * set them up now. */ + /* + * Some GDT entries are the same across all Guests, so we can + * set them up now. + */ setup_default_gdt_entries(state); /* Most IDT entries are the same for all Guests, too.*/ setup_default_idt_entries(state, default_idt_entries); - /* The Host needs to be able to use the LGUEST segments on this - * CPU, too, so put them in the Host GDT. */ + /* + * The Host needs to be able to use the LGUEST segments on this + * CPU, too, so put them in the Host GDT. + */ get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; } - /* In the Switcher, we want the %cs segment register to use the + /* + * In the Switcher, we want the %cs segment register to use the * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so * it will be undisturbed when we switch. To change %cs and jump we - * need this structure to feed to Intel's "lcall" instruction. */ + * need this structure to feed to Intel's "lcall" instruction. + */ lguest_entry.offset = (long)switch_to_guest + switcher_offset(); lguest_entry.segment = LGUEST_CS; - /* Finally, we need to turn off "Page Global Enable". PGE is an + /* + * Finally, we need to turn off "Page Global Enable". PGE is an * optimization where page table entries are specially marked to show * they never change. The Host kernel marks all the kernel pages this * way because it's always present, even when userspace is running. @@ -534,16 +632,21 @@ void __init lguest_arch_host_init(void) * you'll get really weird bugs that you'll chase for two days. 
* * I used to turn PGE off every time we switched to the Guest and back - * on when we return, but that slowed the Switcher down noticibly. */ + * on when we return, but that slowed the Switcher down noticibly. + */ - /* We don't need the complexity of CPUs coming and going while we're - * doing this. */ + /* + * We don't need the complexity of CPUs coming and going while we're + * doing this. + */ get_online_cpus(); if (cpu_has_pge) { /* We have a broader idea of "global". */ /* Remember that this was originally set (for cleanup). */ cpu_had_pge = 1; - /* adjust_pge is a helper function which sets or unsets the PGE - * bit on its CPU, depending on the argument (0 == unset). */ + /* + * adjust_pge is a helper function which sets or unsets the PGE + * bit on its CPU, depending on the argument (0 == unset). + */ on_each_cpu(adjust_pge, (void *)0, 1); /* Turn off the feature in the global feature set. */ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); @@ -590,26 +693,32 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) { u32 tsc_speed; - /* The pointer to the Guest's "struct lguest_data" is the only argument. - * We check that address now. */ + /* + * The pointer to the Guest's "struct lguest_data" is the only argument. + * We check that address now. + */ if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, sizeof(*cpu->lg->lguest_data))) return -EFAULT; - /* Having checked it, we simply set lg->lguest_data to point straight + /* + * Having checked it, we simply set lg->lguest_data to point straight * into the Launcher's memory at the right place and then use * copy_to_user/from_user from now on, instead of lgread/write. I put * this in to show that I'm not immune to writing stupid - * optimizations. */ + * optimizations. + */ cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; - /* We insist that the Time Stamp Counter exist and doesn't change with + /* + * We insist that the Time Stamp Counter exist and doesn't change with * cpu frequency. Some devious chip manufacturers decided that TSC * changes could be handled in software. I decided that time going * backwards might be good for benchmarks, but it's bad for users. * * We also insist that the TSC be stable: the kernel detects unreliable - * TSCs for its own purposes, and we use that here. */ + * TSCs for its own purposes, and we use that here. + */ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) tsc_speed = tsc_khz; else @@ -625,38 +734,47 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) } /*:*/ -/*L:030 lguest_arch_setup_regs() +/*L:030 + * lguest_arch_setup_regs() * * Most of the Guest's registers are left alone: we used get_zeroed_page() to - * allocate the structure, so they will be 0. */ + * allocate the structure, so they will be 0. + */ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) { struct lguest_regs *regs = cpu->regs; - /* There are four "segment" registers which the Guest needs to boot: + /* + * There are four "segment" registers which the Guest needs to boot: * The "code segment" register (cs) refers to the kernel code segment * __KERNEL_CS, and the "data", "extra" and "stack" segment registers * refer to the kernel data segment __KERNEL_DS. * * The privilege level is packed into the lower bits. The Guest runs - * at privilege level 1 (GUEST_PL).*/ + * at privilege level 1 (GUEST_PL). + */ regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; regs->cs = __KERNEL_CS|GUEST_PL; - /* The "eflags" register contains miscellaneous flags. 
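
Since "the privilege level is packed into the lower bits" does all the work in those segment-register assignments, here is what a selector looks like when you pry it apart. The selector value below is an invented example, not the kernel's __KERNEL_DS:

#include <stdio.h>

#define EXAMPLE_DS 0x18u    /* GDT index 3, ring 0: stands in for __KERNEL_DS */
#define GUEST_PL   1u

int main(void)
{
        unsigned int ss = EXAMPLE_DS | GUEST_PL;

        printf("selector %#x: index %u, table %s, RPL %u\n",
               ss, ss >> 3, (ss & 0x4) ? "LDT" : "GDT", ss & 0x3);
        return 0;
}

Those same low two bits are what emulate_insn() reads back later with "cpu->regs->cs & 3" to check that it really is the ring-1 Guest kernel talking.
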
Bit 1 (0x002) + /* + * The "eflags" register contains miscellaneous flags. Bit 1 (0x002) * is supposed to always be "1". Bit 9 (0x200) controls whether * interrupts are enabled. We always leave interrupts enabled while - * running the Guest. */ + * running the Guest. + */ regs->eflags = X86_EFLAGS_IF | 0x2; - /* The "Extended Instruction Pointer" register says where the Guest is - * running. */ + /* + * The "Extended Instruction Pointer" register says where the Guest is + * running. + */ regs->eip = start; - /* %esi points to our boot information, at physical address 0, so don't - * touch it. */ + /* + * %esi points to our boot information, at physical address 0, so don't + * touch it. + */ - /* There are a couple of GDT entries the Guest expects when first - * booting. */ + /* There are a couple of GDT entries the Guest expects at boot. */ setup_guest_gdt(cpu); } diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 3fc15318a80f..6dec09793836 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S @@ -1,12 +1,15 @@ -/*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the +/*P:900 + * This is the Switcher: code which sits at 0xFFC00000 astride both the * Host and Guest to do the low-level Guest<->Host switch. It is as simple as * it can be made, but it's naturally very specific to x86. * * You have now completed Preparation. If this has whet your appetite; if you * are feeling invigorated and refreshed then the next, more challenging stage - * can be found in "make Guest". :*/ + * can be found in "make Guest". + :*/ -/*M:012 Lguest is meant to be simple: my rule of thumb is that 1% more LOC must +/*M:012 + * Lguest is meant to be simple: my rule of thumb is that 1% more LOC must * gain at least 1% more performance. Since neither LOC nor performance can be * measured beforehand, it generally means implementing a feature then deciding * if it's worth it. And once it's implemented, who can say no? @@ -31,11 +34,14 @@ * Host (which is actually really easy). * * Two questions remain. Would the performance gain outweigh the complexity? - * And who would write the verse documenting it? :*/ + * And who would write the verse documenting it? +:*/ -/*M:011 Lguest64 handles NMI. This gave me NMI envy (until I looked at their +/*M:011 + * Lguest64 handles NMI. This gave me NMI envy (until I looked at their * code). It's worth doing though, since it would let us use oprofile in the - * Host when a Guest is running. :*/ + * Host when a Guest is running. +:*/ /*S:100 * Welcome to the Switcher itself! diff --git a/include/linux/lguest.h b/include/linux/lguest.h index dbf2479e808e..0a3a11afd64b 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -1,5 +1,7 @@ -/* Things the lguest guest needs to know. Note: like all lguest interfaces, - * this is subject to wild and random change between versions. */ +/* + * Things the lguest guest needs to know. Note: like all lguest interfaces, + * this is subject to wild and random change between versions. + */ #ifndef _LINUX_LGUEST_H #define _LINUX_LGUEST_H @@ -11,32 +13,42 @@ #define LG_CLOCK_MIN_DELTA 100UL #define LG_CLOCK_MAX_DELTA ULONG_MAX -/*G:031 The second method of communicating with the Host is to via "struct +/*G:031 + * The second method of communicating with the Host is to via "struct * lguest_data". Once the Guest's initialization hypercall tells the Host where - * this is, the Guest and Host both publish information in it. 
:*/ + * this is, the Guest and Host both publish information in it. +:*/ struct lguest_data { - /* 512 == enabled (same as eflags in normal hardware). The Guest - * changes interrupts so often that a hypercall is too slow. */ + /* + * 512 == enabled (same as eflags in normal hardware). The Guest + * changes interrupts so often that a hypercall is too slow. + */ unsigned int irq_enabled; /* Fine-grained interrupt disabling by the Guest */ DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); - /* The Host writes the virtual address of the last page fault here, + /* + * The Host writes the virtual address of the last page fault here, * which saves the Guest a hypercall. CR2 is the native register where - * this address would normally be found. */ + * this address would normally be found. + */ unsigned long cr2; /* Wallclock time set by the Host. */ struct timespec time; - /* Interrupt pending set by the Host. The Guest should do a hypercall - * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */ + /* + * Interrupt pending set by the Host. The Guest should do a hypercall + * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). + */ int irq_pending; - /* Async hypercall ring. Instead of directly making hypercalls, we can + /* + * Async hypercall ring. Instead of directly making hypercalls, we can * place them in here for processing the next time the Host wants. - * This batching can be quite efficient. */ + * This batching can be quite efficient. + */ /* 0xFF == done (set by Host), 0 == pending (set by Guest). */ u8 hcall_status[LHCALL_RING_SIZE]; diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index bfefbdf7498a..495203ff221c 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -29,8 +29,10 @@ struct lguest_device_desc { __u8 type; /* The number of virtqueues (first in config array) */ __u8 num_vq; - /* The number of bytes of feature bits. Multiply by 2: one for host - * features and one for Guest acknowledgements. */ + /* + * The number of bytes of feature bits. Multiply by 2: one for host + * features and one for Guest acknowledgements. + */ __u8 feature_len; /* The number of bytes of the config array after virtqueues. */ __u8 config_len; @@ -39,8 +41,10 @@ struct lguest_device_desc { __u8 config[0]; }; -/*D:135 This is how we expect the device configuration field for a virtqueue - * to be laid out in config space. */ +/*D:135 + * This is how we expect the device configuration field for a virtqueue + * to be laid out in config space. + */ struct lguest_vqconfig { /* The number of entries in the virtio_ring */ __u16 num; @@ -61,7 +65,9 @@ enum lguest_req LHREQ_EVENTFD, /* + address, fd. */ }; -/* The alignment to use between consumer and producer parts of vring. - * x86 pagesize for historical reasons. */ +/* + * The alignment to use between consumer and producer parts of vring. + * x86 pagesize for historical reasons. + */ #define LGUEST_VRING_ALIGN 4096 #endif /* _LINUX_LGUEST_LAUNCHER */ -- cgit v1.2.3 From a91d74a3c4de8115295ee87350c13a329164aaaf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 30 Jul 2009 16:03:45 -0600 Subject: lguest: update commentry Every so often, after code shuffles, I need to go through and unbitrot the Lguest Journey (see drivers/lguest/README). Since we now use RCU in a simple form in one place I took the opportunity to expand that explanation. 
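The simple use of RCU mentioned above is the eventfd map in drivers/lguest/lguest_user.c, and the expanded commentary shows up in the hunks for that file further below. As a rough, illustrative sketch of the pattern being documented -- lock-free readers walking an array while an updater publishes a wholesale replacement -- the following uses simplified stand-in names rather than the lguest ones, and assumes updaters are serialized by the caller (the driver uses a mutex for that):

/*
 * Illustrative sketch only: simplified stand-ins for the lguest eventfd map.
 */
#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

struct eventfd_entry {
	unsigned long addr;		/* LHCALL_NOTIFY value to match */
	int fd;				/* where the notification goes */
};

struct eventfd_map {
	unsigned int num;
	struct eventfd_entry entries[];
};

/* Reader side: cheap and lock-free, may run concurrently with an update. */
static bool notify_addr_known(struct eventfd_map **mapp, unsigned long addr)
{
	struct eventfd_map *map;
	unsigned int i;
	bool found = false;

	rcu_read_lock();
	map = rcu_dereference(*mapp);	/* pairs with rcu_assign_pointer() */
	for (i = 0; i < map->num; i++) {
		if (map->entries[i].addr == addr) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

/* Update side: copy into a one-larger array, publish it, then wait until no
 * reader can still be looking at the old array before freeing it. */
static int notify_addr_add(struct eventfd_map **mapp, unsigned long addr, int fd)
{
	struct eventfd_map *old = *mapp, *new;

	new = kmalloc(sizeof(*new) + (old->num + 1) * sizeof(new->entries[0]),
		      GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	memcpy(new->entries, old->entries, old->num * sizeof(new->entries[0]));
	new->entries[old->num].addr = addr;
	new->entries[old->num].fd = fd;
	new->num = old->num + 1;

	rcu_assign_pointer(*mapp, new);	/* readers now see the new array */
	synchronize_rcu();		/* wait out readers of the old one */
	kfree(old);
	return 0;
}

The expensive step, synchronize_rcu(), is paid only on the rare update path (the Launcher only attaches eventfds while setting up devices), which is what makes the unlocked array search on the hot notification path a reasonable trade.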
Signed-off-by: Rusty Russell Cc: Ingo Molnar Cc: Paul McKenney --- Documentation/lguest/lguest.c | 184 +++++++++++++++++++++++++++--------- arch/x86/include/asm/lguest_hcall.h | 8 +- arch/x86/lguest/boot.c | 99 ++++++++++++++----- arch/x86/lguest/i386_head.S | 2 + drivers/lguest/core.c | 7 +- drivers/lguest/hypercalls.c | 6 +- drivers/lguest/lguest_device.c | 11 ++- drivers/lguest/lguest_user.c | 100 ++++++++++++++++++-- drivers/lguest/page_tables.c | 84 ++++++++++++---- drivers/lguest/x86/core.c | 2 +- drivers/lguest/x86/switcher_32.S | 6 +- 11 files changed, 398 insertions(+), 111 deletions(-) (limited to 'drivers') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index aa66a52b73e9..45163651b519 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -49,7 +49,7 @@ #include "linux/virtio_ring.h" #include "asm/bootparam.h" /*L:110 - * We can ignore the 39 include files we need for this program, but I do want + * We can ignore the 42 include files we need for this program, but I do want * to draw attention to the use of kernel-style types. * * As Linus said, "C is a Spartan language, and so should your naming be." I @@ -305,6 +305,11 @@ static void *map_zeroed_pages(unsigned int num) PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) err(1, "Mmaping %u pages of /dev/zero", num); + + /* + * One neat mmap feature is that you can close the fd, and it + * stays mapped. + */ close(fd); return addr; @@ -557,7 +562,7 @@ static void tell_kernel(unsigned long start) } /*:*/ -/* +/*L:200 * Device Handling. * * When the Guest gives us a buffer, it sends an array of addresses and sizes. @@ -608,7 +613,10 @@ static unsigned next_desc(struct vring_desc *desc, return next; } -/* This actually sends the interrupt for this virtqueue */ +/* + * This actually sends the interrupt for this virtqueue, if we've used a + * buffer. + */ static void trigger_irq(struct virtqueue *vq) { unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; @@ -629,12 +637,12 @@ static void trigger_irq(struct virtqueue *vq) } /* - * This looks in the virtqueue and for the first available buffer, and converts + * This looks in the virtqueue for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * - * This function returns the descriptor number found. + * This function waits if necessary, and returns the descriptor number found. */ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct iovec iov[], @@ -644,10 +652,14 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct vring_desc *desc; u16 last_avail = lg_last_avail(vq); + /* There's nothing available? */ while (last_avail == vq->vring.avail->idx) { u64 event; - /* OK, tell Guest about progress up to now. */ + /* + * Since we're about to sleep, now is a good time to tell the + * Guest about what we've used up to now. + */ trigger_irq(vq); /* OK, now we need to know about added descriptors. */ @@ -734,8 +746,9 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, } /* - * After we've used one of their buffers, we tell them about it. We'll then - * want to send them an interrupt, using trigger_irq(). + * After we've used one of their buffers, we tell the Guest about it. 
Sometime + * later we'll want to send them an interrupt using trigger_irq(); note that + * wait_for_vq_desc() does that for us if it has to wait. */ static void add_used(struct virtqueue *vq, unsigned int head, int len) { @@ -782,12 +795,12 @@ static void console_input(struct virtqueue *vq) struct console_abort *abort = vq->dev->priv; struct iovec iov[vq->vring.num]; - /* Make sure there's a descriptor waiting. */ + /* Make sure there's a descriptor available. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); if (out_num) errx(1, "Output buffers in console in queue?"); - /* Read it in. */ + /* Read into it. This is where we usually wait. */ len = readv(STDIN_FILENO, iov, in_num); if (len <= 0) { /* Ran out of input? */ @@ -800,6 +813,7 @@ static void console_input(struct virtqueue *vq) pause(); } + /* Tell the Guest we used a buffer. */ add_used_and_trigger(vq, head, len); /* @@ -834,15 +848,23 @@ static void console_output(struct virtqueue *vq) unsigned int head, out, in; struct iovec iov[vq->vring.num]; + /* We usually wait in here, for the Guest to give us something. */ head = wait_for_vq_desc(vq, iov, &out, &in); if (in) errx(1, "Input buffers in console output queue?"); + + /* writev can return a partial write, so we loop here. */ while (!iov_empty(iov, out)) { int len = writev(STDOUT_FILENO, iov, out); if (len <= 0) err(1, "Write to stdout gave %i", len); iov_consume(iov, out, len); } + + /* + * We're finished with that buffer: if we're going to sleep, + * wait_for_vq_desc() will prod the Guest with an interrupt. + */ add_used(vq, head, 0); } @@ -862,15 +884,30 @@ static void net_output(struct virtqueue *vq) unsigned int head, out, in; struct iovec iov[vq->vring.num]; + /* We usually wait in here for the Guest to give us a packet. */ head = wait_for_vq_desc(vq, iov, &out, &in); if (in) errx(1, "Input buffers in net output queue?"); + /* + * Send the whole thing through to /dev/net/tun. It expects the exact + * same format: what a coincidence! + */ if (writev(net_info->tunfd, iov, out) < 0) errx(1, "Write to tun failed?"); + + /* + * Done with that one; wait_for_vq_desc() will send the interrupt if + * all packets are processed. + */ add_used(vq, head, 0); } -/* Will reading from this file descriptor block? */ +/* + * Handling network input is a bit trickier, because I've tried to optimize it. + * + * First we have a helper routine which tells is if from this file descriptor + * (ie. the /dev/net/tun device) will block: + */ static bool will_block(int fd) { fd_set fdset; @@ -880,7 +917,11 @@ static bool will_block(int fd) return select(fd+1, &fdset, NULL, NULL, &zero) != 1; } -/* This handles packets coming in from the tun device to our Guest. */ +/* + * This handles packets coming in from the tun device to our Guest. Like all + * service routines, it gets called again as soon as it returns, so you don't + * see a while(1) loop here. + */ static void net_input(struct virtqueue *vq) { int len; @@ -888,21 +929,38 @@ static void net_input(struct virtqueue *vq) struct iovec iov[vq->vring.num]; struct net_info *net_info = vq->dev->priv; + /* + * Get a descriptor to write an incoming packet into. This will also + * send an interrupt if they're out of descriptors. + */ head = wait_for_vq_desc(vq, iov, &out, &in); if (out) errx(1, "Output buffers in net input queue?"); - /* Deliver interrupt now, since we're about to sleep. */ + /* + * If it looks like we'll block reading from the tun device, send them + * an interrupt. 
+ */ if (vq->pending_used && will_block(net_info->tunfd)) trigger_irq(vq); + /* + * Read in the packet. This is where we normally wait (when there's no + * incoming network traffic). + */ len = readv(net_info->tunfd, iov, in); if (len <= 0) err(1, "Failed to read from tun."); + + /* + * Mark that packet buffer as used, but don't interrupt here. We want + * to wait until we've done as much work as we can. + */ add_used(vq, head, len); } +/*:*/ -/* This is the helper to create threads. */ +/* This is the helper to create threads: run the service routine in a loop. */ static int do_thread(void *_vq) { struct virtqueue *vq = _vq; @@ -950,11 +1008,14 @@ static void reset_device(struct device *dev) signal(SIGCHLD, (void *)kill_launcher); } +/*L:216 + * This actually creates the thread which services the virtqueue for a device. + */ static void create_thread(struct virtqueue *vq) { /* - * Create stack for thread and run it. Since the stack grows upwards, - * we point the stack pointer to the end of this region. + * Create stack for thread. Since the stack grows upwards, we point + * the stack pointer to the end of this region. */ char *stack = malloc(32768); unsigned long args[] = { LHREQ_EVENTFD, @@ -966,17 +1027,22 @@ static void create_thread(struct virtqueue *vq) err(1, "Creating eventfd"); args[2] = vq->eventfd; - /* Attach an eventfd to this virtqueue: it will go off - * when the Guest does an LHCALL_NOTIFY for this vq. */ + /* + * Attach an eventfd to this virtqueue: it will go off when the Guest + * does an LHCALL_NOTIFY for this vq. + */ if (write(lguest_fd, &args, sizeof(args)) != 0) err(1, "Attaching eventfd"); - /* CLONE_VM: because it has to access the Guest memory, and - * SIGCHLD so we get a signal if it dies. */ + /* + * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so + * we get a signal if it dies. + */ vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); if (vq->thread == (pid_t)-1) err(1, "Creating clone"); - /* We close our local copy, now the child has it. */ + + /* We close our local copy now the child has it. */ close(vq->eventfd); } @@ -1028,7 +1094,10 @@ static void update_device_status(struct device *dev) } } -/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ +/*L:215 + * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In + * particular, it's used to notify us of device status changes during boot. + */ static void handle_output(unsigned long addr) { struct device *i; @@ -1037,18 +1106,32 @@ static void handle_output(unsigned long addr) for (i = devices.dev; i; i = i->next) { struct virtqueue *vq; - /* Notifications to device descriptors update device status. */ + /* + * Notifications to device descriptors mean they updated the + * device status. + */ if (from_guest_phys(addr) == i->desc) { update_device_status(i); return; } - /* Devices *can* be used before status is set to DRIVER_OK. */ + /* + * Devices *can* be used before status is set to DRIVER_OK. + * The original plan was that they would never do this: they + * would always finish setting up their status bits before + * actually touching the virtqueues. In practice, we allowed + * them to, and they do (eg. the disk probes for partition + * tables as part of initialization). + * + * If we see this, we start the device: once it's running, we + * expect the device to catch all the notifications. 
+ */ for (vq = i->vq; vq; vq = vq->next) { if (addr != vq->config.pfn*getpagesize()) continue; if (i->running) errx(1, "Notification on running %s", i->name); + /* This just calls create_thread() for each virtqueue */ start_device(i); return; } @@ -1132,6 +1215,11 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->next = NULL; vq->last_avail_idx = 0; vq->dev = dev; + + /* + * This is the routine the service thread will run, and its Process ID + * once it's running. + */ vq->service = service; vq->thread = (pid_t)-1; @@ -1202,7 +1290,8 @@ static void set_config(struct device *dev, unsigned len, const void *conf) /* * This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. + * calling new_dev_desc() to allocate the descriptor and device memory. We + * don't actually start the service threads until later. * * See what I mean about userspace being boring? */ @@ -1478,19 +1567,7 @@ static void setup_tun_net(char *arg) verbose("device %u: tun %s: %s\n", devices.device_num, tapif, arg); } - -/* - * Our block (disk) device should be really simple: the Guest asks for a block - * number and we read or write that position in the file. Unfortunately, that - * was amazingly slow: the Guest waits until the read is finished before - * running anything else, even if it could have been doing useful work. - * - * We could use async I/O, except it's reputed to suck so hard that characters - * actually go missing from your code when you try to use it. - * - * So this was one reason why lguest now does all virtqueue servicing in - * separate threads: it's more efficient and more like a real device. - */ +/*:*/ /* This hangs off device->priv. */ struct vblk_info @@ -1512,8 +1589,16 @@ struct vblk_info /*L:210 * The Disk * - * Remember that the block device is handled by a separate I/O thread. We head - * straight into the core of that thread here: + * The disk only has one virtqueue, so it only has one thread. It is really + * simple: the Guest asks for a block number and we read or write that position + * in the file. + * + * Before we serviced each virtqueue in a separate thread, that was unacceptably + * slow: the Guest waits until the read is finished before running anything + * else, even if it could have been doing useful work. + * + * We could have used async I/O, except it's reputed to suck so hard that + * characters actually go missing from your code when you try to use it. */ static void blk_request(struct virtqueue *vq) { @@ -1525,7 +1610,10 @@ static void blk_request(struct virtqueue *vq) struct iovec iov[vq->vring.num]; off64_t off; - /* Get the next request. */ + /* + * Get the next request, where we normally wait. It triggers the + * interrupt to acknowledge previously serviced requests (if any). + */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); /* @@ -1539,6 +1627,10 @@ static void blk_request(struct virtqueue *vq) out = convert(&iov[0], struct virtio_blk_outhdr); in = convert(&iov[out_num+in_num-1], u8); + /* + * For historical reasons, block operations are expressed in 512 byte + * "sectors". + */ off = out->sector * 512; /* @@ -1614,6 +1706,7 @@ static void blk_request(struct virtqueue *vq) if (out->type & VIRTIO_BLK_T_BARRIER) fdatasync(vblk->fd); + /* Finished that request. 
*/ add_used(vq, head, wlen); } @@ -1682,9 +1775,8 @@ static void rng_input(struct virtqueue *vq) errx(1, "Output buffers in rng?"); /* - * This is why we convert to iovecs: the readv() call uses them, and so - * it reads straight into the Guest's buffer. We loop to make sure we - * fill it. + * Just like the console write, we loop to cover the whole iovec. + * In this case, short reads actually happen quite a bit. */ while (!iov_empty(iov, in_num)) { len = readv(rng_info->rfd, iov, in_num); @@ -1818,7 +1910,9 @@ int main(int argc, char *argv[]) devices.lastdev = NULL; devices.next_irq = 1; + /* We're CPU 0. In fact, that's the only CPU possible right now. */ cpu_id = 0; + /* * We need to know how much memory so we can set up the device * descriptor and memory pages for the devices as we parse the command @@ -1926,7 +2020,7 @@ int main(int argc, char *argv[]) */ tell_kernel(start); - /* Ensure that we terminate if a child dies. */ + /* Ensure that we terminate if a device-servicing child dies. */ signal(SIGCHLD, kill_launcher); /* If we exit via err(), this kills all the threads, restores tty. */ diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index cceb73e12e50..ba0eed8aa1a6 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -35,10 +35,10 @@ * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * - * We use the KVM hypercall mechanism. Seventeen hypercalls are - * available: the hypercall number is put in the %eax register, and the - * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. - * If a return value makes sense, it's returned in %eax. + * We use the KVM hypercall mechanism, though completely different hypercall + * numbers. Seventeen hypercalls are available: the hypercall number is put in + * the %eax register, and the arguments (when required) are placed in %ebx, + * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 025c04d18f2b..d677fa9ca650 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -154,6 +154,7 @@ static void lazy_hcall1(unsigned long call, async_hcall(call, arg1, 0, 0, 0); } +/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ static void lazy_hcall2(unsigned long call, unsigned long arg1, unsigned long arg2) @@ -189,8 +190,10 @@ static void lazy_hcall4(unsigned long call, } #endif -/* When lazy mode is turned off reset the per-cpu lazy mode variable and then - * issue the do-nothing hypercall to flush any stored calls. */ +/*G:036 + * When lazy mode is turned off reset the per-cpu lazy mode variable and then + * issue the do-nothing hypercall to flush any stored calls. +:*/ static void lguest_leave_lazy_mmu_mode(void) { kvm_hypercall0(LHCALL_FLUSH_ASYNC); @@ -250,13 +253,11 @@ extern void lg_irq_enable(void); extern void lg_restore_fl(unsigned long flags); /*M:003 - * Note that we don't check for outstanding interrupts when we re-enable them - * (or when we unmask an interrupt). This seems to work for the moment, since - * interrupts are rare and we'll just get the interrupt on the next timer tick, - * but now we can run with CONFIG_NO_HZ, we should revisit this. 
One way would - * be to put the "irq_enabled" field in a page by itself, and have the Host - * write-protect it when an interrupt comes in when irqs are disabled. There - * will then be a page fault as soon as interrupts are re-enabled. + * We could be more efficient in our checking of outstanding interrupts, rather + * than using a branch. One way would be to put the "irq_enabled" field in a + * page by itself, and have the Host write-protect it when an interrupt comes + * in when irqs are disabled. There will then be a page fault as soon as + * interrupts are re-enabled. * * A better method is to implement soft interrupt disable generally for x86: * instead of disabling interrupts, we set a flag. If an interrupt does come @@ -568,7 +569,7 @@ static void lguest_write_cr4(unsigned long val) * cr3 ---> +---------+ * | --------->+---------+ * | | | PADDR1 | - * Top-level | | PADDR2 | + * Mid-level | | PADDR2 | * (PMD) page | | | * | | Lower-level | * | | (PTE) page | @@ -588,23 +589,62 @@ static void lguest_write_cr4(unsigned long val) * Index into top Index into second Offset within page * page directory page pagetable page * - * The kernel spends a lot of time changing both the top-level page directory - * and lower-level pagetable pages. The Guest doesn't know physical addresses, - * so while it maintains these page tables exactly like normal, it also needs - * to keep the Host informed whenever it makes a change: the Host will create - * the real page tables based on the Guests'. + * Now, unfortunately, this isn't the whole story: Intel added Physical Address + * Extension (PAE) to allow 32 bit systems to use 64GB of memory (ie. 36 bits). + * These are held in 64-bit page table entries, so we can now only fit 512 + * entries in a page, and the neat three-level tree breaks down. + * + * The result is a four level page table: + * + * cr3 --> [ 4 Upper ] + * [ Level ] + * [ Entries ] + * [(PUD Page)]---> +---------+ + * | --------->+---------+ + * | | | PADDR1 | + * Mid-level | | PADDR2 | + * (PMD) page | | | + * | | Lower-level | + * | | (PTE) page | + * | | | | + * .... .... + * + * + * And the virtual address is decoded as: + * + * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + * |<-2->|<--- 9 bits ---->|<---- 9 bits --->|<------ 12 bits ------>| + * Index into Index into mid Index into lower Offset within page + * top entries directory page pagetable page + * + * It's too hard to switch between these two formats at runtime, so Linux only + * supports one or the other depending on whether CONFIG_X86_PAE is set. Many + * distributions turn it on, and not just for people with silly amounts of + * memory: the larger PTE entries allow room for the NX bit, which lets the + * kernel disable execution of pages and increase security. + * + * This was a problem for lguest, which couldn't run on these distributions; + * then Matias Zabaljauregui figured it all out and implemented it, and only a + * handful of puppies were crushed in the process! + * + * Back to our point: the kernel spends a lot of time changing both the + * top-level page directory and lower-level pagetable pages. The Guest doesn't + * know physical addresses, so while it maintains these page tables exactly + * like normal, it also needs to keep the Host informed whenever it makes a + * change: the Host will create the real page tables based on the Guests'. */ /* - * The Guest calls this to set a second-level entry (pte), ie. to map a page - * into a process' address space. 
We set the entry then tell the Host the - * toplevel and address this corresponds to. The Guest uses one pagetable per - * process, so we need to tell the Host which one we're changing (mm->pgd). + * The Guest calls this after it has set a second-level entry (pte), ie. to map + * a page into a process' address space. Wetell the Host the toplevel and + * address this corresponds to. The Guest uses one pagetable per process, so + * we need to tell the Host which one we're changing (mm->pgd). */ static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { #ifdef CONFIG_X86_PAE + /* PAE needs to hand a 64 bit page table entry, so it uses two args. */ lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low, ptep->pte_high); #else @@ -612,6 +652,7 @@ static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, #endif } +/* This is the "set and update" combo-meal-deal version. */ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { @@ -672,6 +713,11 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) } #ifdef CONFIG_X86_PAE +/* + * With 64-bit PTE values, we need to be careful setting them: if we set 32 + * bits at a time, the hardware could see a weird half-set entry. These + * versions ensure we update all 64 bits at once. + */ static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) { native_set_pte_atomic(ptep, pte); @@ -679,13 +725,14 @@ static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) lazy_hcall1(LHCALL_FLUSH_TLB, 1); } -void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { native_pte_clear(mm, addr, ptep); lguest_pte_update(mm, addr, ptep); } -void lguest_pmd_clear(pmd_t *pmdp) +static void lguest_pmd_clear(pmd_t *pmdp) { lguest_set_pmd(pmdp, __pmd(0)); } @@ -784,6 +831,14 @@ static void __init lguest_init_IRQ(void) irq_ctx_init(smp_processor_id()); } +/* + * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so + * rather than set them in lguest_init_IRQ we are called here every time an + * lguest device needs an interrupt. + * + * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should + * pass that up! + */ void lguest_setup_irq(unsigned int irq) { irq_to_desc_alloc_node(irq, 0); @@ -1298,7 +1353,7 @@ __init void lguest_init(void) */ switch_to_new_gdt(0); - /* As described in head_32.S, we map the first 128M of memory. */ + /* We actually boot with all memory mapped, but let's say 128MB. */ max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; /* diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index db6aa95eb054..27eac0faee48 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -102,6 +102,7 @@ send_interrupts: * create one manually here. */ .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + /* Put eax back the way we found it. */ popl %eax ret @@ -125,6 +126,7 @@ ENTRY(lg_restore_fl) jnz send_interrupts /* Again, the normal path has used no extra registers. Clever, huh? */ ret +/*:*/ /* These demark the EIP range where host should never deliver interrupts. 
*/ .global lguest_noirq_start diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index cd058bc903ff..1e2cb846b3c9 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -217,10 +217,15 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* * It's possible the Guest did a NOTIFY hypercall to the - * Launcher, in which case we return from the read() now. + * Launcher. */ if (cpu->pending_notify) { + /* + * Does it just needs to write to a registered + * eventfd (ie. the appropriate virtqueue thread)? + */ if (!send_notify_to_eventfd(cpu)) { + /* OK, we tell the main Laucher. */ if (put_user(cpu->pending_notify, user)) return -EFAULT; return sizeof(cpu->pending_notify); diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 787ab4bc09f0..83511eb0923d 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -59,7 +59,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) case LHCALL_SHUTDOWN: { char msg[128]; /* - * Shutdown is such a trivial hypercall that we do it in four + * Shutdown is such a trivial hypercall that we do it in five * lines right here. * * If the lgread fails, it will call kill_guest() itself; the @@ -245,6 +245,10 @@ static void initialize(struct lg_cpu *cpu) * device), the Guest will still see the old page. In practice, this never * happens: why would the Guest read a page which it has never written to? But * a similar scenario might one day bite us, so it's worth mentioning. + * + * Note that if we used a shared anonymous mapping in the Launcher instead of + * mapping /dev/zero private, we wouldn't worry about cop-on-write. And we + * need that to switch the Launcher to processes (away from threads) anyway. :*/ /*H:100 diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index cc000e79c3d1..1401c1ace1ec 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -236,7 +236,7 @@ static void lg_notify(struct virtqueue *vq) extern void lguest_setup_irq(unsigned int irq); /* - * This routine finds the first virtqueue described in the configuration of + * This routine finds the Nth virtqueue described in the configuration of * this device and sets it up. * * This is kind of an ugly duckling. It'd be nicer to have a standard @@ -244,9 +244,6 @@ extern void lguest_setup_irq(unsigned int irq); * everyone wants to do it differently. The KVM coders want the Guest to * allocate its own pages and tell the Host where they are, but for lguest it's * simpler for the Host to simply tell us where the pages are. - * - * So we provide drivers with a "find the Nth virtqueue and set it up" - * function. */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, unsigned index, @@ -422,7 +419,11 @@ static void add_lguest_device(struct lguest_device_desc *d, /* This devices' parent is the lguest/ dir. */ ldev->vdev.dev.parent = lguest_root; - /* We have a unique device index thanks to the dev_index counter. */ + /* + * The device type comes straight from the descriptor. There's also a + * device vendor field in the virtio_device struct, which we leave as + * 0. 
+ */ ldev->vdev.id.device = d->type; /* * We have a simple set of routines for querying the device's diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 7e92017103dc..b4d3f7ca554f 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -1,9 +1,8 @@ -/*P:200 - * This contains all the /dev/lguest code, whereby the userspace launcher +/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher * controls and communicates with the Guest. For example, the first write will - * tell us the Guest's memory layout, pagetable, entry point and kernel address - * offset. A read will run the Guest until something happens, such as a signal - * or the Guest doing a NOTIFY out to the Launcher. + * tell us the Guest's memory layout and entry point. A read will run the + * Guest until something happens, such as a signal or the Guest doing a NOTIFY + * out to the Launcher. :*/ #include #include @@ -13,14 +12,41 @@ #include #include "lg.h" +/*L:056 + * Before we move on, let's jump ahead and look at what the kernel does when + * it needs to look up the eventfds. That will complete our picture of how we + * use RCU. + * + * The notification value is in cpu->pending_notify: we return true if it went + * to an eventfd. + */ bool send_notify_to_eventfd(struct lg_cpu *cpu) { unsigned int i; struct lg_eventfd_map *map; - /* lg->eventfds is RCU-protected */ + /* + * This "rcu_read_lock()" helps track when someone is still looking at + * the (RCU-using) eventfds array. It's not actually a lock at all; + * indeed it's a noop in many configurations. (You didn't expect me to + * explain all the RCU secrets here, did you?) + */ rcu_read_lock(); + /* + * rcu_dereference is the counter-side of rcu_assign_pointer(); it + * makes sure we don't access the memory pointed to by + * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, + * but Alpha allows this! Paul McKenney points out that a really + * aggressive compiler could have the same effect: + * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html + * + * So play safe, use rcu_dereference to get the rcu-protected pointer: + */ map = rcu_dereference(cpu->lg->eventfds); + /* + * Simple array search: even if they add an eventfd while we do this, + * we'll continue to use the old array and just won't see the new one. + */ for (i = 0; i < map->num; i++) { if (map->map[i].addr == cpu->pending_notify) { eventfd_signal(map->map[i].event, 1); @@ -28,14 +54,43 @@ bool send_notify_to_eventfd(struct lg_cpu *cpu) break; } } + /* We're done with the rcu-protected variable cpu->lg->eventfds. */ rcu_read_unlock(); + + /* If we cleared the notification, it's because we found a match. */ return cpu->pending_notify == 0; } +/*L:055 + * One of the more tricksy tricks in the Linux Kernel is a technique called + * Read Copy Update. Since one point of lguest is to teach lguest journeyers + * about kernel coding, I use it here. (In case you're curious, other purposes + * include learning about virtualization and instilling a deep appreciation for + * simplicity and puppies). + * + * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we + * add new eventfds without ever blocking readers from accessing the array. + * The current Launcher only does this during boot, so that never happens. But + * Read Copy Update is cool, and adding a lock risks damaging even more puppies + * than this code does. 
+ * + * We allocate a brand new one-larger array, copy the old one and add our new + * element. Then we make the lg eventfd pointer point to the new array. + * That's the easy part: now we need to free the old one, but we need to make + * sure no slow CPU somewhere is still looking at it. That's what + * synchronize_rcu does for us: waits until every CPU has indicated that it has + * moved on to know it's no longer using the old one. + * + * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. + */ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) { struct lg_eventfd_map *new, *old = lg->eventfds; + /* + * We don't allow notifications on value 0 anyway (pending_notify of + * 0 means "nothing pending"). + */ if (!addr) return -EINVAL; @@ -62,12 +117,20 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) } new->num++; - /* Now put new one in place. */ + /* + * Now put new one in place: rcu_assign_pointer() is a fancy way of + * doing "lg->eventfds = new", but it uses memory barriers to make + * absolutely sure that the contents of "new" written above is nailed + * down before we actually do the assignment. + * + * We have to think about these kinds of things when we're operating on + * live data without locks. + */ rcu_assign_pointer(lg->eventfds, new); /* * We're not in a big hurry. Wait until noone's looking at old - * version, then delete it. + * version, then free it. */ synchronize_rcu(); kfree(old); @@ -75,6 +138,14 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) return 0; } +/*L:052 + * Receiving notifications from the Guest is usually done by attaching a + * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will + * become readable when the Guest does an LHCALL_NOTIFY with that value. + * + * This is really convenient for processing each virtqueue in a separate + * thread. + */ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) { unsigned long addr, fd; @@ -86,6 +157,11 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) if (get_user(fd, input) != 0) return -EFAULT; + /* + * Just make sure two callers don't add eventfds at once. We really + * only need to lock against callers adding to the same Guest, so using + * the Big Lguest Lock is overkill. But this is setup, not a fast path. + */ mutex_lock(&lguest_lock); err = add_eventfd(lg, addr, fd); mutex_unlock(&lguest_lock); @@ -106,6 +182,10 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) if (irq >= LGUEST_IRQS) return -EINVAL; + /* + * Next time the Guest runs, the core code will see if it can deliver + * this interrupt. + */ set_interrupt(cpu, irq); return 0; } @@ -307,10 +387,10 @@ unlock: * The first operation the Launcher does must be a write. All writes * start with an unsigned long number: for the first write this must be * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use - * writes of other values to send interrupts. + * writes of other values to send interrupts or set up receipt of notifications. * * Note that we overload the "offset" in the /dev/lguest file to indicate what - * CPU number we're dealing with. Currently this is always 0, since we only + * CPU number we're dealing with. Currently this is always 0 since we only * support uniprocessor Guests, but you can see the beginnings of SMP support * here. 
*/ diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 3da902e4b4cb..a8d0aee3bc0e 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -29,10 +29,10 @@ /*H:300 * The Page Table Code * - * We use two-level page tables for the Guest. If you're not entirely - * comfortable with virtual addresses, physical addresses and page tables then - * I recommend you review arch/x86/lguest/boot.c's "Page Table Handling" (with - * diagrams!). + * We use two-level page tables for the Guest, or three-level with PAE. If + * you're not entirely comfortable with virtual addresses, physical addresses + * and page tables then I recommend you review arch/x86/lguest/boot.c's "Page + * Table Handling" (with diagrams!). * * The Guest keeps page tables, but we maintain the actual ones here: these are * called "shadow" page tables. Which is a very Guest-centric name: these are @@ -52,9 +52,8 @@ :*/ /* - * 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is - * conveniently placed at the top 4MB, so it uses a separate, complete PTE - * page. + * The Switcher uses the complete top PTE page. That's 1024 PTE entries (4MB) + * or 512 PTE entries with PAE (2MB). */ #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) @@ -81,7 +80,8 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); /*H:320 * The page table code is curly enough to need helper functions to keep it - * clear and clean. + * clear and clean. The kernel itself provides many of them; one advantage + * of insisting that the Guest and Host use the same CONFIG_PAE setting. * * There are two functions which return pointers to the shadow (aka "real") * page tables. @@ -155,7 +155,7 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) } /* - * These two functions just like the above two, except they access the Guest + * These functions are just like the above two, except they access the Guest * page tables. Hence they return a Guest address. */ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) @@ -165,6 +165,7 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) } #ifdef CONFIG_X86_PAE +/* Follow the PGD to the PMD. */ static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) { unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; @@ -172,6 +173,7 @@ static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr) return gpage + pmd_index(vaddr) * sizeof(pmd_t); } +/* Follow the PMD to the PTE. */ static unsigned long gpte_addr(struct lg_cpu *cpu, pmd_t gpmd, unsigned long vaddr) { @@ -181,6 +183,7 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, return gpage + pte_index(vaddr) * sizeof(pte_t); } #else +/* Follow the PGD to the PTE (no mid-level for !PAE). */ static unsigned long gpte_addr(struct lg_cpu *cpu, pgd_t gpgd, unsigned long vaddr) { @@ -314,6 +317,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) pte_t gpte; pte_t *spte; + /* Mid level for PAE. */ #ifdef CONFIG_X86_PAE pmd_t *spmd; pmd_t gpmd; @@ -391,6 +395,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) */ gpte_ptr = gpte_addr(cpu, gpgd, vaddr); #endif + + /* Read the actual PTE value. */ gpte = lgread(cpu, gpte_ptr, pte_t); /* If this page isn't in the Guest page tables, we can't page it in. 
*/ @@ -507,6 +513,7 @@ void pin_page(struct lg_cpu *cpu, unsigned long vaddr) if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) kill_guest(cpu, "bad stack page %#lx", vaddr); } +/*:*/ #ifdef CONFIG_X86_PAE static void release_pmd(pmd_t *spmd) @@ -543,7 +550,11 @@ static void release_pgd(pgd_t *spgd) } #else /* !CONFIG_X86_PAE */ -/*H:450 If we chase down the release_pgd() code, it looks like this: */ +/*H:450 + * If we chase down the release_pgd() code, the non-PAE version looks like + * this. The PAE version is almost identical, but instead of calling + * release_pte it calls release_pmd(), which looks much like this. + */ static void release_pgd(pgd_t *spgd) { /* If the entry's not present, there's nothing to release. */ @@ -898,17 +909,21 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx) /* ... throw it away. */ release_pgd(lg->pgdirs[pgdir].pgdir + idx); } + #ifdef CONFIG_X86_PAE +/* For setting a mid-level, we just throw everything away. It's easy. */ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) { guest_pagetable_clear_all(&lg->cpus[0]); } #endif -/* - * Once we know how much memory we have we can construct simple identity (which +/*H:505 + * To get through boot, we construct simple identity page mappings (which * set virtual == physical) and linear mappings which will get the Guest far - * enough into the boot to create its own. + * enough into the boot to create its own. The linear mapping means we + * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET, + * as you'll see. * * We lay them out of the way, just below the initrd (which is why we need to * know its size here). @@ -944,6 +959,10 @@ static unsigned long setup_pagetables(struct lguest *lg, linear = (void *)pgdir - linear_pages * PAGE_SIZE; #ifdef CONFIG_X86_PAE + /* + * And the single mid page goes below that. We only use one, but + * that's enough to map 1G, which definitely gets us through boot. + */ pmds = (void *)linear - PAGE_SIZE; #endif /* @@ -957,13 +976,14 @@ static unsigned long setup_pagetables(struct lguest *lg, return -EFAULT; } +#ifdef CONFIG_X86_PAE /* - * The top level points to the linear page table pages above. - * We setup the identity and linear mappings here. + * Make the Guest PMD entries point to the corresponding place in the + * linear mapping (up to one page worth of PMD). */ -#ifdef CONFIG_X86_PAE for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; i += PTRS_PER_PTE, j++) { + /* FIXME: native_set_pmd is overkill here. */ native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i) - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); @@ -971,18 +991,36 @@ static unsigned long setup_pagetables(struct lguest *lg, return -EFAULT; } + /* One PGD entry, pointing to that PMD page. */ set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT)); + /* Copy it in as the first PGD entry (ie. addresses 0-1G). */ if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0) return -EFAULT; + /* + * And the third PGD entry (ie. addresses 3G-4G). + * + * FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000. + */ if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0) return -EFAULT; #else + /* + * The top level points to the linear page table pages above. + * We setup the identity and linear mappings here. + */ phys_linear = (unsigned long)linear - mem_base; for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { pgd_t pgd; + /* + * Create a PGD entry which points to the right part of the + * linear PTE pages. 
+ */ pgd = __pgd((phys_linear + i * sizeof(pte_t)) | (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); + /* + * Copy it into the PGD page at 0 and PAGE_OFFSET. + */ if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) + i / PTRS_PER_PTE], @@ -992,8 +1030,8 @@ static unsigned long setup_pagetables(struct lguest *lg, #endif /* - * We return the top level (guest-physical) address: remember where - * this is. + * We return the top level (guest-physical) address: we remember where + * this is to write it into lguest_data when the Guest initializes. */ return (unsigned long)pgdir - mem_base; } @@ -1031,7 +1069,9 @@ int init_guest_pagetable(struct lguest *lg) lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); if (!lg->pgdirs[0].pgdir) return -ENOMEM; + #ifdef CONFIG_X86_PAE + /* For PAE, we also create the initial mid-level. */ pgd = lg->pgdirs[0].pgdir; pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL); if (!pmd_table) @@ -1040,11 +1080,13 @@ int init_guest_pagetable(struct lguest *lg) set_pgd(pgd + SWITCHER_PGD_INDEX, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); #endif + + /* This is the current page table. */ lg->cpus[0].cpu_pgd = 0; return 0; } -/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ +/*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ void page_table_guest_data_init(struct lg_cpu *cpu) { /* We get the kernel address: above this is all kernel memory. */ @@ -1105,12 +1147,16 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) pmd_t switcher_pmd; pmd_t *pmd_table; + /* FIXME: native_set_pmd is overkill here. */ native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); + /* Figure out where the pmd page is, by reading the PGD, and converting + * it to a virtual address. */ pmd_table = __va(pgd_pfn(cpu->lg-> pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) << PAGE_SHIFT); + /* Now write it into the shadow page table. */ native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); #else pgd_t switcher_pgd; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 96f7d88ec7f8..6ae388849a3b 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -187,7 +187,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * also simplify copy_in_guest_info(). Note that we'd still need to restore * things when we exit to Launcher userspace, but that's fairly easy. * - * We could also try using this hooks for PGE, but that might be too expensive. + * We could also try using these hooks for PGE, but that might be too expensive. * * The hooks were designed for KVM, but we can also put them to good use. :*/ diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 6dec09793836..40634b0db9f7 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S @@ -1,7 +1,7 @@ /*P:900 - * This is the Switcher: code which sits at 0xFFC00000 astride both the - * Host and Guest to do the low-level Guest<->Host switch. It is as simple as - * it can be made, but it's naturally very specific to x86. + * This is the Switcher: code which sits at 0xFFC00000 (or 0xFFE00000) astride + * both the Host and Guest to do the low-level Guest<->Host switch. It is as + * simple as it can be made, but it's naturally very specific to x86. * * You have now completed Preparation. 
If this has whet your appetite; if you * are feeling invigorated and refreshed then the next, more challenging stage -- cgit v1.2.3 From 1842f23c05b6a866be831aa60bc8a8731c58ddd0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 30 Jul 2009 16:03:46 -0600 Subject: lguest and virtio: cleanup struct definitions to Linux style. I've been doing this for years, and akpm picked me up on it about 12 months ago. lguest partly serves as example code, so let's do it Right. Also, remove two unused fields in struct vblk_info in the example launcher. Signed-off-by: Rusty Russell Cc: Ingo Molnar --- Documentation/lguest/lguest.c | 21 +++++---------------- drivers/lguest/lg.h | 9 +++------ drivers/lguest/lguest_device.c | 3 +-- include/linux/lguest.h | 3 +-- include/linux/virtio_blk.h | 6 ++---- include/linux/virtio_config.h | 3 +-- include/linux/virtio_net.h | 6 ++---- include/linux/virtio_ring.h | 12 ++++-------- 8 files changed, 19 insertions(+), 44 deletions(-) (limited to 'drivers') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 45163651b519..950cde6d6e58 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -93,8 +93,7 @@ static int lguest_fd; static unsigned int __thread cpu_id; /* This is our list of devices. */ -struct device_list -{ +struct device_list { /* Counter to assign interrupt numbers. */ unsigned int next_irq; @@ -114,8 +113,7 @@ struct device_list static struct device_list devices; /* The device structure describes a single device. */ -struct device -{ +struct device { /* The linked-list pointer. */ struct device *next; @@ -140,8 +138,7 @@ struct device }; /* The virtqueue structure describes a queue attached to a device. */ -struct virtqueue -{ +struct virtqueue { struct virtqueue *next; /* Which device owns me. */ @@ -779,8 +776,7 @@ static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) * * We associate some data with the console for our exit hack. */ -struct console_abort -{ +struct console_abort { /* How many times have they hit ^C? */ int count; /* When did they start? */ @@ -1570,20 +1566,13 @@ static void setup_tun_net(char *arg) /*:*/ /* This hangs off device->priv. */ -struct vblk_info -{ +struct vblk_info { /* The size of the file. */ off64_t len; /* The file descriptor for the file. */ int fd; - /* IO thread listens on this file descriptor [0]. */ - int workpipe[2]; - - /* IO thread writes to this file descriptor to mark it done, then - * Launcher triggers interrupt to Guest. */ - int done_fd; }; /*L:210 diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 74c0db691b53..bc28745d05af 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -16,15 +16,13 @@ void free_pagetables(void); int init_pagetables(struct page **switcher_page, unsigned int pages); -struct pgdir -{ +struct pgdir { unsigned long gpgdir; pgd_t *pgdir; }; /* We have two pages shared with guests, per cpu. */ -struct lguest_pages -{ +struct lguest_pages { /* This is the stack page mapped rw in guest */ char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; struct lguest_regs regs; @@ -89,8 +87,7 @@ struct lg_eventfd_map { }; /* The private info the thread maintains about the guest. 
*/ -struct lguest -{ +struct lguest { struct lguest_data __user *lguest_data; struct lg_cpu cpus[NR_CPUS]; unsigned int nr_cpus; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1401c1ace1ec..b6200bc39b58 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -207,8 +207,7 @@ static void lg_reset(struct virtio_device *vdev) */ /*D:140 This is the information we remember about each virtqueue. */ -struct lguest_vq_info -{ +struct lguest_vq_info { /* A copy of the information contained in the device config. */ struct lguest_vqconfig config; diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 0a3a11afd64b..2fb1dcbcb5aa 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -18,8 +18,7 @@ * lguest_data". Once the Guest's initialization hypercall tells the Host where * this is, the Guest and Host both publish information in it. :*/ -struct lguest_data -{ +struct lguest_data { /* * 512 == enabled (same as eflags in normal hardware). The Guest * changes interrupts so often that a hypercall is too slow. diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index be7d255fc7cf..8dab9f2b8832 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -20,8 +20,7 @@ #define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ -struct virtio_blk_config -{ +struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ __u64 capacity; /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ @@ -50,8 +49,7 @@ struct virtio_blk_config #define VIRTIO_BLK_T_BARRIER 0x80000000 /* This is the first element of the read scatter-gather list. */ -struct virtio_blk_outhdr -{ +struct virtio_blk_outhdr { /* VIRTIO_BLK_T* */ __u32 type; /* io priority. */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 99f514575f6a..e547e3c8ee9a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -79,8 +79,7 @@ * the dev->feature bits if it wants. */ typedef void vq_callback_t(struct virtqueue *); -struct virtio_config_ops -{ +struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 9c543d6ac535..d8dd539c9f48 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -31,8 +31,7 @@ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ -struct virtio_net_config -{ +struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ @@ -41,8 +40,7 @@ struct virtio_net_config /* This is the first element of the scatter-gather list. If you don't * specify GSO or CSUM features, you can simply ignore the header. */ -struct virtio_net_hdr -{ +struct virtio_net_hdr { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 693e0ec5afa6..e4d144b132b5 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -30,8 +30,7 @@ #define VIRTIO_RING_F_INDIRECT_DESC 28 /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ -struct vring_desc -{ +struct vring_desc { /* Address (guest-physical). */ __u64 addr; /* Length. 
*/ @@ -42,24 +41,21 @@ struct vring_desc __u16 next; }; -struct vring_avail -{ +struct vring_avail { __u16 flags; __u16 idx; __u16 ring[]; }; /* u32 is used here for ids for padding reasons. */ -struct vring_used_elem -{ +struct vring_used_elem { /* Index of start of used descriptor chain. */ __u32 id; /* Total length of the descriptor chain which was used (written to) */ __u32 len; }; -struct vring_used -{ +struct vring_used { __u16 flags; __u16 idx; struct vring_used_elem ring[]; -- cgit v1.2.3 From dfb3cf00e402686f671db697adbd8b9f4c219268 Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Mon, 13 Jul 2009 15:06:02 +0200 Subject: [SCSI] zfcp: Fix invalid command order We should not modify the port status after triggering an ERP action for the port. It is not guaranteed which status is finally active when the ERP action is performed. This can lead to situations which are unwanted and hard to debug in case of a failure. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c57658f3d34f..bbb7ef0b052d 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1731,15 +1731,16 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req) zfcp_fsf_access_denied_port(req, port); break; case FSF_PORT_BOXED: - zfcp_erp_port_boxed(port, "fscpph2", req); - req->status |= ZFCP_STATUS_FSFREQ_ERROR | - ZFCP_STATUS_FSFREQ_RETRY; /* can't use generic zfcp_erp_modify_port_status because * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */ atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status); list_for_each_entry(unit, &port->unit_list_head, list) atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status); + zfcp_erp_port_boxed(port, "fscpph2", req); + req->status |= ZFCP_STATUS_FSFREQ_ERROR | + ZFCP_STATUS_FSFREQ_RETRY; + break; case FSF_ADAPTER_STATUS_AVAILABLE: switch (header->fsf_status_qual.word[0]) { -- cgit v1.2.3 From acf7b86150701de105aa8307b4b3f9dc533c45bb Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:03 +0200 Subject: [SCSI] zfcp: Acquire qdio_stat_lock when reading the queue utilization req_q_util is not atomic, so the qdio_stat_lock must be held when reading this variable. 
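In outline, the fix that follows takes the lock around the read and snapshots the value into a local before formatting it; the sketch below is illustrative only, with a stand-in struct carrying just the two fields involved rather than the full zfcp_adapter:

#include <linux/spinlock.h>
#include <linux/types.h>

/* Illustrative stand-in for the two zfcp_adapter fields involved. */
struct adapter_stats {
	spinlock_t qdio_stat_lock;
	u64 req_q_util;			/* not safe to read without the lock */
};

static u64 req_q_util_snapshot(struct adapter_stats *stats)
{
	u64 util;

	spin_lock_bh(&stats->qdio_stat_lock);
	util = stats->req_q_util;
	spin_unlock_bh(&stats->qdio_stat_lock);

	return util;
}

Taking a consistent snapshot under the lock and dropping it before the sprintf keeps the critical section as short as possible.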
Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 3e51e64d1108..0fe5cce818cb 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -494,9 +494,14 @@ static ssize_t zfcp_sysfs_adapter_q_full_show(struct device *dev, struct Scsi_Host *scsi_host = class_to_shost(dev); struct zfcp_adapter *adapter = (struct zfcp_adapter *) scsi_host->hostdata[0]; + u64 util; + + spin_lock_bh(&adapter->qdio_stat_lock); + util = adapter->req_q_util; + spin_unlock_bh(&adapter->qdio_stat_lock); return sprintf(buf, "%d %llu\n", atomic_read(&adapter->qdio_outb_full), - (unsigned long long)adapter->req_q_util); + (unsigned long long)util); } static DEVICE_ATTR(queue_full, S_IRUGO, zfcp_sysfs_adapter_q_full_show, NULL); -- cgit v1.2.3 From 1e9b16430ff4fd09408a74342d6b8338228e2f70 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:04 +0200 Subject: [SCSI] zfcp: Return -ENOMEM for allocation failures in zfcp_fsf When a fsf_req or a qtcb cannot be allocated return -ENOMEM instead of -EIO. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index bbb7ef0b052d..c4eb62b32a32 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -722,7 +722,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_adapter *adapter, req = zfcp_fsf_alloc_qtcb(pool); if (unlikely(!req)) - return ERR_PTR(-EIO); + return ERR_PTR(-ENOMEM); if (adapter->req_no == 0) adapter->req_no++; -- cgit v1.2.3 From 688a1820bde27749f22b18b94ef1c9bc179b1b29 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:05 +0200 Subject: [SCSI] zfcp: Use correct flags for zfcp_erp_notify zfcp_erp_notify uses the ZFCP_ERP_STATUS_* flags, so it is ZFCP_STATUS_ERP_LOWMEM instead of ZFCP_ERP_NOMEM. Signalling ZFCP_ERP_FAILED is not necessary, the missing d_id will show that the nameserver did not return the d_id. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 8030e25152fb..e7d7ef55e37d 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -854,10 +854,10 @@ void zfcp_erp_port_strategy_open_lookup(struct work_struct *work) retval = zfcp_fc_ns_gid_pn(&port->erp_action); if (retval == -ENOMEM) - zfcp_erp_notify(&port->erp_action, ZFCP_ERP_NOMEM); + zfcp_erp_notify(&port->erp_action, ZFCP_STATUS_ERP_LOWMEM); port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; if (retval) - zfcp_erp_notify(&port->erp_action, ZFCP_ERP_FAILED); + zfcp_erp_notify(&port->erp_action, 0); zfcp_port_put(port); } -- cgit v1.2.3 From 426f6059b0eb66cec139f4b9066168ab72b85774 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:06 +0200 Subject: [SCSI] zfcp: Use unchained mode for small ct and els requests The ELS ADISC and the GID_PN requests sent from zfcp fit into unchained FSF requests. 
Change the FSF allocation logic to use unchained requests whenever possible where everything fits in one SBAL. This avoids acquiring more SBALs than necessary, especially during zfcp recovery when things might be stalled. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c4eb62b32a32..bec912547fb8 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1010,6 +1010,23 @@ skip_fsfstatus: send_ct->handler(send_ct->handler_data); } +static void zfcp_fsf_setup_ct_els_unchained(struct qdio_buffer_element *sbale, + struct scatterlist *sg_req, + struct scatterlist *sg_resp) +{ + sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; + sbale[2].addr = sg_virt(sg_req); + sbale[2].length = sg_req->length; + sbale[3].addr = sg_virt(sg_resp); + sbale[3].length = sg_resp->length; + sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; +} + +static int zfcp_fsf_one_sbal(struct scatterlist *sg) +{ + return sg_is_last(sg) && sg->length <= PAGE_SIZE; +} + static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, struct scatterlist *sg_req, struct scatterlist *sg_resp, @@ -1020,16 +1037,16 @@ static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, int bytes; if (!(feat & FSF_FEATURE_ELS_CT_CHAINED_SBALS)) { - if (sg_req->length > PAGE_SIZE || sg_resp->length > PAGE_SIZE || - !sg_is_last(sg_req) || !sg_is_last(sg_resp)) + if (!zfcp_fsf_one_sbal(sg_req) || !zfcp_fsf_one_sbal(sg_resp)) return -EOPNOTSUPP; - sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE_READ; - sbale[2].addr = sg_virt(sg_req); - sbale[2].length = sg_req->length; - sbale[3].addr = sg_virt(sg_resp); - sbale[3].length = sg_resp->length; - sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; + zfcp_fsf_setup_ct_els_unchained(sbale, sg_req, sg_resp); + return 0; + } + + /* use single, unchained SBAL if it can hold the request */ + if (zfcp_fsf_one_sbal(sg_req) && zfcp_fsf_one_sbal(sg_resp)) { + zfcp_fsf_setup_ct_els_unchained(sbale, sg_req, sg_resp); return 0; } -- cgit v1.2.3 From 9072df4dc6e8fd569d583815edb0198af4b688b8 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:07 +0200 Subject: [SCSI] zfcp: Use -EIO for SBAL allocation failures -ENOMEM is for memory allocation problems, -EIO for queue/SBAL allocation problems. 
Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index bec912547fb8..0c24695a53cb 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1053,14 +1053,14 @@ static int zfcp_fsf_setup_ct_els_sbals(struct zfcp_fsf_req *req, bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, sg_req, max_sbals); if (bytes <= 0) - return -ENOMEM; + return -EIO; req->qtcb->bottom.support.req_buf_length = bytes; req->sbale_curr = ZFCP_LAST_SBALE_PER_SBAL; bytes = zfcp_qdio_sbals_from_sg(req, SBAL_FLAGS0_TYPE_WRITE_READ, sg_resp, max_sbals); if (bytes <= 0) - return -ENOMEM; + return -EIO; req->qtcb->bottom.support.resp_buf_length = bytes; return 0; @@ -2559,7 +2559,6 @@ struct zfcp_fsf_req *zfcp_fsf_control_file(struct zfcp_adapter *adapter, bytes = zfcp_qdio_sbals_from_sg(req, direction, fsf_cfdc->sg, FSF_MAX_SBALS_PER_REQ); if (bytes != ZFCP_CFDC_MAX_SIZE) { - retval = -ENOMEM; zfcp_fsf_req_free(req); goto out; } -- cgit v1.2.3 From ddb3e0c111fed0a8bf74884dc918274acec2b618 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:08 +0200 Subject: [SCSI] zfcp: Fix logic for physical port close After closing the port, we want it to be "not open" to consider the action to be successful. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index e7d7ef55e37d..0a7c6aef532a 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -801,7 +801,7 @@ static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action) return ZFCP_ERP_FAILED; case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: - if (status & ZFCP_STATUS_PORT_PHYS_OPEN) + if (!(status & ZFCP_STATUS_PORT_PHYS_OPEN)) return ZFCP_ERP_SUCCEEDED; } return ZFCP_ERP_FAILED; -- cgit v1.2.3 From 85600f7f8370fe5b4be0debd8b401de7986b52ae Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:09 +0200 Subject: [SCSI] zfcp: Fix erp escalation procedure If an action fails, retry it until the erp count exceeds the threshold. If there is something fundamentally wrong, the FSF layer will trigger a more appropriate action depending on the FSF status codes. The followup for successful actions is a different followup than retrying failed actions, so split the code two functions to make this clear. 
Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 50 +++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 0a7c6aef532a..b5562f952654 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -553,40 +553,35 @@ static void _zfcp_erp_unit_reopen_all(struct zfcp_port *port, int clear, _zfcp_erp_unit_reopen(unit, clear, id, ref); } -static void zfcp_erp_strategy_followup_actions(struct zfcp_erp_action *act) +static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) { - struct zfcp_adapter *adapter = act->adapter; - struct zfcp_port *port = act->port; - struct zfcp_unit *unit = act->unit; - u32 status = act->status; - - /* initiate follow-up actions depending on success of finished action */ switch (act->action) { - case ZFCP_ERP_ACTION_REOPEN_ADAPTER: - if (status == ZFCP_ERP_SUCCEEDED) - _zfcp_erp_port_reopen_all(adapter, 0, "ersfa_1", NULL); - else - _zfcp_erp_adapter_reopen(adapter, 0, "ersfa_2", NULL); + _zfcp_erp_adapter_reopen(act->adapter, 0, "ersff_1", NULL); break; - case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: - if (status == ZFCP_ERP_SUCCEEDED) - _zfcp_erp_port_reopen(port, 0, "ersfa_3", NULL); - else - _zfcp_erp_adapter_reopen(adapter, 0, "ersfa_4", NULL); + _zfcp_erp_port_forced_reopen(act->port, 0, "ersff_2", NULL); break; - case ZFCP_ERP_ACTION_REOPEN_PORT: - if (status == ZFCP_ERP_SUCCEEDED) - _zfcp_erp_unit_reopen_all(port, 0, "ersfa_5", NULL); - else - _zfcp_erp_port_forced_reopen(port, 0, "ersfa_6", NULL); + _zfcp_erp_port_reopen(act->port, 0, "ersff_3", NULL); break; - case ZFCP_ERP_ACTION_REOPEN_UNIT: - if (status != ZFCP_ERP_SUCCEEDED) - _zfcp_erp_port_reopen(unit->port, 0, "ersfa_7", NULL); + _zfcp_erp_unit_reopen(act->unit, 0, "ersff_4", NULL); + break; + } +} + +static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act) +{ + switch (act->action) { + case ZFCP_ERP_ACTION_REOPEN_ADAPTER: + _zfcp_erp_port_reopen_all(act->adapter, 0, "ersfs_1", NULL); + break; + case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: + _zfcp_erp_port_reopen(act->port, 0, "ersfs_2", NULL); + break; + case ZFCP_ERP_ACTION_REOPEN_PORT: + _zfcp_erp_unit_reopen_all(act->port, 0, "ersfs_3", NULL); break; } } @@ -1289,7 +1284,10 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) retval = zfcp_erp_strategy_statechange(erp_action, retval); if (retval == ZFCP_ERP_EXIT) goto unlock; - zfcp_erp_strategy_followup_actions(erp_action); + if (retval == ZFCP_ERP_SUCCEEDED) + zfcp_erp_strategy_followup_success(erp_action); + if (retval == ZFCP_ERP_FAILED) + zfcp_erp_strategy_followup_failed(erp_action); unlock: write_unlock(&adapter->erp_lock); -- cgit v1.2.3 From cbf1ed0264da104573458aedc220ebfcd02567f6 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:10 +0200 Subject: [SCSI] zfcp: Recover from stalled outbound queue Depending on interruptions on some storage systems, the complete channel can stall which looks like an outbound queue stall to Linux. When trying to acquire a free SBAL for a non-SCSI command, zfcp waits for 5 seconds for a free slot to appear. This is the right place to detect a queue stall: If the wait times out, we assume a stalled queue and try to recover this. 
The overall strategy should be to trigger the erp from specific events, and not try an overall escalation from one failed port to a full-blown queue recovery. If we manage to send a command, the status codes for this command or a timeout will trigger the right follow-on actions. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 0c24695a53cb..b7e48844056a 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -670,8 +670,11 @@ static int zfcp_fsf_req_sbal_get(struct zfcp_adapter *adapter) zfcp_fsf_sbal_check(adapter), 5 * HZ); if (ret > 0) return 0; - if (!ret) + if (!ret) { atomic_inc(&adapter->qdio_outb_full); + /* assume hanging outbound queue, try queue recovery */ + zfcp_erp_adapter_reopen(adapter, 0, "fsrsg_1", NULL); + } spin_lock_bh(&adapter->req_q_lock); return -EIO; -- cgit v1.2.3 From 379d6bf6573ee6541a38bbe9140c1f0b94e3feae Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:11 +0200 Subject: [SCSI] zfcp: Add port only once to FC transport class When calling fc_remote_port_add make sure to not call it again before fc_remote_port_delete has been called. In other words, ensure to create a new fc_rport, then delete it, then create a new one again. Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_scsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 967ede73f4c5..ba32709921a4 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -534,6 +534,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port) struct fc_rport_identifiers ids; struct fc_rport *rport; + if (port->rport) + return; + ids.node_name = port->wwnn; ids.port_name = port->wwpn; ids.port_id = port->d_id; @@ -557,8 +560,10 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port) { struct fc_rport *rport = port->rport; - if (rport) + if (rport) { fc_remote_port_delete(rport); + port->rport = NULL; + } } void zfcp_scsi_schedule_rport_register(struct zfcp_port *port) -- cgit v1.2.3 From 17a093ef018481ee1760da19568bad3c11da395d Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Mon, 13 Jul 2009 15:06:12 +0200 Subject: [SCSI] zfcp: avoid double notify in lowmem scenario In a LOWMEM condition an ERP notification would have been sent twice causing an unpredictable behaviour of the ERP. 
Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index b5562f952654..c75d6f35cb5f 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -848,11 +848,17 @@ void zfcp_erp_port_strategy_open_lookup(struct work_struct *work) gid_pn_work); retval = zfcp_fc_ns_gid_pn(&port->erp_action); - if (retval == -ENOMEM) + if (!retval) { + port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; + goto out; + } + if (retval == -ENOMEM) { zfcp_erp_notify(&port->erp_action, ZFCP_STATUS_ERP_LOWMEM); - port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP; - if (retval) - zfcp_erp_notify(&port->erp_action, 0); + goto out; + } + /* all other error condtions */ + zfcp_erp_notify(&port->erp_action, 0); +out: zfcp_port_put(port); } -- cgit v1.2.3 From 27f492ccec94b6acd8440c83bfe0515ce4db0af0 Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Mon, 13 Jul 2009 15:06:13 +0200 Subject: [SCSI] zfcp: Fix wka port processing Under certain conditions it is possible that a WKA port ist not opened within the expected timeframe of half a second. In this situation the WKA port remains in the state OPENING preventing any succeding request to open the port. This led to unrecoverable remote ports. Fixing this by always setting an appropriate WKA port status before leaving the function and removing the timeout value here since it's not needed here because the general timeout processing would deal with it if required. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fc.c | 8 +++----- drivers/s390/scsi/zfcp_fsf.c | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 2f0705d76b72..47daebfa7e59 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -79,11 +79,9 @@ static int zfcp_wka_port_get(struct zfcp_wka_port *wka_port) mutex_unlock(&wka_port->mutex); - wait_event_timeout( - wka_port->completion_wq, - wka_port->status == ZFCP_WKA_PORT_ONLINE || - wka_port->status == ZFCP_WKA_PORT_OFFLINE, - HZ >> 1); + wait_event(wka_port->completion_wq, + wka_port->status == ZFCP_WKA_PORT_ONLINE || + wka_port->status == ZFCP_WKA_PORT_OFFLINE); if (wka_port->status == ZFCP_WKA_PORT_ONLINE) { atomic_inc(&wka_port->refcount); diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index b7e48844056a..47795fbf081f 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1627,10 +1627,10 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) case FSF_ACCESS_DENIED: wka_port->status = ZFCP_WKA_PORT_OFFLINE; break; - case FSF_PORT_ALREADY_OPEN: - break; case FSF_GOOD: wka_port->handle = header->port_handle; + /* fall through */ + case FSF_PORT_ALREADY_OPEN: wka_port->status = ZFCP_WKA_PORT_ONLINE; } out: -- cgit v1.2.3 From a11a52be115889a5d1f738ed2e154807bceed4ee Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 13 Jul 2009 15:06:14 +0200 Subject: [SCSI] zfcp: Fix tracing of request id for abort requests The trace record for SCSI abort requests has a field for the request id of the request to be aborted. Put the real request id instead of zero. 
Reviewed-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_scsi.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index ba32709921a4..6925a1784682 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -167,20 +167,21 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) struct zfcp_unit *unit = scpnt->device->hostdata; struct zfcp_fsf_req *old_req, *abrt_req; unsigned long flags; - unsigned long old_req_id = (unsigned long) scpnt->host_scribble; + unsigned long old_reqid = (unsigned long) scpnt->host_scribble; int retval = SUCCESS; int retry = 3; + char *dbf_tag; /* avoid race condition between late normal completion and abort */ write_lock_irqsave(&adapter->abort_lock, flags); spin_lock(&adapter->req_list_lock); - old_req = zfcp_reqlist_find(adapter, old_req_id); + old_req = zfcp_reqlist_find(adapter, old_reqid); spin_unlock(&adapter->req_list_lock); if (!old_req) { write_unlock_irqrestore(&adapter->abort_lock, flags); zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, - old_req_id); + old_reqid); return FAILED; /* completion could be in progress */ } old_req->data = NULL; @@ -189,7 +190,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) write_unlock_irqrestore(&adapter->abort_lock, flags); while (retry--) { - abrt_req = zfcp_fsf_abort_fcp_command(old_req_id, unit); + abrt_req = zfcp_fsf_abort_fcp_command(old_reqid, unit); if (abrt_req) break; @@ -197,7 +198,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) { zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, - old_req_id); + old_reqid); return SUCCESS; } } @@ -208,13 +209,14 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) - zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, abrt_req, 0); + dbf_tag = "okay"; else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) - zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, abrt_req, 0); + dbf_tag = "lte2"; else { - zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, abrt_req, 0); + dbf_tag = "fail"; retval = FAILED; } + zfcp_scsi_dbf_event_abort(dbf_tag, adapter, scpnt, abrt_req, old_reqid); zfcp_fsf_req_free(abrt_req); return retval; } -- cgit v1.2.3 From 6187c242089d334102be76427a5a020240e6c19a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 15 Jul 2009 15:02:57 -0500 Subject: [SCSI] libiscsi: disable bh in and abort handler. The session lock can be held in the scsi eh thread or the completion paths run from the net softirq. This disables bhs in iscsi_eh_abort when taking the session lock. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 716cc344c5df..a751f6230c22 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1974,10 +1974,10 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) * good and have never sent us a successful tmf response * then sent more data for the cmd. 
*/ - spin_lock(&session->lock); + spin_lock_bh(&session->lock); fail_scsi_task(task, DID_ABORT); conn->tmf_state = TMF_INITIAL; - spin_unlock(&session->lock); + spin_unlock_bh(&session->lock); iscsi_start_tx(conn); goto success_unlocked; case TMF_TIMEDOUT: -- cgit v1.2.3 From 94bced3c1b371014cbd187f2df5539b13a0e3b90 Mon Sep 17 00:00:00 2001 From: Karen Higgins Date: Wed, 15 Jul 2009 15:02:58 -0500 Subject: [SCSI] qla4xxx: Correct Extended Sense Data Errors Fixed sense data errors occurring above the first 32 bytes, as required by some third party applications. Sense data in the first 32 bytes has always been correct. Patch updated to use srb data variables instead of scsi command scratchpad data area, as scratchpad area is already used. Also, corrected debug print alignment bug in dump_buffer routine. Changed KERN_DEBUG to KERN_INFO in printk statements in this routine. Changed version number to 5.01.00-k9 Signed-off-by: Karen Higgins [michaelc: fixed checkpath.pl errors] Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/qla4xxx/ql4_dbg.c | 15 ++-- drivers/scsi/qla4xxx/ql4_def.h | 7 ++ drivers/scsi/qla4xxx/ql4_fw.h | 7 ++ drivers/scsi/qla4xxx/ql4_isr.c | 145 +++++++++++++++++++++++++------------ drivers/scsi/qla4xxx/ql4_version.h | 2 +- 5 files changed, 122 insertions(+), 54 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/qla4xxx/ql4_dbg.c b/drivers/scsi/qla4xxx/ql4_dbg.c index fcc184cd066d..cbceb0ebabf7 100644 --- a/drivers/scsi/qla4xxx/ql4_dbg.c +++ b/drivers/scsi/qla4xxx/ql4_dbg.c @@ -15,19 +15,18 @@ void qla4xxx_dump_buffer(void *b, uint32_t size) uint32_t cnt; uint8_t *c = b; - printk(" 0 1 2 3 4 5 6 7 8 9 Ah Bh Ch Dh Eh " + printk(" 0 1 2 3 4 5 6 7 8 9 Ah Bh Ch Dh Eh " "Fh\n"); printk("------------------------------------------------------------" "--\n"); - for (cnt = 0; cnt < size; cnt++, c++) { - printk(KERN_DEBUG "%02x", *c); - if (!(cnt % 16)) - printk(KERN_DEBUG "\n"); + for (cnt = 0; cnt < size; c++) { + printk(KERN_INFO "%02x", *c); + if (!(++cnt % 16)) + printk(KERN_INFO "\n"); else - printk(KERN_DEBUG " "); + printk(KERN_INFO " "); } - if (cnt % 16) - printk(KERN_DEBUG "\n"); + printk(KERN_INFO "\n"); } diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index b586f27c3bd4..963e8553d210 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -184,6 +184,11 @@ struct srb { uint16_t cc_stat; u_long r_start; /* Time we recieve a cmd from OS */ u_long u_start; /* Time when we handed the cmd to F/W */ + + /* Used for extended sense / status continuation */ + uint8_t *req_sense_ptr; + uint16_t req_sense_len; + uint16_t reserved2; }; /* @@ -436,6 +441,8 @@ struct scsi_qla_host { /* Map ddb_list entry by FW ddb index */ struct ddb_entry *fw_ddb_index_map[MAX_DDB_ENTRIES]; + /* Saved srb for status continuation entry processing */ + struct srb *status_srb; }; static inline int is_qla4010(struct scsi_qla_host *ha) diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1b667a70cffa..9cd7a608df38 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h @@ -572,6 +572,7 @@ struct conn_event_log_entry { *************************************************************************/ #define IOCB_MAX_CDB_LEN 16 /* Bytes in a CBD */ #define IOCB_MAX_SENSEDATA_LEN 32 /* Bytes of sense data */ +#define IOCB_MAX_EXT_SENSEDATA_LEN 60 /* Bytes of extended sense data */ /* IOCB header structure */ struct qla4_header { @@ -733,6 +734,12 @@ struct status_entry { }; 
+/* Status Continuation entry */ +struct status_cont_entry { + struct qla4_header hdr; /* 00-03 */ + uint8_t ext_sense_data[IOCB_MAX_EXT_SENSEDATA_LEN]; /* 04-63 */ +}; + struct passthru0 { struct qla4_header hdr; /* 00-03 */ uint32_t handle; /* 04-07 */ diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 799120fcb9be..8025ee16588e 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c @@ -10,6 +10,98 @@ #include "ql4_dbg.h" #include "ql4_inline.h" +/** + * qla4xxx_copy_sense - copy sense data into cmd sense buffer + * @ha: Pointer to host adapter structure. + * @sts_entry: Pointer to status entry structure. + * @srb: Pointer to srb structure. + **/ +static void qla4xxx_copy_sense(struct scsi_qla_host *ha, + struct status_entry *sts_entry, + struct srb *srb) +{ + struct scsi_cmnd *cmd = srb->cmd; + uint16_t sense_len; + + memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); + sense_len = le16_to_cpu(sts_entry->senseDataByteCnt); + if (sense_len == 0) + return; + + /* Save total available sense length, + * not to exceed cmd's sense buffer size */ + sense_len = min_t(uint16_t, sense_len, SCSI_SENSE_BUFFERSIZE); + srb->req_sense_ptr = cmd->sense_buffer; + srb->req_sense_len = sense_len; + + /* Copy sense from sts_entry pkt */ + sense_len = min_t(uint16_t, sense_len, IOCB_MAX_SENSEDATA_LEN); + memcpy(cmd->sense_buffer, sts_entry->senseData, sense_len); + + DEBUG2(printk(KERN_INFO "scsi%ld:%d:%d:%d: %s: sense key = %x, " + "ASL= %02x, ASC/ASCQ = %02x/%02x\n", ha->host_no, + cmd->device->channel, cmd->device->id, + cmd->device->lun, __func__, + sts_entry->senseData[2] & 0x0f, + sts_entry->senseData[7], + sts_entry->senseData[12], + sts_entry->senseData[13])); + + DEBUG5(qla4xxx_dump_buffer(cmd->sense_buffer, sense_len)); + srb->flags |= SRB_GOT_SENSE; + + /* Update srb, in case a sts_cont pkt follows */ + srb->req_sense_ptr += sense_len; + srb->req_sense_len -= sense_len; + if (srb->req_sense_len != 0) + ha->status_srb = srb; + else + ha->status_srb = NULL; +} + +/** + * qla4xxx_status_cont_entry - Process a Status Continuations entry. + * @ha: SCSI driver HA context + * @sts_cont: Entry pointer + * + * Extended sense data. + */ +static void +qla4xxx_status_cont_entry(struct scsi_qla_host *ha, + struct status_cont_entry *sts_cont) +{ + struct srb *srb = ha->status_srb; + struct scsi_cmnd *cmd; + uint8_t sense_len; + + if (srb == NULL) + return; + + cmd = srb->cmd; + if (cmd == NULL) { + DEBUG2(printk(KERN_INFO "scsi%ld: %s: Cmd already returned " + "back to OS srb=%p srb->state:%d\n", ha->host_no, + __func__, srb, srb->state)); + ha->status_srb = NULL; + return; + } + + /* Copy sense data. */ + sense_len = min_t(uint16_t, srb->req_sense_len, + IOCB_MAX_EXT_SENSEDATA_LEN); + memcpy(srb->req_sense_ptr, sts_cont->ext_sense_data, sense_len); + DEBUG5(qla4xxx_dump_buffer(srb->req_sense_ptr, sense_len)); + + srb->req_sense_ptr += sense_len; + srb->req_sense_len -= sense_len; + + /* Place command on done queue. */ + if (srb->req_sense_len == 0) { + qla4xxx_srb_compl(ha, srb); + ha->status_srb = NULL; + } +} + /** * qla4xxx_status_entry - processes status IOCBs * @ha: Pointer to host adapter structure. 
@@ -23,7 +115,6 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, struct srb *srb; struct ddb_entry *ddb_entry; uint32_t residual; - uint16_t sensebytecnt; srb = qla4xxx_del_from_active_array(ha, le32_to_cpu(sts_entry->handle)); if (!srb) { @@ -92,24 +183,7 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, break; /* Copy Sense Data into sense buffer. */ - memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); - - sensebytecnt = le16_to_cpu(sts_entry->senseDataByteCnt); - if (sensebytecnt == 0) - break; - - memcpy(cmd->sense_buffer, sts_entry->senseData, - min_t(uint16_t, sensebytecnt, SCSI_SENSE_BUFFERSIZE)); - - DEBUG2(printk("scsi%ld:%d:%d:%d: %s: sense key = %x, " - "ASC/ASCQ = %02x/%02x\n", ha->host_no, - cmd->device->channel, cmd->device->id, - cmd->device->lun, __func__, - sts_entry->senseData[2] & 0x0f, - sts_entry->senseData[12], - sts_entry->senseData[13])); - - srb->flags |= SRB_GOT_SENSE; + qla4xxx_copy_sense(ha, sts_entry, srb); break; case SCS_INCOMPLETE: @@ -176,23 +250,7 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, break; /* Copy Sense Data into sense buffer. */ - memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); - - sensebytecnt = - le16_to_cpu(sts_entry->senseDataByteCnt); - if (sensebytecnt == 0) - break; - - memcpy(cmd->sense_buffer, sts_entry->senseData, - min_t(uint16_t, sensebytecnt, SCSI_SENSE_BUFFERSIZE)); - - DEBUG2(printk("scsi%ld:%d:%d:%d: %s: sense key = %x, " - "ASC/ASCQ = %02x/%02x\n", ha->host_no, - cmd->device->channel, cmd->device->id, - cmd->device->lun, __func__, - sts_entry->senseData[2] & 0x0f, - sts_entry->senseData[12], - sts_entry->senseData[13])); + qla4xxx_copy_sense(ha, sts_entry, srb); } else { /* * If RISC reports underrun and target does not @@ -268,9 +326,10 @@ static void qla4xxx_status_entry(struct scsi_qla_host *ha, status_entry_exit: - /* complete the request */ + /* complete the request, if not waiting for status_continuation pkt */ srb->cc_stat = sts_entry->completionStatus; - qla4xxx_srb_compl(ha, srb); + if (ha->status_srb == NULL) + qla4xxx_srb_compl(ha, srb); } /** @@ -305,10 +364,7 @@ static void qla4xxx_process_response_queue(struct scsi_qla_host * ha) /* process entry */ switch (sts_entry->hdr.entryType) { case ET_STATUS: - /* - * Common status - Single completion posted in single - * IOSB. - */ + /* Common status */ qla4xxx_status_entry(ha, sts_entry); break; @@ -316,9 +372,8 @@ static void qla4xxx_process_response_queue(struct scsi_qla_host * ha) break; case ET_STATUS_CONTINUATION: - /* Just throw away the status continuation entries */ - DEBUG2(printk("scsi%ld: %s: Status Continuation entry " - "- ignoring\n", ha->host_no, __func__)); + qla4xxx_status_cont_entry(ha, + (struct status_cont_entry *) sts_entry); break; case ET_COMMAND: diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index ab984cb89cea..6980cb279c81 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h @@ -5,5 +5,5 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.01.00-k8" +#define QLA4XXX_DRIVER_VERSION "5.01.00-k9" -- cgit v1.2.3 From 5c656af7e4edfe44c85034d6fa7002909f9c3c59 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 15 Jul 2009 15:02:59 -0500 Subject: [SCSI] qla4xxx: add timeout handler Recently dm-multipath began calling blk_abort_queue. This causes all the commands/request running on the path to have the timeout function called. 
If a path does go down, and the LLD returns DID_*, dm-multpiath will eventually get this error and begin to call the cmd timeout handler. qla4xxx currently does not set a timed out handler and so the default one could return BLK_EH_NOT_HANDLED and end up firing the scsi eh and stopping IO to all paths on the host when only one path is affected. For software and offload iscsi we have a timed out handler already. This patch adds a driver specific one to qla4xxx because there are some ddb->state and session->state and command completion races that are better handled in the LLD. This also handles the problem where if the session is down, we do not need the scsi eh to run until the transport code has tried to reconnect us. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/qla4xxx/ql4_os.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index ec9da6ce8489..6841883b3611 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -66,6 +66,7 @@ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, static int qla4xxx_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf); static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); +static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc); /* * SCSI host template entry points @@ -89,6 +90,7 @@ static struct scsi_host_template qla4xxx_driver_template = { .eh_device_reset_handler = qla4xxx_eh_device_reset, .eh_target_reset_handler = qla4xxx_eh_target_reset, .eh_host_reset_handler = qla4xxx_eh_host_reset, + .eh_timed_out = qla4xxx_eh_cmd_timed_out, .slave_configure = qla4xxx_slave_configure, .slave_alloc = qla4xxx_slave_alloc, @@ -124,6 +126,21 @@ static struct iscsi_transport qla4xxx_iscsi_transport = { static struct scsi_transport_template *qla4xxx_scsi_transport; +static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc) +{ + struct iscsi_cls_session *session; + struct ddb_entry *ddb_entry; + + session = starget_to_session(scsi_target(sc->device)); + ddb_entry = session->dd_data; + + /* if we are not logged in then the LLD is going to clean up the cmd */ + if (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) + return BLK_EH_RESET_TIMER; + else + return BLK_EH_NOT_HANDLED; +} + static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session) { struct ddb_entry *ddb_entry = session->dd_data; -- cgit v1.2.3 From dca05c4c07c48da0509708d9e562578d269e90e5 Mon Sep 17 00:00:00 2001 From: Karen Higgins Date: Wed, 15 Jul 2009 15:03:00 -0500 Subject: [SCSI] qla4xxx: Fix Driver Fault Recovery Completion Fixed driver bug where adapter recovery did not complete if there were outstanding commands detected on that host adapter. Signed-off-by: Karen Higgins Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/qla4xxx/ql4_os.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 6841883b3611..e1cc0d21d890 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -921,18 +921,17 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha, /* Flush any pending ddb changed AENs */ qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); + qla4xxx_flush_active_srbs(ha); + /* Reset the firmware. 
If successful, function * returns with ISP interrupts enabled. */ - if (status == QLA_SUCCESS) { - DEBUG2(printk("scsi%ld: %s - Performing soft reset..\n", - ha->host_no, __func__)); - qla4xxx_flush_active_srbs(ha); - if (ql4xxx_lock_drvr_wait(ha) == QLA_SUCCESS) - status = qla4xxx_soft_reset(ha); - else - status = QLA_ERROR; - } + DEBUG2(printk("scsi%ld: %s - Performing soft reset..\n", + ha->host_no, __func__)); + if (ql4xxx_lock_drvr_wait(ha) == QLA_SUCCESS) + status = qla4xxx_soft_reset(ha); + else + status = QLA_ERROR; /* Flush any pending ddb changed AENs */ qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); @@ -1661,7 +1660,7 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd) ha = (struct scsi_qla_host *) cmd->device->host->hostdata; dev_info(&ha->pdev->dev, - "scsi(%ld:%d:%d:%d): ADAPTER RESET ISSUED.\n", ha->host_no, + "scsi(%ld:%d:%d:%d): HOST RESET ISSUED.\n", ha->host_no, cmd->device->channel, cmd->device->id, cmd->device->lun); if (qla4xxx_wait_for_hba_online(ha) != QLA_SUCCESS) { -- cgit v1.2.3 From 612f73488785829d4f34aad00bfe30b904c94c9e Mon Sep 17 00:00:00 2001 From: Karen Higgins Date: Wed, 15 Jul 2009 15:03:01 -0500 Subject: [SCSI] qla4xxx: Fix srb lookup in qla4xxx_eh_device_reset eh_device_reset may be called from scsi error handler or sg_reset, etc. When called from sg_reset, there will not be an associated srb. The driver should lookup the corresponding device handle given information from the supplied cmd structure and should not assume that there exists an srb. Signed-off-by: Karen Higgins Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/qla4xxx/ql4_os.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index e1cc0d21d890..40e3cafb3a9c 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1543,11 +1543,9 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) { struct scsi_qla_host *ha = to_qla_host(cmd->device->host); struct ddb_entry *ddb_entry = cmd->device->hostdata; - struct srb *sp; int ret = FAILED, stat; - sp = (struct srb *) cmd->SCp.ptr; - if (!sp || !ddb_entry) + if (!ddb_entry) return ret; dev_info(&ha->pdev->dev, -- cgit v1.2.3 From 16ed55f9de6743ceece9bf528362cadff10f1c5c Mon Sep 17 00:00:00 2001 From: Karen Higgins Date: Wed, 15 Jul 2009 15:03:02 -0500 Subject: [SCSI] qla4xxx: Remove hiwat code so scsi eh does not get escalated when we can make progress Removed unnecessary hiwat code to free up the number available IOCBs. Eliminates unnecessary eh_ escalations due to inability to obtain IOCB pkt for marker. v2. - Remove define not used anymore and fix req_q_coun accounting. 
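The helper introduced in the diff below, qla4xxx_space_in_req_ring(), is ordinary producer/consumer ring arithmetic: the number of free request entries is derived from the driver's producer index and the firmware's consumer index, with wrap-around handled explicitly and a small cushion kept so the ring never fills completely. The standalone sketch below shows only that arithmetic; the ring depth, the two-entry cushion and the function names are borrowed loosely from the diff and are not driver code.

#include <assert.h>
#include <stdio.h>

#define REQUEST_QUEUE_DEPTH 128	/* illustrative depth */

/* Free entries between producer 'in' and consumer 'out', handling wrap. */
static unsigned int ring_free_entries(unsigned int in, unsigned int out)
{
	if (in < out)
		return out - in;
	return REQUEST_QUEUE_DEPTH - (in - out);
}

/* Room for req_cnt entries, keeping a two-entry cushion as the driver does. */
static int space_in_req_ring(unsigned int in, unsigned int out,
			     unsigned int req_cnt)
{
	return (req_cnt + 2) < ring_free_entries(in, out);
}

int main(void)
{
	assert(space_in_req_ring(0, 0, 1));	/* empty ring: plenty of room */
	assert(!space_in_req_ring(10, 12, 1));	/* only two free: below cushion */
	assert(space_in_req_ring(120, 10, 4));	/* wrapped: 18 free entries */
	printf("ring space checks passed\n");
	return 0;
}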
Signed-off-by: Karen Higgins [michaelc: ported patch from qlogic.com driver to upstream] Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/qla4xxx/ql4_def.h | 2 - drivers/scsi/qla4xxx/ql4_iocb.c | 133 ++++++++++++++++++---------------------- drivers/scsi/qla4xxx/ql4_mbx.c | 10 --- 3 files changed, 60 insertions(+), 85 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index 963e8553d210..81b5f29254e2 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -100,7 +100,6 @@ #define MAX_SRBS MAX_CMDS_TO_RISC #define MBOX_AEN_REG_COUNT 5 #define MAX_INIT_RETRIES 5 -#define IOCB_HIWAT_CUSHION 16 /* * Buffer sizes @@ -307,7 +306,6 @@ struct scsi_qla_host { uint32_t tot_ddbs; uint16_t iocb_cnt; - uint16_t iocb_hiwat; /* SRB cache. */ #define SRB_MIN_REQ 128 diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index 912a67494adf..e0c32159749c 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c @@ -10,9 +10,42 @@ #include "ql4_dbg.h" #include "ql4_inline.h" - #include +static int +qla4xxx_space_in_req_ring(struct scsi_qla_host *ha, uint16_t req_cnt) +{ + uint16_t cnt; + + /* Calculate number of free request entries. */ + if ((req_cnt + 2) >= ha->req_q_count) { + cnt = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); + if (ha->request_in < cnt) + ha->req_q_count = cnt - ha->request_in; + else + ha->req_q_count = REQUEST_QUEUE_DEPTH - + (ha->request_in - cnt); + } + + /* Check if room for request in request ring. */ + if ((req_cnt + 2) < ha->req_q_count) + return 1; + else + return 0; +} + +static void qla4xxx_advance_req_ring_ptr(struct scsi_qla_host *ha) +{ + /* Advance request queue pointer */ + if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) { + ha->request_in = 0; + ha->request_ptr = ha->request_ring; + } else { + ha->request_in++; + ha->request_ptr++; + } +} + /** * qla4xxx_get_req_pkt - returns a valid entry in request queue. * @ha: Pointer to host adapter structure. 
@@ -26,35 +59,18 @@ static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha, struct queue_entry **queue_entry) { - uint16_t request_in; - uint8_t status = QLA_SUCCESS; - - *queue_entry = ha->request_ptr; + uint16_t req_cnt = 1; - /* get the latest request_in and request_out index */ - request_in = ha->request_in; - ha->request_out = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); - - /* Advance request queue pointer and check for queue full */ - if (request_in == (REQUEST_QUEUE_DEPTH - 1)) { - request_in = 0; - ha->request_ptr = ha->request_ring; - } else { - request_in++; - ha->request_ptr++; - } - - /* request queue is full, try again later */ - if ((ha->iocb_cnt + 1) >= ha->iocb_hiwat) { - /* restore request pointer */ - ha->request_ptr = *queue_entry; - status = QLA_ERROR; - } else { - ha->request_in = request_in; + if (qla4xxx_space_in_req_ring(ha, req_cnt)) { + *queue_entry = ha->request_ptr; memset(*queue_entry, 0, sizeof(**queue_entry)); + + qla4xxx_advance_req_ring_ptr(ha); + ha->req_q_count -= req_cnt; + return QLA_SUCCESS; } - return status; + return QLA_ERROR; } /** @@ -100,21 +116,14 @@ exit_send_marker: return status; } -static struct continuation_t1_entry* qla4xxx_alloc_cont_entry( - struct scsi_qla_host *ha) +static struct continuation_t1_entry * +qla4xxx_alloc_cont_entry(struct scsi_qla_host *ha) { struct continuation_t1_entry *cont_entry; cont_entry = (struct continuation_t1_entry *)ha->request_ptr; - /* Advance request queue pointer */ - if (ha->request_in == (REQUEST_QUEUE_DEPTH - 1)) { - ha->request_in = 0; - ha->request_ptr = ha->request_ring; - } else { - ha->request_in++; - ha->request_ptr++; - } + qla4xxx_advance_req_ring_ptr(ha); /* Load packet defaults */ cont_entry->hdr.entryType = ET_CONTINUE; @@ -197,13 +206,10 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) struct scsi_cmnd *cmd = srb->cmd; struct ddb_entry *ddb_entry; struct command_t3_entry *cmd_entry; - int nseg; uint16_t tot_dsds; uint16_t req_cnt; - unsigned long flags; - uint16_t cnt; uint32_t index; char tag[2]; @@ -217,6 +223,19 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) index = (uint32_t)cmd->request->tag; + /* + * Check to see if adapter is online before placing request on + * request queue. If a reset occurs and a request is in the queue, + * the firmware will still attempt to process the request, retrieving + * garbage for pointers. + */ + if (!test_bit(AF_ONLINE, &ha->flags)) { + DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! " + "Do not issue command.\n", + ha->host_no, __func__)); + goto queuing_error; + } + /* Calculate the number of request entries needed. 
*/ nseg = scsi_dma_map(cmd); if (nseg < 0) @@ -224,17 +243,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) tot_dsds = nseg; req_cnt = qla4xxx_calc_request_entries(tot_dsds); - - if (ha->req_q_count < (req_cnt + 2)) { - cnt = (uint16_t) le32_to_cpu(ha->shadow_regs->req_q_out); - if (ha->request_in < cnt) - ha->req_q_count = cnt - ha->request_in; - else - ha->req_q_count = REQUEST_QUEUE_DEPTH - - (ha->request_in - cnt); - } - - if (ha->req_q_count < (req_cnt + 2)) + if (!qla4xxx_space_in_req_ring(ha, req_cnt)) goto queuing_error; /* total iocbs active */ @@ -286,32 +295,10 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) break; } - - /* Advance request queue pointer */ - ha->request_in++; - if (ha->request_in == REQUEST_QUEUE_DEPTH) { - ha->request_in = 0; - ha->request_ptr = ha->request_ring; - } else - ha->request_ptr++; - - + qla4xxx_advance_req_ring_ptr(ha); qla4xxx_build_scsi_iocbs(srb, cmd_entry, tot_dsds); wmb(); - /* - * Check to see if adapter is online before placing request on - * request queue. If a reset occurs and a request is in the queue, - * the firmware will still attempt to process the request, retrieving - * garbage for pointers. - */ - if (!test_bit(AF_ONLINE, &ha->flags)) { - DEBUG2(printk("scsi%ld: %s: Adapter OFFLINE! " - "Do not issue command.\n", - ha->host_no, __func__)); - goto queuing_error; - } - srb->cmd->host_scribble = (unsigned char *)srb; /* update counters */ diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 051b0f5e8c8e..09d6d4b76f39 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -385,16 +385,6 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) mbox_sts[0])); return QLA_ERROR; } - - /* High-water mark of IOCBs */ - ha->iocb_hiwat = mbox_sts[2]; - if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) - ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; - else - dev_info(&ha->pdev->dev, "WARNING!!! You have less than %d " - "firmware IOCBs available (%d).\n", - IOCB_HIWAT_CUSHION, ha->iocb_hiwat); - return QLA_SUCCESS; } -- cgit v1.2.3 From a0cc1ecc098e31d03b3265712a3e280a7fabf438 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Tue, 28 Jul 2009 17:33:37 -0700 Subject: [SCSI] libfc: fix a circular locking warning during sending RRQ Currently the fc_exch_rrq is called with fc_exch's ex_lock held. The fc_exch_rrq allocates new exch and that requires taking ex_lock again after EM lock. This locking order causes warning, see more details on this warning at :- http://www.open-fcoe.org/pipermail/devel/2009-July/003251.html This patch fixes this by dropping the ex_lock before calling fc_exch_rrq(). The fc_exch_rrq needs to grab ex_lock lock again to schedule RRQ retry and in the meanwhile fc_exch_reset could occur before ex_lock is grabbed inside fc_exch_rrq. So to handle this case, this patch adds additional check to detect fc_exch_reset after ex_lock acquired and in case the fc_exch_reset occurred then abandons the RRQ retry and releases the exch. 
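The reordering described above is a common way out of a lock-ordering problem: drop the object lock before calling into a path that needs another lock, and when the slow path has to take the object lock again, re-check the object's state first, because a reset may have run in the unlocked window. The sketch below shows that shape with pthreads and invented names (struct exch, EX_RESET and so on); it illustrates the pattern and is not libfc code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define EX_DONE		0x1
#define EX_RESET	0x2
#define EX_REC_QUAL	0x4

struct exch {
	pthread_mutex_t lock;
	unsigned int state;
};

/* Stand-in for the frame allocation + send that must not run under ep->lock;
 * returning false here forces the retry path for the demonstration. */
static bool send_rrq_frame(struct exch *ep)
{
	(void)ep;
	return false;
}

static void exch_rrq(struct exch *ep)
{
	if (send_rrq_frame(ep))
		return;			/* sent without holding ep->lock */

	/* Retry path: take the lock again and revalidate the state, since a
	 * reset may have completed the exchange while the lock was dropped. */
	pthread_mutex_lock(&ep->lock);
	if (ep->state & (EX_DONE | EX_RESET)) {
		pthread_mutex_unlock(&ep->lock);
		return;			/* lost the race: drop the retry */
	}
	ep->state |= EX_REC_QUAL;	/* safe: checked under the lock */
	pthread_mutex_unlock(&ep->lock);
}

int main(void)
{
	struct exch ep = { PTHREAD_MUTEX_INITIALIZER, 0 };

	exch_rrq(&ep);
	printf("exchange state after retry path: 0x%x\n", ep.state);
	return 0;
}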
Signed-off-by: Vasu Dev Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_exch.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 2bc22be5f849..145ab9ba55ea 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -415,9 +415,9 @@ static void fc_exch_timeout(struct work_struct *work) e_stat = ep->esb_stat; if (e_stat & ESB_ST_COMPLETE) { ep->esb_stat = e_stat & ~ESB_ST_REC_QUAL; + spin_unlock_bh(&ep->ex_lock); if (e_stat & ESB_ST_REC_QUAL) fc_exch_rrq(ep); - spin_unlock_bh(&ep->ex_lock); goto done; } else { resp = ep->resp; @@ -1624,14 +1624,14 @@ static void fc_exch_rrq(struct fc_exch *ep) struct fc_lport *lp; struct fc_els_rrq *rrq; struct fc_frame *fp; - struct fc_seq *rrq_sp; u32 did; lp = ep->lp; fp = fc_frame_alloc(lp, sizeof(*rrq)); if (!fp) - return; + goto retry; + rrq = fc_frame_payload_get(fp, sizeof(*rrq)); memset(rrq, 0, sizeof(*rrq)); rrq->rrq_cmd = ELS_RRQ; @@ -1647,13 +1647,20 @@ static void fc_exch_rrq(struct fc_exch *ep) fc_host_port_id(lp->host), FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0); - rrq_sp = fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, - lp->e_d_tov); - if (!rrq_sp) { - ep->esb_stat |= ESB_ST_REC_QUAL; - fc_exch_timer_set_locked(ep, ep->r_a_tov); + if (fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep, lp->e_d_tov)) + return; + +retry: + spin_lock_bh(&ep->ex_lock); + if (ep->state & (FC_EX_RST_CLEANUP | FC_EX_DONE)) { + spin_unlock_bh(&ep->ex_lock); + /* drop hold for rec qual */ + fc_exch_release(ep); return; } + ep->esb_stat |= ESB_ST_REC_QUAL; + fc_exch_timer_set_locked(ep, ep->r_a_tov); + spin_unlock_bh(&ep->ex_lock); } -- cgit v1.2.3 From 19252de6818ced0def0551d64a0a2975f52a523d Mon Sep 17 00:00:00 2001 From: Tom Peng Date: Fri, 17 Jul 2009 16:02:04 +0800 Subject: [SCSI] libsas: fix wide port hotplug issues Hotplug of phys which form wide ports simply does not work at the moment. Fix this by adding checks at the hotplug points to see if the attached sas address of the phy already exists (in which case it's part of a wide port) and act accordingly. Signed-off-by: Tom Peng Signed-off-by: Jack Wang Signed-off-by: Lindar Liu Signed-off-by: Kevin Ao [jejb: tidied up coding, fixed an error case and made TRUE/FALSE lower case to fix a ppc64 compile error in linux-next] Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 147 +++++++++++++++++++++++++++---------- 1 file changed, 107 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 54fa1e42dc4d..b3381959acce 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -766,6 +766,7 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, SAS_ADDR_SIZE) && ephy->port) { sas_port_add_phy(ephy->port, phy->phy); + phy->port = ephy->port; phy->phy_state = PHY_DEVICE_DISCOVERED; return 0; } @@ -945,11 +946,21 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) if (ex->ex_phy[i].phy_state == PHY_VACANT || ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) continue; - + /* + * Due to races, the phy might not get added to the + * wide port, so we add the phy to the wide port here. 
+ */ if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == - SAS_ADDR(child->sas_addr)) + SAS_ADDR(child->sas_addr)) { ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; + res = sas_ex_join_wide_port(dev, i); + if (!res) + SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", + i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); + + } } + res = 0; } return res; @@ -1598,7 +1609,7 @@ static int sas_get_phy_attached_sas_addr(struct domain_device *dev, } static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, - int from_phy) + int from_phy, bool update) { struct expander_device *ex = &dev->ex_dev; int res = 0; @@ -1611,7 +1622,9 @@ static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, if (res) goto out; else if (phy_change_count != ex->ex_phy[i].phy_change_count) { - ex->ex_phy[i].phy_change_count = phy_change_count; + if (update) + ex->ex_phy[i].phy_change_count = + phy_change_count; *phy_id = i; return 0; } @@ -1653,31 +1666,52 @@ out: kfree(rg_req); return res; } +/** + * sas_find_bcast_dev - find the device issue BROADCAST(CHANGE). + * @dev:domain device to be detect. + * @src_dev: the device which originated BROADCAST(CHANGE). + * + * Add self-configuration expander suport. Suppose two expander cascading, + * when the first level expander is self-configuring, hotplug the disks in + * second level expander, BROADCAST(CHANGE) will not only be originated + * in the second level expander, but also be originated in the first level + * expander (see SAS protocol SAS 2r-14, 7.11 for detail), it is to say, + * expander changed count in two level expanders will all increment at least + * once, but the phy which chang count has changed is the source device which + * we concerned. + */ static int sas_find_bcast_dev(struct domain_device *dev, struct domain_device **src_dev) { struct expander_device *ex = &dev->ex_dev; int ex_change_count = -1; + int phy_id = -1; int res; + struct domain_device *ch; res = sas_get_ex_change_count(dev, &ex_change_count); if (res) goto out; - if (ex_change_count != -1 && - ex_change_count != ex->ex_change_count) { - *src_dev = dev; - ex->ex_change_count = ex_change_count; - } else { - struct domain_device *ch; - - list_for_each_entry(ch, &ex->children, siblings) { - if (ch->dev_type == EDGE_DEV || - ch->dev_type == FANOUT_DEV) { - res = sas_find_bcast_dev(ch, src_dev); - if (src_dev) - return res; - } + if (ex_change_count != -1 && ex_change_count != ex->ex_change_count) { + /* Just detect if this expander phys phy change count changed, + * in order to determine if this expander originate BROADCAST, + * and do not update phy change count field in our structure. 
+ */ + res = sas_find_bcast_phy(dev, &phy_id, 0, false); + if (phy_id != -1) { + *src_dev = dev; + ex->ex_change_count = ex_change_count; + SAS_DPRINTK("Expander phy change count has changed\n"); + return res; + } else + SAS_DPRINTK("Expander phys DID NOT change\n"); + } + list_for_each_entry(ch, &ex->children, siblings) { + if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) { + res = sas_find_bcast_dev(ch, src_dev); + if (src_dev) + return res; } } out: @@ -1700,24 +1734,26 @@ static void sas_unregister_ex_tree(struct domain_device *dev) } static void sas_unregister_devs_sas_addr(struct domain_device *parent, - int phy_id) + int phy_id, bool last) { struct expander_device *ex_dev = &parent->ex_dev; struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; struct domain_device *child, *n; - - list_for_each_entry_safe(child, n, &ex_dev->children, siblings) { - if (SAS_ADDR(child->sas_addr) == - SAS_ADDR(phy->attached_sas_addr)) { - if (child->dev_type == EDGE_DEV || - child->dev_type == FANOUT_DEV) - sas_unregister_ex_tree(child); - else - sas_unregister_dev(child); - break; + if (last) { + list_for_each_entry_safe(child, n, + &ex_dev->children, siblings) { + if (SAS_ADDR(child->sas_addr) == + SAS_ADDR(phy->attached_sas_addr)) { + if (child->dev_type == EDGE_DEV || + child->dev_type == FANOUT_DEV) + sas_unregister_ex_tree(child); + else + sas_unregister_dev(child); + break; + } } + sas_disable_routing(parent, phy->attached_sas_addr); } - sas_disable_routing(parent, phy->attached_sas_addr); memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); sas_port_delete_phy(phy->port, phy->phy); if (phy->port->num_phys == 0) @@ -1770,15 +1806,31 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) { struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; struct domain_device *child; - int res; + bool found = false; + int res, i; SAS_DPRINTK("ex %016llx phy%d new device attached\n", SAS_ADDR(dev->sas_addr), phy_id); res = sas_ex_phy_discover(dev, phy_id); if (res) goto out; + /* to support the wide port inserted */ + for (i = 0; i < dev->ex_dev.num_phys; i++) { + struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i]; + if (i == phy_id) + continue; + if (SAS_ADDR(ex_phy_temp->attached_sas_addr) == + SAS_ADDR(ex_phy->attached_sas_addr)) { + found = true; + break; + } + } + if (found) { + sas_ex_join_wide_port(dev, phy_id); + return 0; + } res = sas_ex_discover_devices(dev, phy_id); - if (res) + if (!res) goto out; list_for_each_entry(child, &dev->ex_dev.children, siblings) { if (SAS_ADDR(child->sas_addr) == @@ -1793,7 +1845,7 @@ out: return res; } -static int sas_rediscover_dev(struct domain_device *dev, int phy_id) +static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) { struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; @@ -1804,11 +1856,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; - sas_unregister_devs_sas_addr(dev, phy_id); + sas_unregister_devs_sas_addr(dev, phy_id, last); goto out; break; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; - sas_unregister_devs_sas_addr(dev, phy_id); + sas_unregister_devs_sas_addr(dev, phy_id, last); goto out; break; case SMP_RESP_FUNC_ACC: break; @@ -1816,7 +1868,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) if (SAS_ADDR(attached_sas_addr) == 0) { phy->phy_state = PHY_EMPTY; - sas_unregister_devs_sas_addr(dev, phy_id); + sas_unregister_devs_sas_addr(dev, phy_id, last); } 
else if (SAS_ADDR(attached_sas_addr) == SAS_ADDR(phy->attached_sas_addr)) { SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", @@ -1828,12 +1880,27 @@ out: return res; } +/** + * sas_rediscover - revalidate the domain. + * @dev:domain device to be detect. + * @phy_id: the phy id will be detected. + * + * NOTE: this process _must_ quit (return) as soon as any connection + * errors are encountered. Connection recovery is done elsewhere. + * Discover process only interrogates devices in order to discover the + * domain.For plugging out, we un-register the device only when it is + * the last phy in the port, for other phys in this port, we just delete it + * from the port.For inserting, we do discovery when it is the + * first phy,for other phys in this port, we add it to the port to + * forming the wide-port. + */ static int sas_rediscover(struct domain_device *dev, const int phy_id) { struct expander_device *ex = &dev->ex_dev; struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; int res = 0; int i; + bool last = true; /* is this the last phy of the port */ SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", SAS_ADDR(dev->sas_addr), phy_id); @@ -1848,13 +1915,13 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) SAS_ADDR(changed_phy->attached_sas_addr)) { SAS_DPRINTK("phy%d part of wide port with " "phy%d\n", phy_id, i); - goto out; + last = false; + break; } } - res = sas_rediscover_dev(dev, phy_id); + res = sas_rediscover_dev(dev, phy_id, last); } else res = sas_discover_new(dev, phy_id); -out: return res; } @@ -1881,7 +1948,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) do { phy_id = -1; - res = sas_find_bcast_phy(dev, &phy_id, i); + res = sas_find_bcast_phy(dev, &phy_id, i, true); if (phy_id == -1) break; res = sas_rediscover(dev, phy_id); -- cgit v1.2.3 From ffd4bc2a984fab40ed969163efdff321490e8032 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 29 Jul 2009 14:06:53 -0400 Subject: [SCSI] sd: Avoid sending extended inquiry to legacy devices Some USB devices crash when we send them an inquiry with the EVPD bit set, regardless of page requested (i.e. including page 0). We only need the extended inquiry to gain access to VPD pages 0xB0 and 0xB1. These appeared in SBC2 and SBC3 respectively, so we can restrict sending the extended inquiry to devices reporting SPC3 or higher. This fixes bugzilla.kernel.org #13657. Signed-off-by: Martin K. Petersen [jejb: added comment] Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5616cd780ff3..b7b9fec67a98 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1840,6 +1840,18 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) kfree(buffer); } +static int sd_try_extended_inquiry(struct scsi_device *sdp) +{ + /* + * Although VPD inquiries can go to SCSI-2 type devices, + * some USB ones crash on receiving them, and the pages + * we currently ask for are for SPC-3 and beyond + */ + if (sdp->scsi_level > SCSI_SPC_2) + return 1; + return 0; +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. 
@@ -1877,8 +1889,12 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - sd_read_block_limits(sdkp); - sd_read_block_characteristics(sdkp); + + if (sd_try_extended_inquiry(sdp)) { + sd_read_block_limits(sdkp); + sd_read_block_characteristics(sdkp); + } + sd_read_write_protect_flag(sdkp, buffer); sd_read_cache_type(sdkp, buffer); sd_read_app_tag_own(sdkp, buffer); -- cgit v1.2.3 From 2b8d33f714477d1719f96459370037993520de84 Mon Sep 17 00:00:00 2001 From: "ling.ma@intel.com" Date: Wed, 29 Jul 2009 11:31:18 +0800 Subject: drm/i915: Return disconnected for SDVO DVI when there's no digital EDID. The patch fixed a bug on MP965-D. When VGA is connected to a DVI-I connector, it incorrectly shows sdvo dvi as connected. Signed-off-by: Ma Ling [anholt: hand-resolved against previous commit and fixed up commit message] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_sdvo.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index b68746f0380e..5371d9332554 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -31,6 +31,7 @@ #include "drm.h" #include "drm_crtc.h" #include "intel_drv.h" +#include "drm_edid.h" #include "i915_drm.h" #include "i915_drv.h" #include "intel_sdvo_regs.h" @@ -1477,20 +1478,35 @@ intel_sdvo_multifunc_encoder(struct intel_output *intel_output) return (caps > 1); } -static void -intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) +enum drm_connector_status +intel_sdvo_hdmi_sink_detect(struct drm_connector *connector, u16 response) { struct intel_output *intel_output = to_intel_output(connector); struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + enum drm_connector_status status = connector_status_connected; struct edid *edid = NULL; edid = drm_get_edid(&intel_output->base, intel_output->ddc_bus); if (edid != NULL) { - sdvo_priv->is_hdmi = drm_detect_hdmi_monitor(edid); + /* Don't report the output as connected if it's a DVI-I + * connector with a non-digital EDID coming out. + */ + if (response & (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)) { + if (edid->input & DRM_EDID_INPUT_DIGITAL) + sdvo_priv->is_hdmi = + drm_detect_hdmi_monitor(edid); + else + status = connector_status_disconnected; + } + kfree(edid); intel_output->base.display_info.raw_edid = NULL; - } + + } else if (response & (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)) + status = connector_status_disconnected; + + return status; } static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) @@ -1518,8 +1534,7 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect return connector_status_unknown; sdvo_priv->attached_output = response; } - intel_sdvo_hdmi_sink_detect(connector); - return connector_status_connected; + return intel_sdvo_hdmi_sink_detect(connector, response); } static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) -- cgit v1.2.3 From 0c2e39525b3b53a97a0202c5f35058147e53977e Mon Sep 17 00:00:00 2001 From: "ling.ma@intel.com" Date: Fri, 17 Jul 2009 11:44:30 +0800 Subject: drm/i915: Add support for dual-channel LVDS on 8xx. This corresponds to a fix to UMS back in 2007. Fixes fd.o bug #20115. 
Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1da7b0b9ed31..d6fce2133413 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -90,7 +90,7 @@ struct intel_limit { #define I8XX_P2_SLOW 4 #define I8XX_P2_FAST 2 #define I8XX_P2_LVDS_SLOW 14 -#define I8XX_P2_LVDS_FAST 14 /* No fast option */ +#define I8XX_P2_LVDS_FAST 7 #define I8XX_P2_SLOW_LIMIT 165000 #define I9XX_DOT_MIN 20000 -- cgit v1.2.3 From a541f8401d8e9113a89ee902cb8d8e412d6d3569 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 29 Jul 2009 08:49:52 +0000 Subject: iscsi: Use GFP_ATOMIC in iscsi_offload_mesg(). Changing to GFP_ATOMIC because the only caller in cnic/bnx2i may be calling this function while holding spin_lock. This problem was discovered by Mike Christie. Signed-off-by: Michael Chan Acked-by: Mike Christie Signed-off-by: David S. Miller --- drivers/scsi/scsi_transport_iscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 783e33c65eb7..b47240ca4b19 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -990,7 +990,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, struct iscsi_uevent *ev; int len = NLMSG_SPACE(sizeof(*ev) + data_size); - skb = alloc_skb(len, GFP_NOIO); + skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { printk(KERN_ERR "can not deliver iscsi offload message:OOM\n"); return -ENOMEM; @@ -1012,7 +1012,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, memcpy((char *)ev + sizeof(*ev), data, data_size); - return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO); + return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iscsi_offload_mesg); -- cgit v1.2.3 From 3d54015b750e5d5e950a1dcee2735387fd4b6e1a Mon Sep 17 00:00:00 2001 From: roel kluin Date: Thu, 30 Jul 2009 00:26:32 +0000 Subject: 3c515: Write outside array bounds if dev_alloc_skb() fails on the first iteration, a write to cp->rx_ring[-1] occurs. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- drivers/net/3c515.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c index 3e00fa8ea65f..4a7c32895be5 100644 --- a/drivers/net/3c515.c +++ b/drivers/net/3c515.c @@ -832,7 +832,9 @@ static int corkscrew_open(struct net_device *dev) skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ vp->rx_ring[i].addr = isa_virt_to_bus(skb->data); } - vp->rx_ring[i - 1].next = isa_virt_to_bus(&vp->rx_ring[0]); /* Wrap the ring. */ + if (i != 0) + vp->rx_ring[i - 1].next = + isa_virt_to_bus(&vp->rx_ring[0]); /* Wrap the ring. */ outl(isa_virt_to_bus(&vp->rx_ring[0]), ioaddr + UpListPtr); } if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ -- cgit v1.2.3 From f0c5b35c6c93c89a9d8ccab19b0b4842f5dfddc5 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Wed, 29 Jul 2009 03:18:56 +0000 Subject: eexpress: Read buffer overflow start_code is 69 words, but the code always writes a multiple of 16 words, so the last 11 words written are outside the array. Signed-off-by: Roel Kluin Signed-off-by: David S. 
Miller --- drivers/net/eexpress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index 1686dca28748..1f016d66684a 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -1474,13 +1474,13 @@ static void eexp_hw_init586(struct net_device *dev) outw(0x0000, ioaddr + 0x800c); outw(0x0000, ioaddr + 0x800e); - for (i = 0; i < (sizeof(start_code)); i+=32) { + for (i = 0; i < ARRAY_SIZE(start_code) * 2; i+=32) { int j; outw(i, ioaddr + SM_PTR); - for (j = 0; j < 16; j+=2) + for (j = 0; j < 16 && (i+j)/2 < ARRAY_SIZE(start_code); j+=2) outw(start_code[(i+j)/2], ioaddr+0x4000+j); - for (j = 0; j < 16; j+=2) + for (j = 0; j < 16 && (i+j+16)/2 < ARRAY_SIZE(start_code); j+=2) outw(start_code[(i+j+16)/2], ioaddr+0x8000+j); } -- cgit v1.2.3 From daed953721850381673687c59f3a0df553eb6626 Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Thu, 30 Jul 2009 17:16:05 -0400 Subject: hp-wmi: check that an input device exists in resume handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some systems may not support input events, or registering the input handler may have failed. So check that an input device exists before trying to set the docking and tablet mode state during resume. Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=13865 Reported-and-tested-by: Cédric Godin Signed-off-by: Frans Pop Acked-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/platform/x86/hp-wmi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index ca508564a181..a2ad53e15874 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -520,11 +520,13 @@ static int hp_wmi_resume_handler(struct platform_device *device) * the input layer will only actually pass it on if the state * changed. */ - - input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state()); - input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, - hp_wmi_tablet_state()); - input_sync(hp_wmi_input_dev); + if (hp_wmi_input_dev) { + input_report_switch(hp_wmi_input_dev, SW_DOCK, + hp_wmi_dock_state()); + input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, + hp_wmi_tablet_state()); + input_sync(hp_wmi_input_dev); + } return 0; } -- cgit v1.2.3 From 72fc939789dbe7ca091b50b686d45ac0df15417a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Jul 2009 23:43:08 +0000 Subject: pppoe: fix /proc/net/pppoe If a socket is hashed in last slot of pppoe hash table (PPPOE_HASH_SIZE-1) we report it many times (up to filling seq buffer) (Only the last socket of last slot) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index f0031f1f97e5..5f2090233d7b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -1063,6 +1063,7 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); + po = NULL; while (++hash < PPPOE_HASH_SIZE) { po = pn->hash_table[hash]; if (po) -- cgit v1.2.3 From accff95c2500c7bce671c1f722de6f8810fe550d Mon Sep 17 00:00:00 2001 From: Jiajun Wu Date: Thu, 30 Jul 2009 14:20:42 -0700 Subject: gianfar: fix coalescing setup in ethtool support Parameter order for using mk_ic_value(count, time) was reversed, the patch fixes this. 
Signed-off-by: Jiajun Wu Signed-off-by: Li Yang Signed-off-by: David S. Miller --- drivers/net/gianfar_ethtool.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index dbf06e9313cc..2234118eedbb 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c @@ -366,9 +366,8 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals return -EINVAL; } - priv->rxic = mk_ic_value( - gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs), - cvals->rx_max_coalesced_frames); + priv->rxic = mk_ic_value(cvals->rx_max_coalesced_frames, + gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs)); /* Set up tx coalescing */ if ((cvals->tx_coalesce_usecs == 0) || @@ -390,9 +389,8 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals return -EINVAL; } - priv->txic = mk_ic_value( - gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs), - cvals->tx_max_coalesced_frames); + priv->txic = mk_ic_value(cvals->tx_max_coalesced_frames, + gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs)); gfar_write(&priv->regs->rxic, 0); if (priv->rxcoalescing) -- cgit v1.2.3 From 8fe70a5f7167e02bbc18086970f0e92a5e55b80d Mon Sep 17 00:00:00 2001 From: Ramax Lo Date: Thu, 9 Jul 2009 16:28:33 +0800 Subject: ARM: S3C24XX: serial: Fix section mismatch warnings During kernel build process, the following warning was found: WARNING: drivers/serial/built-in.o(.data+0x304): Section mismatch in reference from the variable s3c2440_serial_drv to the function .devexit.text:s3c24xx_serial_remove() The variable s3c2440_serial_drv references the function __devexit s3c24xx_serial_remove() If the reference is valid then annotate the variable with __exit* (see linux/init.h) or name the variable: *driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console, The same warning happened for s3c241x platform. We rename variables to avoid these warnings. These changes also apply to s3c2400 & s3c24a0 for consistency. 
Signed-off-by: Ramax Lo Signed-off-by: Ben Dooks --- drivers/serial/s3c2400.c | 8 ++++---- drivers/serial/s3c2410.c | 8 ++++---- drivers/serial/s3c2412.c | 8 ++++---- drivers/serial/s3c2440.c | 8 ++++---- drivers/serial/s3c24a0.c | 8 ++++---- 5 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/serial/s3c2400.c b/drivers/serial/s3c2400.c index fb00ed5296e6..fed1a9a1ffb4 100644 --- a/drivers/serial/s3c2400.c +++ b/drivers/serial/s3c2400.c @@ -76,7 +76,7 @@ static int s3c2400_serial_probe(struct platform_device *dev) return s3c24xx_serial_probe(dev, &s3c2400_uart_inf); } -static struct platform_driver s3c2400_serial_drv = { +static struct platform_driver s3c2400_serial_driver = { .probe = s3c2400_serial_probe, .remove = __devexit_p(s3c24xx_serial_remove), .driver = { @@ -85,16 +85,16 @@ static struct platform_driver s3c2400_serial_drv = { }, }; -s3c24xx_console_init(&s3c2400_serial_drv, &s3c2400_uart_inf); +s3c24xx_console_init(&s3c2400_serial_driver, &s3c2400_uart_inf); static inline int s3c2400_serial_init(void) { - return s3c24xx_serial_init(&s3c2400_serial_drv, &s3c2400_uart_inf); + return s3c24xx_serial_init(&s3c2400_serial_driver, &s3c2400_uart_inf); } static inline void s3c2400_serial_exit(void) { - platform_driver_unregister(&s3c2400_serial_drv); + platform_driver_unregister(&s3c2400_serial_driver); } module_init(s3c2400_serial_init); diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c index b5d7cbcba2ae..c99f0821cae3 100644 --- a/drivers/serial/s3c2410.c +++ b/drivers/serial/s3c2410.c @@ -88,7 +88,7 @@ static int s3c2410_serial_probe(struct platform_device *dev) return s3c24xx_serial_probe(dev, &s3c2410_uart_inf); } -static struct platform_driver s3c2410_serial_drv = { +static struct platform_driver s3c2410_serial_driver = { .probe = s3c2410_serial_probe, .remove = __devexit_p(s3c24xx_serial_remove), .driver = { @@ -97,16 +97,16 @@ static struct platform_driver s3c2410_serial_drv = { }, }; -s3c24xx_console_init(&s3c2410_serial_drv, &s3c2410_uart_inf); +s3c24xx_console_init(&s3c2410_serial_driver, &s3c2410_uart_inf); static int __init s3c2410_serial_init(void) { - return s3c24xx_serial_init(&s3c2410_serial_drv, &s3c2410_uart_inf); + return s3c24xx_serial_init(&s3c2410_serial_driver, &s3c2410_uart_inf); } static void __exit s3c2410_serial_exit(void) { - platform_driver_unregister(&s3c2410_serial_drv); + platform_driver_unregister(&s3c2410_serial_driver); } module_init(s3c2410_serial_init); diff --git a/drivers/serial/s3c2412.c b/drivers/serial/s3c2412.c index 11dcb90bdfef..6e057d8809d3 100644 --- a/drivers/serial/s3c2412.c +++ b/drivers/serial/s3c2412.c @@ -121,7 +121,7 @@ static int s3c2412_serial_probe(struct platform_device *dev) return s3c24xx_serial_probe(dev, &s3c2412_uart_inf); } -static struct platform_driver s3c2412_serial_drv = { +static struct platform_driver s3c2412_serial_driver = { .probe = s3c2412_serial_probe, .remove = __devexit_p(s3c24xx_serial_remove), .driver = { @@ -130,16 +130,16 @@ static struct platform_driver s3c2412_serial_drv = { }, }; -s3c24xx_console_init(&s3c2412_serial_drv, &s3c2412_uart_inf); +s3c24xx_console_init(&s3c2412_serial_driver, &s3c2412_uart_inf); static inline int s3c2412_serial_init(void) { - return s3c24xx_serial_init(&s3c2412_serial_drv, &s3c2412_uart_inf); + return s3c24xx_serial_init(&s3c2412_serial_driver, &s3c2412_uart_inf); } static inline void s3c2412_serial_exit(void) { - platform_driver_unregister(&s3c2412_serial_drv); + platform_driver_unregister(&s3c2412_serial_driver); } 
module_init(s3c2412_serial_init); diff --git a/drivers/serial/s3c2440.c b/drivers/serial/s3c2440.c index 06c5b0cc47a3..69ff5d340f04 100644 --- a/drivers/serial/s3c2440.c +++ b/drivers/serial/s3c2440.c @@ -151,7 +151,7 @@ static int s3c2440_serial_probe(struct platform_device *dev) return s3c24xx_serial_probe(dev, &s3c2440_uart_inf); } -static struct platform_driver s3c2440_serial_drv = { +static struct platform_driver s3c2440_serial_driver = { .probe = s3c2440_serial_probe, .remove = __devexit_p(s3c24xx_serial_remove), .driver = { @@ -160,16 +160,16 @@ static struct platform_driver s3c2440_serial_drv = { }, }; -s3c24xx_console_init(&s3c2440_serial_drv, &s3c2440_uart_inf); +s3c24xx_console_init(&s3c2440_serial_driver, &s3c2440_uart_inf); static int __init s3c2440_serial_init(void) { - return s3c24xx_serial_init(&s3c2440_serial_drv, &s3c2440_uart_inf); + return s3c24xx_serial_init(&s3c2440_serial_driver, &s3c2440_uart_inf); } static void __exit s3c2440_serial_exit(void) { - platform_driver_unregister(&s3c2440_serial_drv); + platform_driver_unregister(&s3c2440_serial_driver); } module_init(s3c2440_serial_init); diff --git a/drivers/serial/s3c24a0.c b/drivers/serial/s3c24a0.c index 786a067d62ac..26c49e18bdd1 100644 --- a/drivers/serial/s3c24a0.c +++ b/drivers/serial/s3c24a0.c @@ -92,7 +92,7 @@ static int s3c24a0_serial_probe(struct platform_device *dev) return s3c24xx_serial_probe(dev, &s3c24a0_uart_inf); } -static struct platform_driver s3c24a0_serial_drv = { +static struct platform_driver s3c24a0_serial_driver = { .probe = s3c24a0_serial_probe, .remove = __devexit_p(s3c24xx_serial_remove), .driver = { @@ -101,16 +101,16 @@ static struct platform_driver s3c24a0_serial_drv = { }, }; -s3c24xx_console_init(&s3c24a0_serial_drv, &s3c24a0_uart_inf); +s3c24xx_console_init(&s3c24a0_serial_driver, &s3c24a0_uart_inf); static int __init s3c24a0_serial_init(void) { - return s3c24xx_serial_init(&s3c24a0_serial_drv, &s3c24a0_uart_inf); + return s3c24xx_serial_init(&s3c24a0_serial_driver, &s3c24a0_uart_inf); } static void __exit s3c24a0_serial_exit(void) { - platform_driver_unregister(&s3c24a0_serial_drv); + platform_driver_unregister(&s3c24a0_serial_driver); } module_init(s3c24a0_serial_init); -- cgit v1.2.3 From 909db80297ba65699a77d877f7bf618ba960f6fc Mon Sep 17 00:00:00 2001 From: Ramax Lo Date: Fri, 10 Jul 2009 19:30:56 +0800 Subject: ARM: S3C64XX: serial: Fix section mismatch warning Rename the structure to avoid the following warning: WARNING: vmlinux.o(.data+0x11ef4): Section mismatch in reference from the variable s3c6400_serial_drv to the function .devexit.text:s3c24xx_serial_remove() The variable s3c6400_serial_drv references the function __devexit s3c24xx_serial_remove() If the reference is valid then annotate the variable with __exit* (see linux/init.h) or name the variable: *driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console, Signed-off-by: Ramax Lo Signed-off-by: Ben Dooks --- drivers/serial/s3c6400.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/serial/s3c6400.c b/drivers/serial/s3c6400.c index 48f1a3781f0d..4be92ab50058 100644 --- a/drivers/serial/s3c6400.c +++ b/drivers/serial/s3c6400.c @@ -122,7 +122,7 @@ static int s3c6400_serial_probe(struct platform_device *dev) return s3c24xx_serial_probe(dev, &s3c6400_uart_inf); } -static struct platform_driver s3c6400_serial_drv = { +static struct platform_driver s3c6400_serial_driver = { .probe = s3c6400_serial_probe, .remove = __devexit_p(s3c24xx_serial_remove), 
.driver = { @@ -131,16 +131,16 @@ static struct platform_driver s3c6400_serial_drv = { }, }; -s3c24xx_console_init(&s3c6400_serial_drv, &s3c6400_uart_inf); +s3c24xx_console_init(&s3c6400_serial_driver, &s3c6400_uart_inf); static int __init s3c6400_serial_init(void) { - return s3c24xx_serial_init(&s3c6400_serial_drv, &s3c6400_uart_inf); + return s3c24xx_serial_init(&s3c6400_serial_driver, &s3c6400_uart_inf); } static void __exit s3c6400_serial_exit(void) { - platform_driver_unregister(&s3c6400_serial_drv); + platform_driver_unregister(&s3c6400_serial_driver); } module_init(s3c6400_serial_init); -- cgit v1.2.3 From 8f9a71673d9f397a365f4d18c307e91141b8fe92 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 30 Jul 2009 12:25:09 +0000 Subject: ixgbe: Fix netpoll to be properly multiqueue aware Our ndo_poll_controller callback is broken for anything but non-multiqueue setups. This fixes that issue. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 200454f30f6a..60c4a8bf7d38 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -5360,12 +5360,19 @@ static int ixgbe_del_sanmac_netdev(struct net_device *dev) static void ixgbe_netpoll(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; - disable_irq(adapter->pdev->irq); adapter->flags |= IXGBE_FLAG_IN_NETPOLL; - ixgbe_intr(adapter->pdev->irq, netdev); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + int num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + for (i = 0; i < num_q_vectors; i++) { + struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; + ixgbe_msix_clean_many(0, q_vector); + } + } else { + ixgbe_intr(adapter->pdev->irq, netdev); + } adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL; - enable_irq(adapter->pdev->irq); } #endif -- cgit v1.2.3 From 0c19d6af9253f19b41821c29b9c49c2214f19425 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 30 Jul 2009 12:25:28 +0000 Subject: ixgbe: Fix usage of second flags bitmap when using LRO/RSC A second set of feature flag bits was added, and the hardware RSC engine flags were moved there. However, the code itself didn't make the move completely to use the new bitmap. Signed-off-by: Peter P Waskiewicz Jr Acked-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_ethtool.c | 6 +++--- drivers/net/ixgbe/ixgbe_main.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 2a978008fd6e..7ddb50c03f0d 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -1999,13 +1999,13 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data) ethtool_op_set_flags(netdev, data); - if (!(adapter->flags & IXGBE_FLAG2_RSC_CAPABLE)) + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)) return 0; /* if state changes we need to update adapter->flags and reset */ if ((!!(data & ETH_FLAG_LRO)) != - (!!(adapter->flags & IXGBE_FLAG2_RSC_ENABLED))) { - adapter->flags ^= IXGBE_FLAG2_RSC_ENABLED; + (!!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))) { + adapter->flags2 ^= IXGBE_FLAG2_RSC_ENABLED; if (netif_running(netdev)) ixgbe_reinit_locked(adapter); else diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 60c4a8bf7d38..110c65ab5cb5 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -780,7 +780,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, prefetch(next_rxd); cleaned_count++; - if (adapter->flags & IXGBE_FLAG2_RSC_CAPABLE) + if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) rsc_count = ixgbe_get_rsc_count(rx_desc); if (rsc_count) { @@ -2036,7 +2036,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } } else { - if (!(adapter->flags & IXGBE_FLAG2_RSC_ENABLED) && + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && (netdev->mtu <= ETH_DATA_LEN)) rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; else @@ -2165,7 +2165,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); } - if (adapter->flags & IXGBE_FLAG2_RSC_ENABLED) { + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { /* Enable 82599 HW-RSC */ for (i = 0; i < adapter->num_rx_queues; i++) { j = adapter->rx_ring[i].reg_idx; @@ -3812,8 +3812,8 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; } else if (hw->mac.type == ixgbe_mac_82599EB) { adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; - adapter->flags |= IXGBE_FLAG2_RSC_CAPABLE; - adapter->flags |= IXGBE_FLAG2_RSC_ENABLED; + adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->ring_feature[RING_F_FDIR].indices = IXGBE_MAX_FDIR_INDICES; @@ -5618,7 +5618,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - if (adapter->flags & IXGBE_FLAG2_RSC_ENABLED) + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) netdev->features |= NETIF_F_LRO; /* make sure the EEPROM is good */ -- cgit v1.2.3 From 0a924578bc4a2823a95c151f56975c71f5c156bb Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 30 Jul 2009 12:26:00 +0000 Subject: ixgbe: Fix RSC completion delay causing Rx interrupts to stop When a user disables interrupt throttling with ethtool on 82599 devices, the interrupt timer may not be re-enabled if hardware RSC is running. The RSC completions in hardware don't complete before the next ITR event tries to fire, so the ITR timer never gets re-armed. 
This patch increases the amount of time between interrupts when throttling is disabled (rx-usecs = 0) when the hardware RSC deature is enabled. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe.h | 2 ++ drivers/net/ixgbe/ixgbe_ethtool.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 1b12c7ba275f..e11d83d5852b 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -96,6 +96,8 @@ #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 +#define IXGBE_MAX_RSC_INT_RATE 162760 + /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ struct ixgbe_tx_buffer { diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 7ddb50c03f0d..79144e950a34 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -1975,7 +1975,10 @@ static int ixgbe_set_coalesce(struct net_device *netdev, * any other value means disable eitr, which is best * served by setting the interrupt rate very high */ - adapter->eitr_param = IXGBE_MAX_INT_RATE; + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) + adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE; + else + adapter->eitr_param = IXGBE_MAX_INT_RATE; adapter->itr_setting = 0; } -- cgit v1.2.3 From 95fc17aac45300f45968aacd97a536ddd8db8101 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 31 Jul 2009 12:39:15 +1000 Subject: md/raid6: release spare page at ->stop() Add missing call to safe_put_page from stop() by unifying open coded raid5_conf_t de-allocation under free_conf(). Cc: Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/raid5.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37835538b58e..39374230a463 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4316,6 +4316,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) return sectors * (raid_disks - conf->max_degraded); } +static void free_conf(raid5_conf_t *conf) +{ + shrink_stripes(conf); + safe_put_page(conf->spare_page); + kfree(conf->disks); + kfree(conf->stripe_hashtbl); + kfree(conf); +} + static raid5_conf_t *setup_conf(mddev_t *mddev) { raid5_conf_t *conf; @@ -4447,11 +4456,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) abort: if (conf) { - shrink_stripes(conf); - safe_put_page(conf->spare_page); - kfree(conf->disks); - kfree(conf->stripe_hashtbl); - kfree(conf); + free_conf(conf); return ERR_PTR(-EIO); } else return ERR_PTR(-ENOMEM); @@ -4629,12 +4634,8 @@ abort: md_unregister_thread(mddev->thread); mddev->thread = NULL; if (conf) { - shrink_stripes(conf); print_raid5_conf(conf); - safe_put_page(conf->spare_page); - kfree(conf->disks); - kfree(conf->stripe_hashtbl); - kfree(conf); + free_conf(conf); } mddev->private = NULL; printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); @@ -4649,13 +4650,10 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; - shrink_stripes(conf); - kfree(conf->stripe_hashtbl); mddev->queue->backing_dev_info.congested_fn = NULL; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); - kfree(conf->disks); - kfree(conf); + free_conf(conf); mddev->private = NULL; return 
0; } -- cgit v1.2.3 From da60a91d012bcb10bc5bcd86d585c4281742832c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Jun 2009 09:33:32 +0200 Subject: sdhci: use SG_MITER_TO_SG/SG_MITER_FROM_SG so the page will be flushed on unmap on ARCH which need it. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Pierre Ossman --- drivers/mmc/host/sdhci.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 62041c7e9246..fc96f8cb9c0b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -773,8 +773,14 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data) } if (!(host->flags & SDHCI_REQ_USE_DMA)) { - sg_miter_start(&host->sg_miter, - data->sg, data->sg_len, SG_MITER_ATOMIC); + int flags; + + flags = SG_MITER_ATOMIC; + if (host->data->flags & MMC_DATA_READ) + flags |= SG_MITER_TO_SG; + else + flags |= SG_MITER_FROM_SG; + sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); host->blocks = data->blocks; } -- cgit v1.2.3 From 4b2a108cd0d34880fe9d932258ca5b2ccebcd05e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 22 Jun 2009 09:18:05 +0200 Subject: cb710: use SG_MITER_TO_SG/SG_MITER_FROM_SG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the code allready uses flush_kernel_dcache_page(). This patch updates the driver to the recent sg API changes which require that either SG_MITER_TO_SG or SG_MITER_FROM_SG is set. SG_MITER_TO_SG calls flush_kernel_dcache_page() in sg_mitter_stop() Signed-off-by: Sebastian Andrzej Siewior Acked-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: Pierre Ossman --- drivers/misc/cb710/sgbuf2.c | 4 ---- drivers/mmc/host/cb710-mmc.c | 6 +++--- include/linux/cb710.h | 29 +++-------------------------- 3 files changed, 6 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c index d38a7acdb6ec..d019746551f3 100644 --- a/drivers/misc/cb710/sgbuf2.c +++ b/drivers/misc/cb710/sgbuf2.c @@ -114,7 +114,6 @@ static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) if (!left) return; addr += len; - flush_kernel_dcache_page(miter->page); } while (sg_dwiter_next(miter)); } @@ -142,9 +141,6 @@ void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t da return; } else sg_dwiter_write_slow(miter, data); - - if (miter->length == miter->consumed) - flush_kernel_dcache_page(miter->page); } EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c index 11efefb1af51..4e72964a7b43 100644 --- a/drivers/mmc/host/cb710-mmc.c +++ b/drivers/mmc/host/cb710-mmc.c @@ -278,7 +278,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8))) return -EINVAL; - sg_miter_start(&miter, data->sg, data->sg_len, 0); + sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_TO_SG); cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, 15, CB710_MMC_C2_READ_PIO_SIZE_MASK); @@ -307,7 +307,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) goto out; } out: - cb710_sg_miter_stop_writing(&miter); + sg_miter_stop(&miter); return err; } @@ -322,7 +322,7 @@ static int cb710_mmc_send(struct cb710_slot *slot, struct mmc_data *data) if (unlikely(data->blocks > 1 && data->blksz & 15)) return -EINVAL; - 
sg_miter_start(&miter, data->sg, data->sg_len, 0); + sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_FROM_SG); cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, 0, CB710_MMC_C2_READ_PIO_SIZE_MASK); diff --git a/include/linux/cb710.h b/include/linux/cb710.h index 63bc9a4d2926..8cc10411bab2 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -140,29 +140,6 @@ void cb710_dump_regs(struct cb710_chip *chip, unsigned dump); #include #include -/** - * cb710_sg_miter_stop_writing - stop mapping iteration after writing - * @miter: sg mapping iter to be stopped - * - * Description: - * Stops mapping iterator @miter. @miter should have been started - * started using sg_miter_start(). A stopped iteration can be - * resumed by calling sg_miter_next() on it. This is useful when - * resources (kmap) need to be released during iteration. - * - * This is a convenience wrapper that will be optimized out for arches - * that don't need flush_kernel_dcache_page(). - * - * Context: - * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise. - */ -static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter) -{ - if (miter->page) - flush_kernel_dcache_page(miter->page); - sg_miter_stop(miter); -} - /* * 32-bit PIO mapping sg iterator * @@ -171,12 +148,12 @@ static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter) * without DMA support). * * Best-case reading (transfer from device): - * sg_miter_start(); + * sg_miter_start(, SG_MITER_TO_SG); * cb710_sg_dwiter_write_from_io(); - * cb710_sg_miter_stop_writing(); + * sg_miter_stop(); * * Best-case writing (transfer to device): - * sg_miter_start(); + * sg_miter_start(, SG_MITER_FROM_SG); * cb710_sg_dwiter_read_to_io(); * sg_miter_stop(); */ -- cgit v1.2.3 From a9239d750d9991f2feee78fc5669a4613abc1adb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:31 -0700 Subject: imxmmc: Remove unnecessary semicolons Signed-off-by: Joe Perches Acked-by: Pavel Pisa Signed-off-by: Pierre Ossman --- drivers/mmc/host/imxmmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index e0be21a4a696..bf98d7cc928a 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c @@ -652,7 +652,7 @@ static irqreturn_t imxmci_irq(int irq, void *devid) set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); tasklet_schedule(&host->tasklet); - return IRQ_RETVAL(handled);; + return IRQ_RETVAL(handled); } static void imxmci_tasklet_fnc(unsigned long data) -- cgit v1.2.3 From 550e7fd8afb7664ae7cedb398c407694e2bf7d3c Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 1 Aug 2009 12:04:17 -0300 Subject: thinkpad-acpi: disable broken bay and dock subdrivers Currently, the ThinkPad-ACPI bay and dock drivers are completely broken, and cause a NULL pointer derreference in kernel mode (and, therefore, an OOPS) when they try to issue events (i.e. on dock, undock, bay ejection, etc). OTOH, the standard ACPI dock driver can handle the hotplug bays and docks of the ThinkPads just fine (including batteries) as of 2.6.27. In fact, it does a much better job of it than thinkpad-acpi ever did. It is just not worth the hassle to find a way to fix this crap without breaking the (deprecated) thinkpad-acpi dock/bay ABI. This is old, deprecated code that sees little testing or use. As a quick fix suitable for -stable backports, mark the thinkpad-acpi bay and dock subdrivers as BROKEN in Kconfig. 
The dead code will be removed by a later patch. This fixes bugzilla #13669, and should be applied to 2.6.27 and later. Signed-off-by: Henrique de Moraes Holschuh Reported-by: Joerg Platte Cc: stable@kernel.org Signed-off-by: Len Brown --- drivers/platform/x86/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 46dad12f952f..6335f63892dc 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -281,6 +281,7 @@ config THINKPAD_ACPI_DOCK bool "Legacy Docking Station Support" depends on THINKPAD_ACPI depends on ACPI_DOCK=n + depends on BROKEN default n ---help--- Allows the thinkpad_acpi driver to handle docking station events. @@ -294,7 +295,8 @@ config THINKPAD_ACPI_DOCK config THINKPAD_ACPI_BAY bool "Legacy Removable Bay Support" depends on THINKPAD_ACPI - default y + depends on BROKEN + default n ---help--- Allows the thinkpad_acpi driver to handle removable bays. It will electrically disable the device in the bay, and also generate -- cgit v1.2.3 From 1f6fc2de9525e34ee93bd392fa046369a8cfbf1e Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 1 Aug 2009 12:04:18 -0300 Subject: thinkpad-acpi: remove dock and bay subdrivers The standard ACPI dock driver can handle the hotplug bays and docks of the ThinkPads just fine (including batteries) as of 2.6.27, and the code in thinkpad-acpi for the dock and bay subdrivers is currently broken anyway... Userspace needs some love to support the two-stage ejection nicely, but it is simple enough to do through udev rules (you don't even need HAL) so this wouldn't justify fixing the dock and bay subdrivers, either. That leaves warm-swap bays (_EJ3) support for thinkpad-acpi, as well as support for the weird dock of the model 570, but since such support has never left the "experimental" stage, it is also not a strong enough reason to find a way to fix this code. Users of ThinkPads with warm-swap bays are urged to request that _EJ3 support be added to the regular ACPI dock driver, if such feature is indeed useful for them. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- Documentation/laptops/thinkpad-acpi.txt | 127 ------------- drivers/platform/x86/Kconfig | 27 --- drivers/platform/x86/thinkpad_acpi.c | 327 -------------------------------- 3 files changed, 481 deletions(-) (limited to 'drivers') diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index f2296ecedb89..e2ddcdeb61b6 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -36,8 +36,6 @@ detailed description): - Bluetooth enable and disable - video output switching, expansion control - ThinkLight on and off - - limited docking and undocking - - UltraBay eject - CMOS/UCMS control - LED control - ACPI sounds @@ -729,131 +727,6 @@ cannot be read or if it is unknown, thinkpad-acpi will report it as "off". It is impossible to know if the status returned through sysfs is valid. -Docking / undocking -- /proc/acpi/ibm/dock ------------------------------------------- - -Docking and undocking (e.g. with the X4 UltraBase) requires some -actions to be taken by the operating system to safely make or break -the electrical connections with the dock. 
- -The docking feature of this driver generates the following ACPI events: - - ibm/dock GDCK 00000003 00000001 -- eject request - ibm/dock GDCK 00000003 00000002 -- undocked - ibm/dock GDCK 00000000 00000003 -- docked - -NOTE: These events will only be generated if the laptop was docked -when originally booted. This is due to the current lack of support for -hot plugging of devices in the Linux ACPI framework. If the laptop was -booted while not in the dock, the following message is shown in the -logs: - - Mar 17 01:42:34 aero kernel: thinkpad_acpi: dock device not present - -In this case, no dock-related events are generated but the dock and -undock commands described below still work. They can be executed -manually or triggered by Fn key combinations (see the example acpid -configuration files included in the driver tarball package available -on the web site). - -When the eject request button on the dock is pressed, the first event -above is generated. The handler for this event should issue the -following command: - - echo undock > /proc/acpi/ibm/dock - -After the LED on the dock goes off, it is safe to eject the laptop. -Note: if you pressed this key by mistake, go ahead and eject the -laptop, then dock it back in. Otherwise, the dock may not function as -expected. - -When the laptop is docked, the third event above is generated. The -handler for this event should issue the following command to fully -enable the dock: - - echo dock > /proc/acpi/ibm/dock - -The contents of the /proc/acpi/ibm/dock file shows the current status -of the dock, as provided by the ACPI framework. - -The docking support in this driver does not take care of enabling or -disabling any other devices you may have attached to the dock. For -example, a CD drive plugged into the UltraBase needs to be disabled or -enabled separately. See the provided example acpid configuration files -for how this can be accomplished. - -There is no support yet for PCI devices that may be attached to a -docking station, e.g. in the ThinkPad Dock II. The driver currently -does not recognize, enable or disable such devices. This means that -the only docking stations currently supported are the X-series -UltraBase docks and "dumb" port replicators like the Mini Dock (the -latter don't need any ACPI support, actually). - - -UltraBay eject -- /proc/acpi/ibm/bay ------------------------------------- - -Inserting or ejecting an UltraBay device requires some actions to be -taken by the operating system to safely make or break the electrical -connections with the device. - -This feature generates the following ACPI events: - - ibm/bay MSTR 00000003 00000000 -- eject request - ibm/bay MSTR 00000001 00000000 -- eject lever inserted - -NOTE: These events will only be generated if the UltraBay was present -when the laptop was originally booted (on the X series, the UltraBay -is in the dock, so it may not be present if the laptop was undocked). -This is due to the current lack of support for hot plugging of devices -in the Linux ACPI framework. If the laptop was booted without the -UltraBay, the following message is shown in the logs: - - Mar 17 01:42:34 aero kernel: thinkpad_acpi: bay device not present - -In this case, no bay-related events are generated but the eject -command described below still works. It can be executed manually or -triggered by a hot key combination. - -Sliding the eject lever generates the first event shown above. 
The -handler for this event should take whatever actions are necessary to -shut down the device in the UltraBay (e.g. call idectl), then issue -the following command: - - echo eject > /proc/acpi/ibm/bay - -After the LED on the UltraBay goes off, it is safe to pull out the -device. - -When the eject lever is inserted, the second event above is -generated. The handler for this event should take whatever actions are -necessary to enable the UltraBay device (e.g. call idectl). - -The contents of the /proc/acpi/ibm/bay file shows the current status -of the UltraBay, as provided by the ACPI framework. - -EXPERIMENTAL warm eject support on the 600e/x, A22p and A3x (To use -this feature, you need to supply the experimental=1 parameter when -loading the module): - -These models do not have a button near the UltraBay device to request -a hot eject but rather require the laptop to be put to sleep -(suspend-to-ram) before the bay device is ejected or inserted). -The sequence of steps to eject the device is as follows: - - echo eject > /proc/acpi/ibm/bay - put the ThinkPad to sleep - remove the drive - resume from sleep - cat /proc/acpi/ibm/bay should show that the drive was removed - -On the A3x, both the UltraBay 2000 and UltraBay Plus devices are -supported. Use "eject2" instead of "eject" for the second bay. - -Note: the UltraBay eject support on the 600e/x, A22p and A3x is -EXPERIMENTAL and may not work as expected. USE WITH CAUTION! - - CMOS/UCMS control ----------------- diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 6335f63892dc..77c6097ced80 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -277,33 +277,6 @@ config THINKPAD_ACPI_UNSAFE_LEDS Say N here, unless you are building a kernel for your own use, and need to control the important firmware LEDs. -config THINKPAD_ACPI_DOCK - bool "Legacy Docking Station Support" - depends on THINKPAD_ACPI - depends on ACPI_DOCK=n - depends on BROKEN - default n - ---help--- - Allows the thinkpad_acpi driver to handle docking station events. - This support was made obsolete by the generic ACPI docking station - support (CONFIG_ACPI_DOCK). It will allow locking and removing the - laptop from the docking station, but will not properly connect PCI - devices. - - If you are not sure, say N here. - -config THINKPAD_ACPI_BAY - bool "Legacy Removable Bay Support" - depends on THINKPAD_ACPI - depends on BROKEN - default n - ---help--- - Allows the thinkpad_acpi driver to handle removable bays. It will - electrically disable the device in the bay, and also generate - notifications when the bay lever is ejected or inserted. - - If you are not sure, say Y here. 
- config THINKPAD_ACPI_VIDEO bool "Video output control support" depends on THINKPAD_ACPI diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a463fd72c495..27d68e719e90 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -239,12 +239,6 @@ struct ibm_init_struct { }; static struct { -#ifdef CONFIG_THINKPAD_ACPI_BAY - u32 bay_status:1; - u32 bay_eject:1; - u32 bay_status2:1; - u32 bay_eject2:1; -#endif u32 bluetooth:1; u32 hotkey:1; u32 hotkey_mask:1; @@ -589,18 +583,6 @@ static int acpi_ec_write(int i, u8 v) return 1; } -#if defined(CONFIG_THINKPAD_ACPI_DOCK) || defined(CONFIG_THINKPAD_ACPI_BAY) -static int _sta(acpi_handle handle) -{ - int status; - - if (!handle || !acpi_evalf(handle, &status, "_STA", "d")) - status = 0; - - return status; -} -#endif - static int issue_thinkpad_cmos_command(int cmos_cmd) { if (!cmos_handle) @@ -4441,293 +4423,6 @@ static struct ibm_struct light_driver_data = { .exit = light_exit, }; -/************************************************************************* - * Dock subdriver - */ - -#ifdef CONFIG_THINKPAD_ACPI_DOCK - -static void dock_notify(struct ibm_struct *ibm, u32 event); -static int dock_read(char *p); -static int dock_write(char *buf); - -TPACPI_HANDLE(dock, root, "\\_SB.GDCK", /* X30, X31, X40 */ - "\\_SB.PCI0.DOCK", /* 600e/x,770e,770x,A2xm/p,T20-22,X20-21 */ - "\\_SB.PCI0.PCI1.DOCK", /* all others */ - "\\_SB.PCI.ISA.SLCE", /* 570 */ - ); /* A21e,G4x,R30,R31,R32,R40,R40e,R50e */ - -/* don't list other alternatives as we install a notify handler on the 570 */ -TPACPI_HANDLE(pci, root, "\\_SB.PCI"); /* 570 */ - -static const struct acpi_device_id ibm_pci_device_ids[] = { - {PCI_ROOT_HID_STRING, 0}, - {"", 0}, -}; - -static struct tp_acpi_drv_struct ibm_dock_acpidriver[2] = { - { - .notify = dock_notify, - .handle = &dock_handle, - .type = ACPI_SYSTEM_NOTIFY, - }, - { - /* THIS ONE MUST NEVER BE USED FOR DRIVER AUTOLOADING. - * We just use it to get notifications of dock hotplug - * in very old thinkpads */ - .hid = ibm_pci_device_ids, - .notify = dock_notify, - .handle = &pci_handle, - .type = ACPI_SYSTEM_NOTIFY, - }, -}; - -static struct ibm_struct dock_driver_data[2] = { - { - .name = "dock", - .read = dock_read, - .write = dock_write, - .acpi = &ibm_dock_acpidriver[0], - }, - { - .name = "dock", - .acpi = &ibm_dock_acpidriver[1], - }, -}; - -#define dock_docked() (_sta(dock_handle) & 1) - -static int __init dock_init(struct ibm_init_struct *iibm) -{ - vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver\n"); - - TPACPI_ACPIHANDLE_INIT(dock); - - vdbg_printk(TPACPI_DBG_INIT, "dock is %s\n", - str_supported(dock_handle != NULL)); - - return (dock_handle)? 0 : 1; -} - -static int __init dock_init2(struct ibm_init_struct *iibm) -{ - int dock2_needed; - - vdbg_printk(TPACPI_DBG_INIT, "initializing dock subdriver part 2\n"); - - if (dock_driver_data[0].flags.acpi_driver_registered && - dock_driver_data[0].flags.acpi_notify_installed) { - TPACPI_ACPIHANDLE_INIT(pci); - dock2_needed = (pci_handle != NULL); - vdbg_printk(TPACPI_DBG_INIT, - "dock PCI handler for the TP 570 is %s\n", - str_supported(dock2_needed)); - } else { - vdbg_printk(TPACPI_DBG_INIT, - "dock subdriver part 2 not required\n"); - dock2_needed = 0; - } - - return (dock2_needed)? 
0 : 1; -} - -static void dock_notify(struct ibm_struct *ibm, u32 event) -{ - int docked = dock_docked(); - int pci = ibm->acpi->hid && ibm->acpi->device && - acpi_match_device_ids(ibm->acpi->device, ibm_pci_device_ids); - int data; - - if (event == 1 && !pci) /* 570 */ - data = 1; /* button */ - else if (event == 1 && pci) /* 570 */ - data = 3; /* dock */ - else if (event == 3 && docked) - data = 1; /* button */ - else if (event == 3 && !docked) - data = 2; /* undock */ - else if (event == 0 && docked) - data = 3; /* dock */ - else { - printk(TPACPI_ERR "unknown dock event %d, status %d\n", - event, _sta(dock_handle)); - data = 0; /* unknown */ - } - acpi_bus_generate_proc_event(ibm->acpi->device, event, data); - acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, - dev_name(&ibm->acpi->device->dev), - event, data); -} - -static int dock_read(char *p) -{ - int len = 0; - int docked = dock_docked(); - - if (!dock_handle) - len += sprintf(p + len, "status:\t\tnot supported\n"); - else if (!docked) - len += sprintf(p + len, "status:\t\tundocked\n"); - else { - len += sprintf(p + len, "status:\t\tdocked\n"); - len += sprintf(p + len, "commands:\tdock, undock\n"); - } - - return len; -} - -static int dock_write(char *buf) -{ - char *cmd; - - if (!dock_docked()) - return -ENODEV; - - while ((cmd = next_cmd(&buf))) { - if (strlencmp(cmd, "undock") == 0) { - if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 0) || - !acpi_evalf(dock_handle, NULL, "_EJ0", "vd", 1)) - return -EIO; - } else if (strlencmp(cmd, "dock") == 0) { - if (!acpi_evalf(dock_handle, NULL, "_DCK", "vd", 1)) - return -EIO; - } else - return -EINVAL; - } - - return 0; -} - -#endif /* CONFIG_THINKPAD_ACPI_DOCK */ - -/************************************************************************* - * Bay subdriver - */ - -#ifdef CONFIG_THINKPAD_ACPI_BAY - -TPACPI_HANDLE(bay, root, "\\_SB.PCI.IDE.SECN.MAST", /* 570 */ - "\\_SB.PCI0.IDE0.IDES.IDSM", /* 600e/x, 770e, 770x */ - "\\_SB.PCI0.SATA.SCND.MSTR", /* T60, X60, Z60 */ - "\\_SB.PCI0.IDE0.SCND.MSTR", /* all others */ - ); /* A21e, R30, R31 */ -TPACPI_HANDLE(bay_ej, bay, "_EJ3", /* 600e/x, A2xm/p, A3x */ - "_EJ0", /* all others */ - ); /* 570,A21e,G4x,R30,R31,R32,R40e,R50e */ -TPACPI_HANDLE(bay2, root, "\\_SB.PCI0.IDE0.PRIM.SLAV", /* A3x, R32 */ - "\\_SB.PCI0.IDE0.IDEP.IDPS", /* 600e/x, 770e, 770x */ - ); /* all others */ -TPACPI_HANDLE(bay2_ej, bay2, "_EJ3", /* 600e/x, 770e, A3x */ - "_EJ0", /* 770x */ - ); /* all others */ - -static int __init bay_init(struct ibm_init_struct *iibm) -{ - vdbg_printk(TPACPI_DBG_INIT, "initializing bay subdriver\n"); - - TPACPI_ACPIHANDLE_INIT(bay); - if (bay_handle) - TPACPI_ACPIHANDLE_INIT(bay_ej); - TPACPI_ACPIHANDLE_INIT(bay2); - if (bay2_handle) - TPACPI_ACPIHANDLE_INIT(bay2_ej); - - tp_features.bay_status = bay_handle && - acpi_evalf(bay_handle, NULL, "_STA", "qv"); - tp_features.bay_status2 = bay2_handle && - acpi_evalf(bay2_handle, NULL, "_STA", "qv"); - - tp_features.bay_eject = bay_handle && bay_ej_handle && - (strlencmp(bay_ej_path, "_EJ0") == 0 || experimental); - tp_features.bay_eject2 = bay2_handle && bay2_ej_handle && - (strlencmp(bay2_ej_path, "_EJ0") == 0 || experimental); - - vdbg_printk(TPACPI_DBG_INIT, - "bay 1: status %s, eject %s; bay 2: status %s, eject %s\n", - str_supported(tp_features.bay_status), - str_supported(tp_features.bay_eject), - str_supported(tp_features.bay_status2), - str_supported(tp_features.bay_eject2)); - - return (tp_features.bay_status || tp_features.bay_eject || - tp_features.bay_status2 || 
tp_features.bay_eject2)? 0 : 1; -} - -static void bay_notify(struct ibm_struct *ibm, u32 event) -{ - acpi_bus_generate_proc_event(ibm->acpi->device, event, 0); - acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, - dev_name(&ibm->acpi->device->dev), - event, 0); -} - -#define bay_occupied(b) (_sta(b##_handle) & 1) - -static int bay_read(char *p) -{ - int len = 0; - int occupied = bay_occupied(bay); - int occupied2 = bay_occupied(bay2); - int eject, eject2; - - len += sprintf(p + len, "status:\t\t%s\n", - tp_features.bay_status ? - (occupied ? "occupied" : "unoccupied") : - "not supported"); - if (tp_features.bay_status2) - len += sprintf(p + len, "status2:\t%s\n", occupied2 ? - "occupied" : "unoccupied"); - - eject = tp_features.bay_eject && occupied; - eject2 = tp_features.bay_eject2 && occupied2; - - if (eject && eject2) - len += sprintf(p + len, "commands:\teject, eject2\n"); - else if (eject) - len += sprintf(p + len, "commands:\teject\n"); - else if (eject2) - len += sprintf(p + len, "commands:\teject2\n"); - - return len; -} - -static int bay_write(char *buf) -{ - char *cmd; - - if (!tp_features.bay_eject && !tp_features.bay_eject2) - return -ENODEV; - - while ((cmd = next_cmd(&buf))) { - if (tp_features.bay_eject && strlencmp(cmd, "eject") == 0) { - if (!acpi_evalf(bay_ej_handle, NULL, NULL, "vd", 1)) - return -EIO; - } else if (tp_features.bay_eject2 && - strlencmp(cmd, "eject2") == 0) { - if (!acpi_evalf(bay2_ej_handle, NULL, NULL, "vd", 1)) - return -EIO; - } else - return -EINVAL; - } - - return 0; -} - -static struct tp_acpi_drv_struct ibm_bay_acpidriver = { - .notify = bay_notify, - .handle = &bay_handle, - .type = ACPI_SYSTEM_NOTIFY, -}; - -static struct ibm_struct bay_driver_data = { - .name = "bay", - .read = bay_read, - .write = bay_write, - .acpi = &ibm_bay_acpidriver, -}; - -#endif /* CONFIG_THINKPAD_ACPI_BAY */ - /************************************************************************* * CMOS subdriver */ @@ -7854,22 +7549,6 @@ static struct ibm_init_struct ibms_init[] __initdata = { .init = light_init, .data = &light_driver_data, }, -#ifdef CONFIG_THINKPAD_ACPI_DOCK - { - .init = dock_init, - .data = &dock_driver_data[0], - }, - { - .init = dock_init2, - .data = &dock_driver_data[1], - }, -#endif -#ifdef CONFIG_THINKPAD_ACPI_BAY - { - .init = bay_init, - .data = &bay_driver_data, - }, -#endif { .init = cmos_init, .data = &cmos_driver_data, @@ -7968,12 +7647,6 @@ TPACPI_PARAM(hotkey); TPACPI_PARAM(bluetooth); TPACPI_PARAM(video); TPACPI_PARAM(light); -#ifdef CONFIG_THINKPAD_ACPI_DOCK -TPACPI_PARAM(dock); -#endif -#ifdef CONFIG_THINKPAD_ACPI_BAY -TPACPI_PARAM(bay); -#endif /* CONFIG_THINKPAD_ACPI_BAY */ TPACPI_PARAM(cmos); TPACPI_PARAM(led); TPACPI_PARAM(beep); -- cgit v1.2.3 From 5b05d4696d38c3172e79e855cc1e2ed044589508 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Sat, 1 Aug 2009 12:04:19 -0300 Subject: thinkpad-acpi: restrict procfs count value to sane upper limit Signed-off-by: Michael Buesch Acked-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 27d68e719e90..18f9ee63c50a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -766,6 +766,8 @@ static int dispatch_procfs_write(struct file *file, if (!ibm || !ibm->write) return -EINVAL; + if (count > PAGE_SIZE - 2) + return -EINVAL; kernbuf = 
kmalloc(count + 2, GFP_KERNEL); if (!kernbuf) -- cgit v1.2.3 From 59fe4fe34d7afdf63208124f313be9056feaa2f4 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sat, 1 Aug 2009 12:04:20 -0300 Subject: thinkpad-acpi: fix incorrect use of TPACPI_BRGHT_MODE_ECNVRAM HBRV-based default selection of backlight control strategy didn't work well, at least the X41 defines it but doesn't use it and I don't think it will stop there. Switch to a white/blacklist. All models that have HBRV defined have been included in the list, and initially all ATI GPUs will get ECNVRAM, and the Intel GPUs will get UCMS_STEP. Symptoms of incorrect backlight mode selection are: 1. Non-working backlight control through sysfs; 2. Backlight gets reset to the lowest level at every shutdown, reboot and when thinkpad-acpi gets unloaded; This fixes a regression in 2.6.30, bugzilla #13826 Signed-off-by: Henrique de Moraes Holschuh Reported-by: Tobias Diedrich Cc: stable@kernel.org Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 61 +++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 18f9ee63c50a..e85600852502 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -5642,14 +5642,48 @@ static struct backlight_ops ibm_backlight_data = { /* --------------------------------------------------------------------- */ +/* + * These are only useful for models that have only one possibility + * of GPU. If the BIOS model handles both ATI and Intel, don't use + * these quirks. + */ +#define TPACPI_BRGHT_Q_NOEC 0x0001 /* Must NOT use EC HBRV */ +#define TPACPI_BRGHT_Q_EC 0x0002 /* Should or must use EC HBRV */ +#define TPACPI_BRGHT_Q_ASK 0x8000 /* Ask for user report */ + +static const struct tpacpi_quirk brightness_quirk_table[] __initconst = { + /* Models with ATI GPUs known to require ECNVRAM mode */ + TPACPI_Q_IBM('1', 'Y', TPACPI_BRGHT_Q_EC), /* T43/p ATI */ + + /* Models with ATI GPUs (waiting confirmation) */ + TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), + TPACPI_Q_IBM('1', 'Q', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), + TPACPI_Q_IBM('7', '6', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), + TPACPI_Q_IBM('7', '8', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), + + /* Models with Intel Extreme Graphics 2 (waiting confirmation) */ + TPACPI_Q_IBM('1', 'V', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), + TPACPI_Q_IBM('1', 'W', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), + TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), + + /* Models with Intel GMA900 */ + TPACPI_Q_IBM('7', '0', TPACPI_BRGHT_Q_NOEC), /* T43, R52 */ + TPACPI_Q_IBM('7', '4', TPACPI_BRGHT_Q_NOEC), /* X41 */ + TPACPI_Q_IBM('7', '5', TPACPI_BRGHT_Q_NOEC), /* X41 Tablet */ +}; + static int __init brightness_init(struct ibm_init_struct *iibm) { int b; + unsigned long quirks; vdbg_printk(TPACPI_DBG_INIT, "initializing brightness subdriver\n"); mutex_init(&brightness_mutex); + quirks = tpacpi_check_quirks(brightness_quirk_table, + ARRAY_SIZE(brightness_quirk_table)); + /* * We always attempt to detect acpi support, so as to switch * Lenovo Vista BIOS to ACPI brightness mode even if we are not @@ -5706,23 +5740,13 @@ static int __init brightness_init(struct ibm_init_struct *iibm) /* TPACPI_BRGHT_MODE_AUTO not implemented yet, just use default */ if (brightness_mode == TPACPI_BRGHT_MODE_AUTO || brightness_mode == TPACPI_BRGHT_MODE_MAX) { - if 
(thinkpad_id.vendor == PCI_VENDOR_ID_IBM) { - /* - * IBM models that define HBRV probably have - * EC-based backlight level control - */ - if (acpi_evalf(ec_handle, NULL, "HBRV", "qd")) - /* T40-T43, R50-R52, R50e, R51e, X31-X41 */ - brightness_mode = TPACPI_BRGHT_MODE_ECNVRAM; - else - /* all other IBM ThinkPads */ - brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; - } else - /* All Lenovo ThinkPads */ + if (quirks & TPACPI_BRGHT_Q_EC) + brightness_mode = TPACPI_BRGHT_MODE_ECNVRAM; + else brightness_mode = TPACPI_BRGHT_MODE_UCMS_STEP; dbg_printk(TPACPI_DBG_BRGHT, - "selected brightness_mode=%d\n", + "driver auto-selected brightness_mode=%d\n", brightness_mode); } @@ -5749,6 +5773,15 @@ static int __init brightness_init(struct ibm_init_struct *iibm) vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, "brightness is supported\n"); + if (quirks & TPACPI_BRGHT_Q_ASK) { + printk(TPACPI_NOTICE + "brightness: will use unverified default: " + "brightness_mode=%d\n", brightness_mode); + printk(TPACPI_NOTICE + "brightness: please report to %s whether it works well " + "or not on your ThinkPad\n", TPACPI_MAIL); + } + ibm_backlight_device->props.max_brightness = (tp_features.bright_16levels)? 15 : 7; ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK; -- cgit v1.2.3 From 447c233da4d109c6194fefd69e5185cbc93cc062 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 2 Aug 2009 08:02:28 +0000 Subject: parisc: Fix read buffer overflow in pdc_stable driver Check whether index is within bounds before testing the element. Signed-off-by: Roel Kluin Signed-off-by: Helge Deller --- drivers/parisc/pdc_stable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index f9f9a5f1bbd0..13a64bc081b6 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -370,7 +370,7 @@ pdcspath_layer_read(struct pdcspath_entry *entry, char *buf) if (!i) /* entry is not ready */ return -ENODATA; - for (i = 0; devpath->layers[i] && (likely(i < 6)); i++) + for (i = 0; i < 6 && devpath->layers[i]; i++) out += sprintf(out, "%u ", devpath->layers[i]); out += sprintf(out, "\n"); -- cgit v1.2.3 From 450d6e306b4717bfae11218a02648509baf04ce1 Mon Sep 17 00:00:00 2001 From: Stoyan Gaydarov Date: Thu, 30 Jul 2009 10:25:19 +0000 Subject: parisc: fixed faulty check in lba_pci This patche fixes a spelling error that has resulted from copy and pasting. The location of the error was found using a semantic patch but the semantic patch was not trying to find these errors. After looking things over it seemed logical that this change was needed. Signed-off-by: Stoyan Gaydarov Signed-off-by: Helge Deller --- drivers/parisc/lba_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index ede614616f8e..3aeb3279c92a 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -992,7 +992,7 @@ lba_pat_resources(struct parisc_device *pa_dev, struct lba_device *lba_dev) return; io_pdc_cell = kzalloc(sizeof(pdc_pat_cell_mod_maddr_block_t), GFP_KERNEL); - if (!pa_pdc_cell) { + if (!io_pdc_cell) { kfree(pa_pdc_cell); return; } -- cgit v1.2.3 From 6b4dbcd86a9d464057fcc7abe4d0574093071fcc Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Mon, 20 Jul 2009 22:58:44 +0000 Subject: parisc: isa-eeprom - Fix loff_t usage loff_t is a signed type. If userspace passes a negative ppos, the "count" range check is weakened. 
"count"s bigger than HPEE_MAX_LENGTH will pass the check. Also, if ppos is negative, the readb(eisa_eeprom_addr + *ppos) will poke in random memory. Signed-off-by: Michael Buesch Cc: stable@kernel.org Signed-off-by: Helge Deller --- drivers/parisc/eisa_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 685d94e69d44..8c0b26e9b98a 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -55,7 +55,7 @@ static ssize_t eisa_eeprom_read(struct file * file, ssize_t ret; int i; - if (*ppos >= HPEE_MAX_LENGTH) + if (*ppos < 0 || *ppos >= HPEE_MAX_LENGTH) return 0; count = *ppos + count < HPEE_MAX_LENGTH ? count : HPEE_MAX_LENGTH - *ppos; -- cgit v1.2.3 From b10ff54f9f58adfb708b53e6e56ed3d7804ade74 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 8 Jul 2009 15:27:20 +0000 Subject: parisc: includecheck fix for ccio-dma.c fix the following 'make includecheck' warning: drivers/parisc/ccio-dma.c: linux/proc_fs.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Helge Deller --- drivers/parisc/ccio-dma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 0f0e0b919ef4..a45b0c0d574e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -70,7 +70,6 @@ #undef CCIO_COLLECT_STATS #endif -#include #include /* for proc_runway_root */ #ifdef DEBUG_CCIO_INIT -- cgit v1.2.3 From c6fe6b0783a8fd923d11dd0388cbd561ff15bdf1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 2 Aug 2009 15:13:29 +0200 Subject: parisc: hp_sdc_mlc.c - check return value of down_trylock() Signed-off-by: Helge Deller --- drivers/input/serio/hp_sdc_mlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index b587e2d576ac..820e51673b26 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) priv->tseq[3] = 0; if (mlc->opacket & HIL_CTRL_APE) { priv->tseq[3] |= HP_SDC_LPC_APE_IPF; - down_trylock(&mlc->csem); + BUG_ON(down_trylock(&mlc->csem)); } enqueue: hp_sdc_enqueue_transaction(&priv->trans); -- cgit v1.2.3 From 1e0deabd35f210f22c03cc734a0335c07ae71ff3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 2 Aug 2009 15:17:37 +0200 Subject: parisc: dino.c - check return value of pci_assign_resource() Signed-off-by: Helge Deller --- drivers/parisc/dino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index c590974e9815..d69bde6a2343 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -614,7 +614,7 @@ dino_fixup_bus(struct pci_bus *bus) dev_name(&bus->self->dev), i, bus->self->resource[i].start, bus->self->resource[i].end); - pci_assign_resource(bus->self, i); + WARN_ON(pci_assign_resource(bus->self, i)); DBG("DEBUG %s after assign %d [0x%lx,0x%lx]\n", dev_name(&bus->self->dev), i, bus->self->resource[i].start, -- cgit v1.2.3 From 1a1dba32412c15c51d5fc0b9efadd2ea310356d7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 2 Aug 2009 15:26:51 +0200 Subject: parisc: sticore.c - check return values Signed-off-by: Helge Deller --- drivers/video/console/sticore.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git 
a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index ef7870f5ea08..857b3668b3ba 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -957,9 +957,14 @@ static int __devinit sticore_pci_init(struct pci_dev *pd, #ifdef CONFIG_PCI unsigned long fb_base, rom_base; unsigned int fb_len, rom_len; + int err; struct sti_struct *sti; - pci_enable_device(pd); + err = pci_enable_device(pd); + if (err < 0) { + dev_err(&pd->dev, "Cannot enable PCI device\n"); + return err; + } fb_base = pci_resource_start(pd, 0); fb_len = pci_resource_len(pd, 0); @@ -1048,7 +1053,7 @@ static void __devinit sti_init_roms(void) /* Register drivers for native & PCI cards */ register_parisc_driver(&pa_sti_driver); - pci_register_driver(&pci_sti_driver); + WARN_ON(pci_register_driver(&pci_sti_driver)); /* if we didn't find the given default sti, take the first one */ if (!default_sti) -- cgit v1.2.3 From c43962321e8af5309dd3ffcd78743c89581265e5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 2 Aug 2009 15:35:43 +0200 Subject: parisc: parisc-agp.c - use correct page_mask function Fix those compiler warnings, which indeed point to a bug: drivers/char/agp/parisc-agp.c:228: warning: initialization from incompatible pointer type drivers/char/agp/parisc-agp.c:201: warning: 'parisc_agp_page_mask_memory' defined but not used Signed-off-by: Helge Deller --- drivers/char/agp/parisc-agp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index f4bb43fb8016..e077701ae3d9 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -225,7 +225,7 @@ static const struct agp_bridge_driver parisc_agp_driver = { .configure = parisc_agp_configure, .fetch_size = parisc_agp_fetch_size, .tlb_flush = parisc_agp_tlbflush, - .mask_memory = parisc_agp_mask_memory, + .mask_memory = parisc_agp_page_mask_memory, .masks = parisc_agp_masks, .agp_enable = parisc_agp_enable, .cache_flush = global_cache_flush, -- cgit v1.2.3 From cae5a39f34d52c46ca49edfc3f297656a0fd60b7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 2 Aug 2009 15:42:39 +0200 Subject: parisc: hppb.c - fix printk format strings Fix those warnings: drivers/parisc/hppb.c: In function 'hppb_probe': drivers/parisc/hppb.c:65: warning: format '%x' expects type 'unsigned int', but argument 2 has type 'resource_size_t' drivers/parisc/hppb.c:77: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'resource_size_t' drivers/parisc/hppb.c:77: warning: format '%08x' expects type 'unsigned int', but argument 4 has type 'resource_size_t' Signed-off-by: Helge Deller --- drivers/parisc/hppb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/parisc/hppb.c b/drivers/parisc/hppb.c index 13856415b432..815db175d427 100644 --- a/drivers/parisc/hppb.c +++ b/drivers/parisc/hppb.c @@ -62,7 +62,8 @@ static int hppb_probe(struct parisc_device *dev) } card = card->next; } - printk(KERN_INFO "Found GeckoBoa at 0x%x\n", dev->hpa.start); + printk(KERN_INFO "Found GeckoBoa at 0x%llx\n", + (unsigned long long) dev->hpa.start); card->hpa = dev->hpa.start; card->mmio_region.name = "HP-PB Bus"; @@ -73,8 +74,10 @@ static int hppb_probe(struct parisc_device *dev) status = ccio_request_resource(dev, &card->mmio_region); if(status < 0) { - printk(KERN_ERR "%s: failed to claim HP-PB bus space (%08x, %08x)\n", - __FILE__, card->mmio_region.start, card->mmio_region.end); 
+ printk(KERN_ERR "%s: failed to claim HP-PB " + "bus space (0x%08llx, 0x%08llx)\n", + __FILE__, (unsigned long long) card->mmio_region.start, + (unsigned long long) card->mmio_region.end); } return 0; -- cgit v1.2.3 From d0006f3281c920fbfead0f5035c62ec8053f980a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 30 Jul 2009 16:00:53 -0400 Subject: ACPI: root-only read protection on /sys/firmware/acpi/tables/* they were world readable. Signed-off-by: Len Brown --- drivers/acpi/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 0944daec064f..9c61ab2177cf 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -121,7 +121,7 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr, table_attr->attr.size = 0; table_attr->attr.read = acpi_table_show; table_attr->attr.attr.name = table_attr->name; - table_attr->attr.attr.mode = 0444; + table_attr->attr.attr.mode = 0400; return; } -- cgit v1.2.3 From 74b5820808215f65b70b05a099d6d3c969b82689 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 29 Jul 2009 15:54:25 -0600 Subject: ACPI: bind workqueues to CPU 0 to avoid SMI corruption On some machines, a software-initiated SMI causes corruption unless the SMI runs on CPU 0. An SMI can be initiated by any AML, but typically it's done in GPE-related methods that are run via workqueues, so we can avoid the known corruption cases by binding the workqueues to CPU 0. References: http://bugzilla.kernel.org/show_bug.cgi?id=13751 https://bugs.launchpad.net/bugs/157171 https://bugs.launchpad.net/bugs/157691 Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/acpi/osl.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 71670719d61a..5691f165a952 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -189,11 +189,36 @@ acpi_status __init acpi_os_initialize(void) return AE_OK; } +static void bind_to_cpu0(struct work_struct *work) +{ + set_cpus_allowed(current, cpumask_of_cpu(0)); + kfree(work); +} + +static void bind_workqueue(struct workqueue_struct *wq) +{ + struct work_struct *work; + + work = kzalloc(sizeof(struct work_struct), GFP_KERNEL); + INIT_WORK(work, bind_to_cpu0); + queue_work(wq, work); +} + acpi_status acpi_os_initialize1(void) { + /* + * On some machines, a software-initiated SMI causes corruption unless + * the SMI runs on CPU 0. An SMI can be initiated by any AML, but + * typically it's done in GPE-related methods that are run via + * workqueues, so we can avoid the known corruption cases by binding + * the workqueues to CPU 0. + */ kacpid_wq = create_singlethread_workqueue("kacpid"); + bind_workqueue(kacpid_wq); kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); + bind_workqueue(kacpi_notify_wq); kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug"); + bind_workqueue(kacpi_hotplug_wq); BUG_ON(!kacpid_wq); BUG_ON(!kacpi_notify_wq); BUG_ON(!kacpi_hotplug_wq); -- cgit v1.2.3 From aa7b2b2e973874df99a45b31adbed5978b46be1f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 3 Jul 2009 10:49:03 +0800 Subject: ACPI: Don't treat generic error as ACPI error code in acpi memory hotplug driver Don't treat the generic error as ACPI error code. 
Otherwise when the generic code is returned, it will complain the following warning messag: >ACPI Exception (acpi_memhotplug-0171): UNKNOWN_STATUS_CODE, Cannot get acpi bus device [20080609] >ACPI: Cannot find driver data > ACPI Error (utglobal-0127): Unknown exception code: 0xFFFFFFED [20080609] > Pid: 85, comm: kacpi_notify Not tainted 2.6.27.19-5-default #1 Call Trace: [] show_trace_log_lvl+0x41/0x58 [] dump_stack+0x69/0x6f ..... At the same time when the generic error code is returned, the ACPI_EXCEPTION is replaced by the printk. Signed-off-by: Zhao Yakui Signed-off-by: Len Brown --- drivers/acpi/acpi_memhotplug.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 7a0f4aa4fa1e..37cbe72d17eb 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -38,6 +38,9 @@ #define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT +#undef PREFIX +#define PREFIX "ACPI:memory_hp:" + ACPI_MODULE_NAME("acpi_memhotplug"); MODULE_AUTHOR("Naveen B S "); MODULE_DESCRIPTION("Hotplug Mem Driver"); @@ -153,6 +156,7 @@ acpi_memory_get_device(acpi_handle handle, acpi_handle phandle; struct acpi_device *device = NULL; struct acpi_device *pdevice = NULL; + int result; if (!acpi_bus_get_device(handle, &device) && device) @@ -165,9 +169,9 @@ acpi_memory_get_device(acpi_handle handle, } /* Get the parent device */ - status = acpi_bus_get_device(phandle, &pdevice); - if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Cannot get acpi bus device")); + result = acpi_bus_get_device(phandle, &pdevice); + if (result) { + printk(KERN_WARNING PREFIX "Cannot get acpi bus device"); return -EINVAL; } @@ -175,9 +179,9 @@ acpi_memory_get_device(acpi_handle handle, * Now add the notified device. This creates the acpi_device * and invokes .add function */ - status = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE); - if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, "Cannot add acpi bus")); + result = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE); + if (result) { + printk(KERN_WARNING PREFIX "Cannot add acpi bus"); return -EINVAL; } -- cgit v1.2.3 From 5d2619fca753d270e63e76c9e18437b0d9bc8d75 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 7 Jul 2009 10:56:11 +0800 Subject: ACPI: Ingore the memory block with zero block size in course of memory hotplug If the memory block size is zero, ignore it and don't do the memory hotplug flowchart. Otherwise it will complain the following warning message: >System RAM resource 0 - ffffffffffffffff cannot be added Signed-off-by: Zhao Yakui Signed-off-by: Len Brown --- drivers/acpi/acpi_memhotplug.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 37cbe72d17eb..9a62224cc278 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -242,7 +242,12 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) num_enabled++; continue; } - + /* + * If the memory block size is zero, please ignore it. + * Don't try to do the following memory hotplug flowchart. 
+ */ + if (!info->length) + continue; if (node < 0) node = memory_add_physaddr_to_nid(info->start_addr); @@ -257,8 +262,15 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) mem_device->state = MEMORY_INVALID_STATE; return -EINVAL; } - - return result; + /* + * Sometimes the memory device will contain several memory blocks. + * When one memory block is hot-added to the system memory, it will + * be regarded as a success. + * Otherwise if the last memory block can't be hot-added to the system + * memory, it will be failure and the memory device can't be bound with + * driver. + */ + return 0; } static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device) -- cgit v1.2.3 From 7334546a52c6764df120459509b1f803a073eacc Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 29 Jun 2009 09:40:07 +0100 Subject: eeepc-laptop: fix hot-unplug on resume OOPS on resume when the wireless adaptor is disabled during suspend was introduced by "eeepc-laptop: read rfkill soft-blocked state on resume". Unable to handle kernel NULL pointer dereference Process s2disk Tainted: G W IP: klist_put Call trace: ? klist_del ? device_del ? device_unregister ? pci_stop_dev ? pci_stop_bus ? pci_remove_device ? eeepc_rfkill_hotplug [eeepc_laptop] ? eeepc_hotk_resume [eeepc_laptop] ? acpi_device_resume ? device_resume ? hibernation_snapshot It appears the PCI device is removed twice. The eeepc_rfkill_hotplug() call from the resume handler is racing against the call from the ACPI notifier callback. The ACPI notification is triggered by the resume handler when it refreshes the value of CM_ASL_WLAN. The fix is to serialize hotplug calls using a workqueue. http://bugzilla.kernel.org/show_bug.cgi?id=13825 Signed-off-by: Alan Jenkins Acked-by: Corentin Chary Signed-off-by: Len Brown --- drivers/platform/x86/eeepc-laptop.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index ec560f16d720..222ffb892f22 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -143,6 +143,7 @@ struct eeepc_hotk { struct rfkill *bluetooth_rfkill; struct rfkill *wwan3g_rfkill; struct hotplug_slot *hotplug_slot; + struct work_struct hotplug_work; }; /* The actual device the driver binds to */ @@ -660,7 +661,7 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot, return 0; } -static void eeepc_rfkill_hotplug(void) +static void eeepc_hotplug_work(struct work_struct *work) { struct pci_dev *dev; struct pci_bus *bus = pci_find_bus(0, 1); @@ -701,7 +702,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) if (event != ACPI_NOTIFY_BUS_CHECK) return; - eeepc_rfkill_hotplug(); + schedule_work(&ehotk->hotplug_work); } static void eeepc_hotk_notify(struct acpi_device *device, u32 event) @@ -892,7 +893,7 @@ static int eeepc_hotk_resume(struct acpi_device *device) rfkill_set_sw_state(ehotk->wlan_rfkill, wlan != 1); - eeepc_rfkill_hotplug(); + schedule_work(&ehotk->hotplug_work); } if (ehotk->bluetooth_rfkill) @@ -1093,6 +1094,8 @@ static int eeepc_rfkill_init(struct device *dev) { int result = 0; + INIT_WORK(&ehotk->hotplug_work, eeepc_hotplug_work); + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); -- cgit v1.2.3 From a53a8b56827cc429c6d9f861ad558beeb5f6103f Mon Sep 17 00:00:00 2001 From: Ben McKeegan Date: Tue, 28 Jul 2009 07:43:57 +0000 Subject: ppp: fix lost 
fragments in ppp_mp_explode() (resubmit) This patch fixes the corner cases where the sum of MTU of the free channels (adjusted for fragmentation overheads) is less than the MTU of PPP link. There are at least 3 situations where this case might arise: - some of the channels are busy - the multilink session is running in a degraded state (i.e. with less than its full complement of active channels) - by design, where multilink protocol is being used to artificially increase the effective link MTU of a single link. Without this patch, at most 1 fragment is ever sent per free channel for a given PPP frame and any remaining part of the PPP frame that does not fit into those fragments is silently discarded. This patch restores the original behaviour which was broken by commit 9c705260feea6ae329bc6b6d5f6d2ef0227eda0a 'ppp:ppp_mp_explode() redesign'. Once all 'free' channels have been given a fragment, an additional fragment is queued to each available channel in turn, as many times as necessary, until the entire PPP frame has been consumed. Signed-off-by: Ben McKeegan Signed-off-by: David S. Miller --- drivers/net/ppp_generic.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 639d11bc444e..cd37d739ac74 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1384,7 +1384,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) /* create a fragment for each channel */ bits = B; - while (nfree > 0 && len > 0) { + while (len > 0) { list = list->next; if (list == &ppp->channels) { i = 0; @@ -1431,29 +1431,31 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) *otherwise divide it according to the speed *of the channel we are going to transmit on */ - if (pch->speed == 0) { - flen = totlen/nfree ; - if (nbigger > 0) { - flen++; - nbigger--; - } - } else { - flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) / - ((totspeed*totfree)/pch->speed)) - hdrlen; - if (nbigger > 0) { - flen += ((totfree - nzero)*pch->speed)/totspeed; - nbigger -= ((totfree - nzero)*pch->speed)/ + if (nfree > 0) { + if (pch->speed == 0) { + flen = totlen/nfree ; + if (nbigger > 0) { + flen++; + nbigger--; + } + } else { + flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) / + ((totspeed*totfree)/pch->speed)) - hdrlen; + if (nbigger > 0) { + flen += ((totfree - nzero)*pch->speed)/totspeed; + nbigger -= ((totfree - nzero)*pch->speed)/ totspeed; + } } + nfree--; } - nfree--; /* *check if we are on the last channel or *we exceded the lenght of the data to *fragment */ - if ((nfree == 0) || (flen > len)) + if ((nfree <= 0) || (flen > len)) flen = len; /* *it is not worth to tx on slow channels: @@ -1467,7 +1469,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) continue; } - mtu = pch->chan->mtu + 2 - hdrlen; + mtu = pch->chan->mtu - hdrlen; if (mtu < 4) mtu = 4; if (flen > mtu) -- cgit v1.2.3 From 446e72f30eca76d6f9a1a54adf84d2c6ba2831f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Jul 2009 03:47:39 +0000 Subject: pppol2tp: calls unregister_pernet_gen_device() at unload time Failure to call unregister_pernet_gen_device() can exhaust memory if module is loaded/unloaded many times. Signed-off-by: Eric Dumazet Acked-by: Cyrill Gorcunov Signed-off-by: David S. 
Miller --- drivers/net/pppol2tp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e7935d09c896..e0f9219a0aea 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -2680,6 +2680,7 @@ out_unregister_pppol2tp_proto: static void __exit pppol2tp_exit(void) { unregister_pppox_proto(PX_PROTO_OL2TP); + unregister_pernet_gen_device(pppol2tp_net_id, &pppol2tp_net_ops); proto_unregister(&pppol2tp_sk_proto); } -- cgit v1.2.3 From 1b994b5a1b3cb5395598a08ef3bb0ac118d75c1b Mon Sep 17 00:00:00 2001 From: roel kluin Date: Sat, 1 Aug 2009 20:26:52 +0000 Subject: tulip: Read buffer overflow Check whether index is within bounds before testing the element. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- drivers/net/tulip/de4x5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index eb72d2e9ab3d..acfdccd44567 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -5059,7 +5059,7 @@ mii_get_phy(struct net_device *dev) if ((id == 0) || (id == 65535)) continue; /* Valid ID? */ for (j=0; jphy[k].id && (k < DE4X5_MAX_PHY); k++); + for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); if (k < DE4X5_MAX_PHY) { memcpy((char *)&lp->phy[k], (char *)&phy_info[j], sizeof(struct phy_table)); @@ -5072,7 +5072,7 @@ mii_get_phy(struct net_device *dev) break; } if ((j == limit) && (i < DE4X5_MAX_MII)) { - for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++); + for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); lp->phy[k].addr = i; lp->phy[k].id = id; lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ @@ -5091,7 +5091,7 @@ mii_get_phy(struct net_device *dev) purgatory: lp->active = 0; if (lp->phy[0].id) { /* Reset the PHY devices */ - for (k=0; lp->phy[k].id && (k < DE4X5_MAX_PHY); k++) { /*For each PHY*/ + for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++) { /*For each PHY*/ mii_wr(MII_CR_RST, MII_CR, lp->phy[k].addr, DE4X5_MII); while (mii_rd(MII_CR, lp->phy[k].addr, DE4X5_MII) & MII_CR_RST); -- cgit v1.2.3 From 54706d99051582993037be5a076aa543fd7f1c38 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Sat, 1 Aug 2009 20:20:13 +0000 Subject: s6gmac: Read buffer overflow Check whether index is within bounds before testing the element. In the last iteration i is PHY_MAX_ADDR. the condition `!(p = pd->mii.bus->phy_map[PHY_MAX_ADDR])' is undefined and may evaluate to false, which leads to a dereference of this invalid phy_map in the phy_connect() below. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- drivers/net/s6gmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/s6gmac.c b/drivers/net/s6gmac.c index 5345e47b35ac..4525cbe8dd69 100644 --- a/drivers/net/s6gmac.c +++ b/drivers/net/s6gmac.c @@ -793,7 +793,7 @@ static inline int s6gmac_phy_start(struct net_device *dev) struct s6gmac *pd = netdev_priv(dev); int i = 0; struct phy_device *p = NULL; - while ((!(p = pd->mii.bus->phy_map[i])) && (i < PHY_MAX_ADDR)) + while ((i < PHY_MAX_ADDR) && (!(p = pd->mii.bus->phy_map[i]))) i++; p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0, PHY_INTERFACE_MODE_RGMII); -- cgit v1.2.3 From 9bfdac94c78faf68ce038d5c45a385927f2667ce Mon Sep 17 00:00:00 2001 From: roel kluin Date: Fri, 31 Jul 2009 03:43:59 +0000 Subject: mISDN: Read buffer overflow Check whether index is within bounds before testing the element. 
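Several fixes in this series correct the same pattern. Because C evaluates && left to right, the index bound must be tested before the array element is read, otherwise the final iteration reads one element past the end. A minimal standalone sketch of the corrected ordering (hypothetical array and names, not the mISDN code):

#include <stdio.h>

#define MAX_CARDS 4	/* assumed bound, for illustration only */

int main(void)
{
	int type[MAX_CARDS] = { 1, 2, 3, 4 };	/* no zero terminator inside the array */
	int i = 0;

	/*
	 * Writing "type[i] && i < MAX_CARDS" would read type[MAX_CARDS],
	 * one element past the end, before the bound check could stop the
	 * loop.  Testing the bound first short-circuits before the access.
	 */
	while (i < MAX_CARDS && type[i])
		i++;

	printf("scanned %d entries\n", i);	/* prints 4, with no out-of-bounds read */
	return 0;
}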
Signed-off-by: Roel Kluin Acked-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/mISDN/l1oip_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 990e6a7e6674..0ebce046ade4 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -1480,7 +1480,7 @@ l1oip_init(void) return -ENOMEM; l1oip_cnt = 0; - while (type[l1oip_cnt] && l1oip_cnt < MAX_CARDS) { + while (l1oip_cnt < MAX_CARDS && type[l1oip_cnt]) { switch (type[l1oip_cnt] & 0xff) { case 1: pri = 0; -- cgit v1.2.3 From 50c643e7652458e649955408685a16e88ea6dbae Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Sat, 1 Aug 2009 21:36:16 +0000 Subject: netxen: fix coherent dma mask setting Change default dma mask for NX3031 to 39 bit with ability to update it to 64-bit (if firmware indicates support). Old code was restricting it under 4GB (32-bit), sometimes causing failure to allocate descriptor rings on heavily populated system. NX2031 based NICs will still get 32-bit coherent mask. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_main.c | 37 ++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 637ac8b89bac..3cd8cfcf627b 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -221,7 +221,7 @@ netxen_napi_disable(struct netxen_adapter *adapter) } } -static int nx_set_dma_mask(struct netxen_adapter *adapter, uint8_t revision_id) +static int nx_set_dma_mask(struct netxen_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; uint64_t mask, cmask; @@ -229,19 +229,17 @@ static int nx_set_dma_mask(struct netxen_adapter *adapter, uint8_t revision_id) adapter->pci_using_dac = 0; mask = DMA_BIT_MASK(32); - /* - * Consistent DMA mask is set to 32 bit because it cannot be set to - * 35 bits. For P3 also leave it at 32 bits for now. Only the rings - * come off this pool. 
- */ cmask = DMA_BIT_MASK(32); + if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { #ifndef CONFIG_IA64 - if (revision_id >= NX_P3_B0) - mask = DMA_BIT_MASK(39); - else if (revision_id == NX_P2_C1) mask = DMA_BIT_MASK(35); #endif + } else { + mask = DMA_BIT_MASK(39); + cmask = mask; + } + if (pci_set_dma_mask(pdev, mask) == 0 && pci_set_consistent_dma_mask(pdev, cmask) == 0) { adapter->pci_using_dac = 1; @@ -256,7 +254,7 @@ static int nx_update_dma_mask(struct netxen_adapter *adapter) { int change, shift, err; - uint64_t mask, old_mask; + uint64_t mask, old_mask, old_cmask; struct pci_dev *pdev = adapter->pdev; change = 0; @@ -272,14 +270,29 @@ nx_update_dma_mask(struct netxen_adapter *adapter) if (change) { old_mask = pdev->dma_mask; + old_cmask = pdev->dev.coherent_dma_mask; + mask = (1ULL<<(32+shift)) - 1; err = pci_set_dma_mask(pdev, mask); if (err) - return pci_set_dma_mask(pdev, old_mask); + goto err_out; + + if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) { + + err = pci_set_consistent_dma_mask(pdev, mask); + if (err) + goto err_out; + } + dev_info(&pdev->dev, "using %d-bit dma mask\n", 32+shift); } return 0; + +err_out: + pci_set_dma_mask(pdev, old_mask); + pci_set_consistent_dma_mask(pdev, old_cmask); + return err; } static void netxen_check_options(struct netxen_adapter *adapter) @@ -1006,7 +1019,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) revision_id = pdev->revision; adapter->ahw.revision_id = revision_id; - err = nx_set_dma_mask(adapter, revision_id); + err = nx_set_dma_mask(adapter); if (err) goto err_out_free_netdev; -- cgit v1.2.3 From df4e7f72f5156ef16a918da8a575ba90ec27ab77 Mon Sep 17 00:00:00 2001 From: Don Fry Date: Fri, 31 Jul 2009 08:40:06 +0000 Subject: pcnet32: remove superfluous NULL pointer check in pcnet32_probe1() Move the debug printk() into the proper place and remove superfluous NULL pointer check in pcnet32_probe1(). This takes care of the following entry from Dan's list: drivers/net/pcnet32.c +1889 pcnet32_probe1(298) warning: variable derefenced before check 'pdev' Reported-by: Dan Carpenter Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Don Fry Signed-off-by: David S. 
Miller --- drivers/net/pcnet32.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 1c35e1d637a0..b61c97254b3f 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -1611,8 +1611,11 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { a = &pcnet32_dwio; - } else + } else { + if (pcnet32_debug & NETIF_MSG_PROBE) + printk(KERN_ERR PFX "No access methods\n"); goto err_release_region; + } } chip_version = @@ -1852,12 +1855,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) lp->options |= PCNET32_PORT_FD; - if (!a) { - if (pcnet32_debug & NETIF_MSG_PROBE) - printk(KERN_ERR PFX "No access methods\n"); - ret = -ENODEV; - goto err_free_consistent; - } lp->a = *a; /* prior to register_netdev, dev->name is not yet correct */ @@ -1973,14 +1970,13 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) return 0; - err_free_ring: +err_free_ring: pcnet32_free_ring(dev); - err_free_consistent: pci_free_consistent(lp->pci_dev, sizeof(*lp->init_block), lp->init_block, lp->init_dma_addr); - err_free_netdev: +err_free_netdev: free_netdev(dev); - err_release_region: +err_release_region: release_region(ioaddr, PCNET32_TOTAL_SIZE); return ret; } -- cgit v1.2.3 From 63097b3ad85788a64c75091bff351ecc850761b2 Mon Sep 17 00:00:00 2001 From: Don Fry Date: Fri, 31 Jul 2009 08:45:29 +0000 Subject: pcnet32: VLB support fixes VLB support has been broken since at least 2004-2005 period as some changes introduced back then assumed that ->pci_dev is always valid, lets try to fix it: - remove duplicated SET_NETDEV_DEV() call - call SET_NETDEV_DEV() only for PCI devices - check for ->pci_dev validity in pcnet32_open() [ Alternatively we may consider removing VLB support but there would not be much gain in it since an extra driver code needed for VLB support is minimal and quite simple. ] This takes care of the following entry from Dan's list: drivers/net/pcnet32.c +1889 pcnet32_probe1(298) warning: variable derefenced before check 'pdev' Reported-by: Dan Carpenter Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Don Fry Signed-off-by: David S. 
Miller --- drivers/net/pcnet32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index b61c97254b3f..16964ec73e67 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -1722,7 +1722,9 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) ret = -ENOMEM; goto err_release_region; } - SET_NETDEV_DEV(dev, &pdev->dev); + + if (pdev) + SET_NETDEV_DEV(dev, &pdev->dev); if (pcnet32_debug & NETIF_MSG_PROBE) printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); @@ -1821,7 +1823,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) spin_lock_init(&lp->lock); - SET_NETDEV_DEV(dev, &pdev->dev); lp->name = chipname; lp->shared_irq = shared; lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */ @@ -2085,6 +2086,7 @@ static void pcnet32_free_ring(struct net_device *dev) static int pcnet32_open(struct net_device *dev) { struct pcnet32_private *lp = netdev_priv(dev); + struct pci_dev *pdev = lp->pci_dev; unsigned long ioaddr = dev->base_addr; u16 val; int i; @@ -2145,9 +2147,9 @@ static int pcnet32_open(struct net_device *dev) lp->a.write_csr(ioaddr, 124, val); /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */ - if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT && - (lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || - lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { + if (pdev && pdev->subsystem_vendor == PCI_VENDOR_ID_AT && + (pdev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX || + pdev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) { if (lp->options & PCNET32_PORT_ASEL) { lp->options = PCNET32_PORT_FD | PCNET32_PORT_100; if (netif_msg_link(lp)) -- cgit v1.2.3 From 5973bee46fe66db94fab198979dec87f263fc2a8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 Jul 2009 00:40:46 +0200 Subject: [WATCHDOG] Fix COH 901 327 watchdog enablement Since the COH 901 327 found in U300 is clocked at 32 kHz we need to wait for the interrupt clearing flag to propagate through hardware in order not to accidentally fire off any interrupts when we enable them. Signed-off-by: Linus Walleij Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/coh901327_wdt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers') diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c index fecb307d28e9..aec7cefdef21 100644 --- a/drivers/watchdog/coh901327_wdt.c +++ b/drivers/watchdog/coh901327_wdt.c @@ -18,6 +18,7 @@ #include #include #include +#include #define DRV_NAME "WDOG COH 901 327" @@ -92,6 +93,8 @@ static struct clk *clk; static void coh901327_enable(u16 timeout) { u16 val; + unsigned long freq; + unsigned long delay_ns; clk_enable(clk); /* Restart timer if it is disabled */ @@ -102,6 +105,14 @@ static void coh901327_enable(u16 timeout) /* Acknowledge any pending interrupt so it doesn't just fire off */ writew(U300_WDOG_IER_WILL_BARK_IRQ_ACK_ENABLE, virtbase + U300_WDOG_IER); + /* + * The interrupt is cleared in the 32 kHz clock domain. 
+ * Wait 3 32 kHz cycles for it to take effect + */ + freq = clk_get_rate(clk); + delay_ns = (1000000000 + freq - 1) / freq; /* Freq to ns and round up */ + delay_ns = 3 * delay_ns; /* Wait 3 cycles */ + ndelay(delay_ns); /* Enable the watchdog interrupt */ writew(U300_WDOG_IMR_WILL_BARK_IRQ_ENABLE, virtbase + U300_WDOG_IMR); /* Activate the watchdog timer */ -- cgit v1.2.3 From b564afcfb82fe3e63a7ce05a944eb5e11244d7cb Mon Sep 17 00:00:00 2001 From: Andreas Eversberg Date: Mon, 27 Jul 2009 07:24:04 +0000 Subject: mISDN: Fix handling of receive buffer size in L1oIP The size of receive buffer pointer was used to get size of receive buffer instead of recvbuf_size itself, so only 4/8 bytes could be transfered. This is a regression to 2.6.30 introduced by commit 8c90e11e3543d7de612194a042a148caeaab5f1d mISDN: Use kernel_{send,recv}msg instead of open coding Signed-off-by: Andreas Eversberg Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- drivers/isdn/mISDN/l1oip_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 0ebce046ade4..7e5f30dbc0a0 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -731,10 +731,10 @@ l1oip_socket_thread(void *data) while (!signal_pending(current)) { struct kvec iov = { .iov_base = recvbuf, - .iov_len = sizeof(recvbuf), + .iov_len = recvbuf_size, }; recvlen = kernel_recvmsg(socket, &msg, &iov, 1, - sizeof(recvbuf), 0); + recvbuf_size, 0); if (recvlen > 0) { l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen); } else { -- cgit v1.2.3 From 79896cf42f6a96d7e14f2dc3473443d68d74031d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Aug 2009 14:04:19 -0700 Subject: Make pci_claim_resource() use request_resource() rather than insert_resource() This function has traditionally used "insert_resource()", because before commit cebd78a8c5 ("Fix pci_claim_resource") it used to just insert the resource into whatever root resource tree that was indicated by "pcibios_select_root()". So there Matthew fixed it to actually look up the proper parent resource, which means that now it's actively wrong to then traverse the resource tree any more: we already know exactly where the new resource should go. And when we then did commit a76117dfd6 ("x86: Use pci_claim_resource"), which changed the x86 PCI code from the open-coded pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { to using if (pci_claim_resource(dev, idx) < 0) { that "insert_resource()" now suddenly became a problem, and causes a regression covered by http://bugzilla.kernel.org/show_bug.cgi?id=13891 which this fixes. Reported-and-tested-by: Rafael J. Wysocki Cc: Matthew Wilcox Cc: Andrew Patterson Cc: Linux PCI Signed-off-by: Linus Torvalds --- drivers/pci/setup-res.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index b711fb7181e2..1898c7b47907 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -100,16 +100,16 @@ int pci_claim_resource(struct pci_dev *dev, int resource) { struct resource *res = &dev->resource[resource]; struct resource *root; - char *dtype = resource < PCI_BRIDGE_RESOURCES ? 
"device" : "bridge"; int err; root = pci_find_parent_resource(dev, res); err = -EINVAL; if (root != NULL) - err = insert_resource(root, res); + err = request_resource(root, res); if (err) { + const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", resource, root ? "address space collision on" : -- cgit v1.2.3 From ac5e7113e74872928844d00085bd47c988f12728 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 3 Aug 2009 10:59:47 +1000 Subject: md: Push down data integrity code to personalities. This patch replaces md_integrity_check() by two new public functions: md_integrity_register() and md_integrity_add_rdev() which are both personality-independent. md_integrity_register() is called from the ->run and ->hot_remove methods of all personalities that support data integrity. The function iterates over the component devices of the array and determines if all active devices are integrity capable and if their profiles match. If this is the case, the common profile is registered for the mddev via blk_integrity_register(). The second new function, md_integrity_add_rdev() is called from the ->hot_add_disk methods, i.e. whenever a new device is being added to a raid array. If the new device does not support data integrity, or has a profile different from the one already registered, data integrity for the mddev is disabled. For raid0 and linear, only the call to md_integrity_register() from the ->run method is necessary. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 1 + drivers/md/md.c | 94 ++++++++++++++++++++++++++++++++++---------------- drivers/md/md.h | 2 ++ drivers/md/multipath.c | 5 ++- drivers/md/raid0.c | 1 + drivers/md/raid1.c | 6 ++-- drivers/md/raid10.c | 4 +++ 7 files changed, 80 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5810fa906af0..54c8677f1e59 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev) mddev->queue->unplug_fn = linear_unplug; mddev->queue->backing_dev_info.congested_fn = linear_congested; mddev->queue->backing_dev_info.congested_data = mddev; + md_integrity_register(mddev); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index d4351ff0849f..180949e94a7b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1487,37 +1487,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) static LIST_HEAD(pending_raid_disks); -static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) +/* + * Try to register data integrity profile for an mddev + * + * This is called when an array is started and after a disk has been kicked + * from the array. It only succeeds if all working and active component devices + * are integrity capable with matching profiles. + */ +int md_integrity_register(mddev_t *mddev) +{ + mdk_rdev_t *rdev, *reference = NULL; + + if (list_empty(&mddev->disks)) + return 0; /* nothing to do */ + if (blk_get_integrity(mddev->gendisk)) + return 0; /* already registered */ + list_for_each_entry(rdev, &mddev->disks, same_set) { + /* skip spares and non-functional disks */ + if (test_bit(Faulty, &rdev->flags)) + continue; + if (rdev->raid_disk < 0) + continue; + /* + * If at least one rdev is not integrity capable, we can not + * enable data integrity for the md device. 
+ */ + if (!bdev_get_integrity(rdev->bdev)) + return -EINVAL; + if (!reference) { + /* Use the first rdev as the reference */ + reference = rdev; + continue; + } + /* does this rdev's profile match the reference profile? */ + if (blk_integrity_compare(reference->bdev->bd_disk, + rdev->bdev->bd_disk) < 0) + return -EINVAL; + } + /* + * All component devices are integrity capable and have matching + * profiles, register the common profile for the md device. + */ + if (blk_integrity_register(mddev->gendisk, + bdev_get_integrity(reference->bdev)) != 0) { + printk(KERN_ERR "md: failed to register integrity for %s\n", + mdname(mddev)); + return -EINVAL; + } + printk(KERN_NOTICE "md: data integrity on %s enabled\n", + mdname(mddev)); + return 0; +} +EXPORT_SYMBOL(md_integrity_register); + +/* Disable data integrity if non-capable/non-matching disk is being added */ +void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) { - struct mdk_personality *pers = mddev->pers; - struct gendisk *disk = mddev->gendisk; struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); - struct blk_integrity *bi_mddev = blk_get_integrity(disk); + struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); - /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ - if (pers && pers->level >= 4 && pers->level <= 6) + if (!bi_mddev) /* nothing to do */ return; - - /* If rdev is integrity capable, register profile for mddev */ - if (!bi_mddev && bi_rdev) { - if (blk_integrity_register(disk, bi_rdev)) - printk(KERN_ERR "%s: %s Could not register integrity!\n", - __func__, disk->disk_name); - else - printk(KERN_NOTICE "Enabling data integrity on %s\n", - disk->disk_name); + if (rdev->raid_disk < 0) /* skip spares */ return; - } - - /* Check that mddev and rdev have matching profiles */ - if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { - printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, - disk->disk_name, rdev->bdev->bd_disk->disk_name); - printk(KERN_NOTICE "Disabling data integrity on %s\n", - disk->disk_name); - blk_integrity_unregister(disk); - } + if (bi_rdev && blk_integrity_compare(mddev->gendisk, + rdev->bdev->bd_disk) >= 0) + return; + printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); + blk_integrity_unregister(mddev->gendisk); } +EXPORT_SYMBOL(md_integrity_add_rdev); static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) { @@ -1591,7 +1630,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) /* May as well allow recovery to be retried once */ mddev->recovery_disabled = 0; - md_integrity_check(rdev, mddev); return 0; fail: @@ -4048,10 +4086,6 @@ static int do_md_run(mddev_t * mddev) } strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); - if (pers->level >= 4 && pers->level <= 6) - /* Cannot support integrity (yet) */ - blk_integrity_unregister(mddev->gendisk); - if (mddev->reshape_position != MaxSector && pers->start_reshape == NULL) { /* This personality cannot handle reshaping... 
*/ diff --git a/drivers/md/md.h b/drivers/md/md.h index 9430a110db93..78f03168baf9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -431,5 +431,7 @@ extern int md_allow_write(mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); extern int md_check_no_bitmap(mddev_t *mddev); +extern int md_integrity_register(mddev_t *mddev); +void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 237fe3fd235c..7140909f6662 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) set_bit(In_sync, &rdev->flags); rcu_assign_pointer(p->rdev, rdev); err = 0; + md_integrity_add_rdev(rdev, mddev); break; } @@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number) /* lost the race, try later */ err = -EBUSY; p->rdev = rdev; + goto abort; } + md_integrity_register(mddev); } abort: @@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev) mddev->queue->unplug_fn = multipath_unplug; mddev->queue->backing_dev_info.congested_fn = multipath_congested; mddev->queue->backing_dev_info.congested_data = mddev; - + md_integrity_register(mddev); return 0; out_free_conf: diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 335f490dcad6..898e2bdfee47 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev) blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); dump_zones(mddev); + md_integrity_register(mddev); return 0; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0569efba0c02..67e794d0097f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) rcu_assign_pointer(p->rdev, rdev); break; } - + md_integrity_add_rdev(rdev, mddev); print_conf(conf); return err; } @@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number) /* lost the race, try later */ err = -EBUSY; p->rdev = rdev; + goto abort; } + md_integrity_register(mddev); } abort: @@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev) mddev->queue->unplug_fn = raid1_unplug; mddev->queue->backing_dev_info.congested_fn = raid1_congested; mddev->queue->backing_dev_info.congested_data = mddev; - + md_integrity_register(mddev); return 0; out_no_mem: diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7298a5e5a183..3d9020cf6f6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) break; } + md_integrity_add_rdev(rdev, mddev); print_conf(conf); return err; } @@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) /* lost the race, try later */ err = -EBUSY; p->rdev = rdev; + goto abort; } + md_integrity_register(mddev); } abort: @@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev) if (conf->near_copies < mddev->raid_disks) blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); + md_integrity_register(mddev); return 0; out_free_conf: -- cgit v1.2.3 From 3a981b03f38dc3b8a69b77cbc679e66c1318a44a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:55 +1000 Subject: md: when a level change reduces the number of devices, remove the excess. When an array is changed from RAID6 to RAID5, fewer drives are needed. 
So any device that is made superfluous by the level conversion must be marked as not-active. For the RAID6->RAID5 conversion, this will be a drive which only has 'Q' blocks on it. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 180949e94a7b..c194955aecae 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2695,6 +2695,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) ssize_t rv = len; struct mdk_personality *pers; void *priv; + mdk_rdev_t *rdev; if (mddev->pers == NULL) { if (len == 0) @@ -2774,6 +2775,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len) mddev_suspend(mddev); mddev->pers->stop(mddev); module_put(mddev->pers->owner); + /* Invalidate devices that are now superfluous */ + list_for_each_entry(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= mddev->raid_disks) { + rdev->raid_disk = -1; + clear_bit(In_sync, &rdev->flags); + } mddev->pers = pers; mddev->private = priv; strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); -- cgit v1.2.3 From 3673f305faf1bc66ead751344f8262ace851ff44 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:56 +1000 Subject: md: avoid array overflow with bad v1.x metadata We trust the 'desc_nr' field in v1.x metadata enough to use it as an index in an array. This isn't really safe. So range-check the value first. Signed-off-by: NeilBrown --- drivers/md/md.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index c194955aecae..249b2896d4ea 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1308,7 +1308,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) } if (mddev->level != LEVEL_MULTIPATH) { int role; - role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + if (rdev->desc_nr < 0 || + rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { + role = 0xffff; + rdev->desc_nr = -1; + } else + role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); switch(role) { case 0xffff: /* spare */ break; -- cgit v1.2.3 From 70471dafe3390243c598a3165dfb86b8b8b3f4fe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:57 +1000 Subject: md: Handle growth of v1.x metadata correctly. The v1.x metadata does not have a fixed size and can grow when devices are added. If it grows enough to require an extra sector of storage, we need to update the 'sb_size' to match. Without this, md can write out an incomplete superblock with a bad checksum, which will be rejected when trying to re-assemble the array. 
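The patch below grows rdev->sb_size and then rounds it up to the device's logical block size with a bit mask. A standalone sketch of that rounding idiom, using assumed slot counts and a 512-byte sector rather than the real md superblock layout:

#include <stdio.h>

/* Round size up to the next multiple of blocksize (a power of two). */
static unsigned int round_up_to_block(unsigned int size, unsigned int blocksize)
{
	unsigned int bmask = blocksize - 1;

	if (size & bmask)			/* pad only when not already aligned */
		size = (size | bmask) + 1;
	return size;
}

int main(void)
{
	/* e.g. 384 slots: 384 * 2 + 256 = 1024 bytes, already whole sectors */
	printf("%u\n", round_up_to_block(1024, 512));	/* 1024 */
	/* e.g. 400 slots: 400 * 2 + 256 = 1056 bytes, spills past a sector boundary */
	printf("%u\n", round_up_to_block(1056, 512));	/* 1536 */
	return 0;
}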
Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 249b2896d4ea..52c988b072d0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1399,8 +1399,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) if (rdev2->desc_nr+1 > max_dev) max_dev = rdev2->desc_nr+1; - if (max_dev > le32_to_cpu(sb->max_dev)) + if (max_dev > le32_to_cpu(sb->max_dev)) { + int bmask; sb->max_dev = cpu_to_le32(max_dev); + rdev->sb_size = max_dev * 2 + 256; + bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; + if (rdev->sb_size & bmask) + rdev->sb_size = (rdev->sb_size | bmask) + 1; + } for (i=0; idev_roles[i] = cpu_to_le16(0xfffe); -- cgit v1.2.3 From e516402c0d4fc02be4af9fa8c18954d4f9deb44e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:57 +1000 Subject: md/raid5: set reshape_position correctly when reshape starts. As the internal reshape_progress counter is the main driver for reshape, the fact that reshape_position sometimes starts with the wrong value has minimal effect. It is visible in sysfs and that is all. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 39374230a463..659151e5eda4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5000,7 +5000,7 @@ static int raid5_start_reshape(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); } mddev->raid_disks = conf->raid_disks; - mddev->reshape_position = 0; + mddev->reshape_position = conf->reshape_progress; set_bit(MD_CHANGE_DEVS, &mddev->flags); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); -- cgit v1.2.3 From 64bd660b51b2da92e99a5e97349f6558349f11c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:58 +1000 Subject: md: allow raid5_quiesce to work properly when reshape is happening. The ->quiesce method is not supposed to stop resync/recovery/reshape, just normal IO. But in raid5 we don't have a way to know which stripes are being used for normal IO and which for resync etc, so we need to wait for all stripes to be idle to be sure that all writes have completed. However reshape keeps at least some stripe busy for an extended period of time, so a call to raid5_quiesce can block for several seconds needlessly. So arrange for reshape etc to pause briefly while raid5_quiesce is trying to quiesce the array so that the active_stripes count can drop to zero. 
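The handshake is easier to see outside the driver. The sketch below is a plain pthreads analogue with illustrative names, not the md/raid5 code: state 2 means a drain is in progress and background work must pause, state 1 means fully quiesced, and 0 means running.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int quiesce;	/* 0 running, 2 draining, 1 quiesced */
static int active;	/* in-flight units of background work */

static void *background(void *arg)
{
	(void)arg;
	for (int i = 0; i < 5; i++) {
		pthread_mutex_lock(&lock);
		while (quiesce == 2)	/* pause while a drain is in progress */
			pthread_cond_wait(&cond, &lock);
		active++;
		pthread_mutex_unlock(&lock);

		usleep(1000);		/* pretend to process one chunk */

		pthread_mutex_lock(&lock);
		active--;
		pthread_cond_broadcast(&cond);	/* may let the quiescing thread finish */
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, background, NULL);

	pthread_mutex_lock(&lock);
	quiesce = 2;			/* ask background work to pause */
	while (active != 0)		/* wait for in-flight work to drain */
		pthread_cond_wait(&cond, &lock);
	quiesce = 1;			/* fully quiesced */
	pthread_cond_broadcast(&cond);	/* only state 2 pauses background work */
	pthread_mutex_unlock(&lock);

	printf("quiesced with active == 0\n");

	pthread_mutex_lock(&lock);
	quiesce = 0;			/* back to normal running */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}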
Signed-off-by: NeilBrown --- drivers/md/raid5.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 659151e5eda4..2dc35b4c20ac 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski return 0; } + /* Allow raid5_quiesce to complete */ + wait_event(conf->wait_for_overlap, conf->quiesce != 2); + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) return reshape_request(mddev, sector_nr, skipped); @@ -5104,12 +5107,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) case 1: /* stop all writes */ spin_lock_irq(&conf->device_lock); - conf->quiesce = 1; + /* '2' tells resync/reshape to pause so that all + * active stripes can drain + */ + conf->quiesce = 2; wait_event_lock_irq(conf->wait_for_stripe, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, conf->device_lock, /* nothing */); + conf->quiesce = 1; spin_unlock_irq(&conf->device_lock); + /* allow reshape to continue */ + wake_up(&conf->wait_for_overlap); break; case 0: /* re-enable writes */ -- cgit v1.2.3 From 449aad3e25358812c43afc60918c5ad3819488e7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2009 10:59:58 +1000 Subject: md: Use revalidate_disk to effect changes in size of device. As revalidate_disk calls check_disk_size_change, it will cause any capacity change of a gendisk to be propagated to the blockdev inode. So use that instead of mucking about with locks and i_size_write. Also add a call to revalidate_disk in do_md_run and a few other places where the gendisk capacity is changed. Signed-off-by: NeilBrown --- drivers/md/linear.c | 1 + drivers/md/md.c | 28 +++++----------------------- drivers/md/raid1.c | 1 + drivers/md/raid5.c | 12 ++---------- 4 files changed, 9 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 54c8677f1e59..5fe39c2a3d2b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -257,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); call_rcu(&oldconf->rcu, free_conf); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 52c988b072d0..5b98bea4ff9b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3741,17 +3741,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) mddev->array_sectors = sectors; set_capacity(mddev->gendisk, mddev->array_sectors); - if (mddev->pers) { - struct block_device *bdev = bdget_disk(mddev->gendisk, 0); - - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } - } + if (mddev->pers) + revalidate_disk(mddev->gendisk); return len; } @@ -4241,6 +4232,7 @@ static int do_md_run(mddev_t * mddev) md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ + revalidate_disk(mddev->gendisk); mddev->changed = 1; md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); @@ -5139,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) { - struct block_device *bdev; - - bdev = 
bdget_disk(mddev->gendisk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } - } + if (!rv) + revalidate_disk(mddev->gendisk); return rv; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 67e794d0097f..8726fd7ebce5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2134,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) return -EINVAL; set_capacity(mddev->gendisk, mddev->array_sectors); mddev->changed = 1; + revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2dc35b4c20ac..2b521ee67dfa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4858,6 +4858,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return -EINVAL; set_capacity(mddev->gendisk, mddev->array_sectors); mddev->changed = 1; + revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { mddev->recovery_cp = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -5058,7 +5059,6 @@ static void end_reshape(raid5_conf_t *conf) */ static void raid5_finish_reshape(mddev_t *mddev) { - struct block_device *bdev; raid5_conf_t *conf = mddev->private; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5067,15 +5067,7 @@ static void raid5_finish_reshape(mddev_t *mddev) md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev->changed = 1; - - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, - (loff_t)mddev->array_sectors << 9); - mutex_unlock(&bdev->bd_inode->i_mutex); - bdput(bdev); - } + revalidate_disk(mddev->gendisk); } else { int d; mddev->degraded = conf->raid_disks; -- cgit v1.2.3 From eb4ad826419ab5b1260bc1625249114767d36bea Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Sun, 2 Aug 2009 20:22:18 -0700 Subject: mlx4_en: Fix double pci unmapping. In cases of fragmented skb, with the data pointers being wrapped around the TX buffer, the completion handling code would not forward the data pointer and the firs fragment was unmapped several times, while others were not unmapped at all. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_tx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 08c43f2ae72b..5a88b3f57693 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -249,6 +249,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, pci_unmap_page(mdev->pdev, (dma_addr_t) be64_to_cpu(data->addr), frag->size, PCI_DMA_TODEVICE); + ++data; } } /* Stamp the freed descriptor */ -- cgit v1.2.3 From 6afc4fdb3e94ba60cd566cb878b60c6c01979277 Mon Sep 17 00:00:00 2001 From: Saeed Bishara Date: Tue, 28 Jul 2009 04:56:43 -0700 Subject: mtd: fix the conversion from dev to mtd_info The patch fixes a bug when converting dev to mtd_info by using the drvdata of the dev, the previous code used container_of(dev, struct mtd_info, dev), but won't work for the mtdXro devices as they created without being contained inside mtd_info structure. 
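The distinction is easy to demonstrate outside the kernel. In the sketch below (simplified types and a simplified container_of(), not the MTD code), container_of() only recovers the outer object when the struct device really is embedded in it, while a driver-data back pointer also works for a device that was registered on its own, which is the mtdXro situation described above.

#include <stdio.h>
#include <stddef.h>

struct device {
	const char *name;
	void *drvdata;			/* explicit back pointer, as set by dev_set_drvdata() */
};

struct mtd_demo {
	int index;
	struct device dev;		/* device embedded inside the larger object */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct mtd_demo mtd = { .index = 3 };
	struct device standalone = { .name = "mtd3ro" };	/* not embedded in any mtd_demo */

	mtd.dev.drvdata = &mtd;		/* both devices point back at the same object */
	standalone.drvdata = &mtd;

	/* container_of() is only meaningful for the embedded device ... */
	struct mtd_demo *a = container_of(&mtd.dev, struct mtd_demo, dev);
	/* ... whereas the drvdata lookup is valid for both kinds of device. */
	struct mtd_demo *b = standalone.drvdata;

	printf("%d %d\n", a->index, b->index);	/* 3 3 */
	return 0;
}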
Signed-off-by: Saeed Bishara Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 7 ++++--- include/linux/mtd/mtd.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index fac54a3fa3f1..00ebf7af7467 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -65,8 +65,8 @@ static void mtd_release(struct device *dev) static int mtd_cls_suspend(struct device *dev, pm_message_t state) { struct mtd_info *mtd = dev_to_mtd(dev); - - if (mtd->suspend) + + if (mtd && mtd->suspend) return mtd->suspend(mtd); else return 0; @@ -76,7 +76,7 @@ static int mtd_cls_resume(struct device *dev) { struct mtd_info *mtd = dev_to_mtd(dev); - if (mtd->resume) + if (mtd && mtd->resume) mtd->resume(mtd); return 0; } @@ -298,6 +298,7 @@ int add_mtd_device(struct mtd_info *mtd) mtd->dev.class = &mtd_class; mtd->dev.devt = MTD_DEVT(i); dev_set_name(&mtd->dev, "mtd%d", i); + dev_set_drvdata(&mtd->dev, mtd); if (device_register(&mtd->dev) != 0) { mtd_table[i] = NULL; break; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 5675b63a0631..0f32a9b6ff55 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -251,7 +251,7 @@ struct mtd_info { static inline struct mtd_info *dev_to_mtd(struct device *dev) { - return dev ? container_of(dev, struct mtd_info, dev) : NULL; + return dev ? dev_get_drvdata(dev) : NULL; } static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) -- cgit v1.2.3 From 8022c13c27b822cf22f13df10b42aae89cd56bf0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 10 Jul 2009 17:02:17 +0300 Subject: mtd: blkdevs: do not forget to get MTD devices Nowadays MTD devices have to be "get" before they can be used. This has to be done with 'get_mtd_device()'. The 'blktrans_open()' function did not do this and instead used 'try_module_get()'. Fix this. Since 'get_mtd_device()' already gets the module, extra 'try_module_get()' is not needed. This fixes oops when one tries to use mtdblock on top of gluebi. Reported-by: Holger Brunck Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtd_blkdevs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index c3f62654b6df..7baba40c1ed2 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -144,7 +144,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) struct mtd_blktrans_ops *tr = dev->tr; int ret = -ENODEV; - if (!try_module_get(dev->mtd->owner)) + if (!get_mtd_device(NULL, dev->mtd->index)) goto out; if (!try_module_get(tr->owner)) @@ -158,7 +158,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) ret = 0; if (tr->open && (ret = tr->open(dev))) { dev->mtd->usecount--; - module_put(dev->mtd->owner); + put_mtd_device(dev->mtd); out_tr: module_put(tr->owner); } @@ -177,7 +177,7 @@ static int blktrans_release(struct gendisk *disk, fmode_t mode) if (!ret) { dev->mtd->usecount--; - module_put(dev->mtd->owner); + put_mtd_device(dev->mtd); module_put(tr->owner); } -- cgit v1.2.3 From 00acf4a80779611a7ea77ff5b5ffab886ed5cc42 Mon Sep 17 00:00:00 2001 From: Mika Korhonen Date: Thu, 11 Jun 2009 14:05:07 +0300 Subject: mtd: OneNAND: fix incorrect bufferram offset Fixes the case where CONFIG_MTD_ONENAND_2X_PROGRAM is set and the real page size differs from mtd_info.writesize. 
Signed-off-by: Mika Korhonen Acked-by: Kyungmin Park Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/onenand/omap2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 38d656b9b2ee..6fac1f496d4a 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -266,7 +266,7 @@ static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area) if (ONENAND_CURRENT_BUFFERRAM(this)) { if (area == ONENAND_DATARAM) - return mtd->writesize; + return this->writesize; if (area == ONENAND_SPARERAM) return mtd->oobsize; } -- cgit v1.2.3 From 3cae1cc149c40c14424162496eb5a7c8db1cd4fb Mon Sep 17 00:00:00 2001 From: Mika Korhonen Date: Thu, 25 Jun 2009 15:32:19 +0300 Subject: mtd: OneNAND: OMAP2/3: free GPMC CS on module removal GPMC CS was not freed in omap2_onenand_remove() preventing the module from reloading after removal. Signed-off-by: Mika Korhonen Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/onenand/omap2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 6fac1f496d4a..0108ed42e877 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -770,6 +770,7 @@ static int __devexit omap2_onenand_remove(struct platform_device *pdev) } iounmap(c->onenand.base); release_mem_region(c->phys_base, ONENAND_IO_SIZE); + gpmc_cs_free(c->gpmc_cs); kfree(c); return 0; -- cgit v1.2.3 From 2bf961b7ccd69e108ac435c67e2b0522b403c578 Mon Sep 17 00:00:00 2001 From: Subrata Modak Date: Wed, 1 Jul 2009 19:22:47 +0530 Subject: mtd: remove 'SBC8240 Wind River' Device Driver Code This driver is causing build errors and is no longer needed -- it is obsoleted by physmap_of. Signed-off-by: Subrata Modak Tested-on-PPC64-by: Subrata Modak Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/maps/Kconfig | 7 -- drivers/mtd/maps/Makefile | 1 - drivers/mtd/maps/sbc8240.c | 250 --------------------------------------------- 3 files changed, 258 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 0b98654d8eed..7a58bd5522fd 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -284,13 +284,6 @@ config MTD_L440GX BE VERY CAREFUL. -config MTD_SBC8240 - tristate "Flash device on SBC8240" - depends on MTD_JEDECPROBE && 8260 - help - Flash access on the SBC8240 board from Wind River. See - - config MTD_TQM8XXL tristate "CFI Flash device mapped on TQM8XXL" depends on MTD_CFI && TQM8xxL diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile index 8bae7f9850c0..5beb0662d724 100644 --- a/drivers/mtd/maps/Makefile +++ b/drivers/mtd/maps/Makefile @@ -50,7 +50,6 @@ obj-$(CONFIG_MTD_UCLINUX) += uclinux.o obj-$(CONFIG_MTD_NETtel) += nettel.o obj-$(CONFIG_MTD_SCB2_FLASH) += scb2_flash.o obj-$(CONFIG_MTD_H720X) += h720x-flash.o -obj-$(CONFIG_MTD_SBC8240) += sbc8240.o obj-$(CONFIG_MTD_IXP4XX) += ixp4xx.o obj-$(CONFIG_MTD_IXP2000) += ixp2000.o obj-$(CONFIG_MTD_WRSBC8260) += wr_sbc82xx_flash.o diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c index d5374cdcb163..e69de29bb2d1 100644 --- a/drivers/mtd/maps/sbc8240.c +++ b/drivers/mtd/maps/sbc8240.c @@ -1,250 +0,0 @@ -/* - * Handle mapping of the flash memory access routines on the SBC8240 board. - * - * Carolyn Smith, Tektronix, Inc. 
- * - * This code is GPLed - */ - -/* - * The SBC8240 has 2 flash banks. - * Bank 0 is a 512 KiB AMD AM29F040B; 8 x 64 KiB sectors. - * It contains the U-Boot code (7 sectors) and the environment (1 sector). - * Bank 1 is 4 x 1 MiB AMD AM29LV800BT; 15 x 64 KiB sectors, 1 x 32 KiB sector, - * 2 x 8 KiB sectors, 1 x 16 KiB sectors. - * Both parts are JEDEC compatible. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_MTD_PARTITIONS -#include -#endif - -#define DEBUG - -#ifdef DEBUG -# define debugk(fmt,args...) printk(fmt ,##args) -#else -# define debugk(fmt,args...) -#endif - - -#define WINDOW_ADDR0 0xFFF00000 /* 512 KiB */ -#define WINDOW_SIZE0 0x00080000 -#define BUSWIDTH0 1 - -#define WINDOW_ADDR1 0xFF000000 /* 4 MiB */ -#define WINDOW_SIZE1 0x00400000 -#define BUSWIDTH1 8 - -#define MSG_PREFIX "sbc8240:" /* prefix for our printk()'s */ -#define MTDID "sbc8240-%d" /* for mtdparts= partitioning */ - - -static struct map_info sbc8240_map[2] = { - { - .name = "sbc8240 Flash Bank #0", - .size = WINDOW_SIZE0, - .bankwidth = BUSWIDTH0, - }, - { - .name = "sbc8240 Flash Bank #1", - .size = WINDOW_SIZE1, - .bankwidth = BUSWIDTH1, - } -}; - -#define NUM_FLASH_BANKS ARRAY_SIZE(sbc8240_map) - -/* - * The following defines the partition layout of SBC8240 boards. - * - * See include/linux/mtd/partitions.h for definition of the - * mtd_partition structure. - * - * The *_max_flash_size is the maximum possible mapped flash size - * which is not necessarily the actual flash size. It must correspond - * to the value specified in the mapping definition defined by the - * "struct map_desc *_io_desc" for the corresponding machine. - */ - -#ifdef CONFIG_MTD_PARTITIONS - -static struct mtd_partition sbc8240_uboot_partitions [] = { - /* Bank 0 */ - { - .name = "U-boot", /* U-Boot Firmware */ - .offset = 0, - .size = 0x00070000, /* 7 x 64 KiB sectors */ - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, - { - .name = "environment", /* U-Boot environment */ - .offset = 0x00070000, - .size = 0x00010000, /* 1 x 64 KiB sector */ - }, -}; - -static struct mtd_partition sbc8240_fs_partitions [] = { - { - .name = "jffs", /* JFFS filesystem */ - .offset = 0, - .size = 0x003C0000, /* 4 * 15 * 64KiB */ - }, - { - .name = "tmp32", - .offset = 0x003C0000, - .size = 0x00020000, /* 4 * 32KiB */ - }, - { - .name = "tmp8a", - .offset = 0x003E0000, - .size = 0x00008000, /* 4 * 8KiB */ - }, - { - .name = "tmp8b", - .offset = 0x003E8000, - .size = 0x00008000, /* 4 * 8KiB */ - }, - { - .name = "tmp16", - .offset = 0x003F0000, - .size = 0x00010000, /* 4 * 16KiB */ - } -}; - -/* trivial struct to describe partition information */ -struct mtd_part_def -{ - int nums; - unsigned char *type; - struct mtd_partition* mtd_part; -}; - -static struct mtd_info *sbc8240_mtd[NUM_FLASH_BANKS]; -static struct mtd_part_def sbc8240_part_banks[NUM_FLASH_BANKS]; - - -#endif /* CONFIG_MTD_PARTITIONS */ - - -static int __init init_sbc8240_mtd (void) -{ - static struct _cjs { - u_long addr; - u_long size; - } pt[NUM_FLASH_BANKS] = { - { - .addr = WINDOW_ADDR0, - .size = WINDOW_SIZE0 - }, - { - .addr = WINDOW_ADDR1, - .size = WINDOW_SIZE1 - }, - }; - - int devicesfound = 0; - int i,j; - - for (i = 0; i < NUM_FLASH_BANKS; i++) { - printk (KERN_NOTICE MSG_PREFIX - "Probing 0x%08lx at 0x%08lx\n", pt[i].size, pt[i].addr); - - sbc8240_map[i].map_priv_1 = - (unsigned long) ioremap (pt[i].addr, pt[i].size); - if (!sbc8240_map[i].map_priv_1) { - printk (MSG_PREFIX "failed to ioremap\n"); - for (j = 0; j < i; 
j++) { - iounmap((void *) sbc8240_map[j].map_priv_1); - sbc8240_map[j].map_priv_1 = 0; - } - return -EIO; - } - simple_map_init(&sbc8240_mtd[i]); - - sbc8240_mtd[i] = do_map_probe("jedec_probe", &sbc8240_map[i]); - - if (sbc8240_mtd[i]) { - sbc8240_mtd[i]->module = THIS_MODULE; - devicesfound++; - } else { - if (sbc8240_map[i].map_priv_1) { - iounmap((void *) sbc8240_map[i].map_priv_1); - sbc8240_map[i].map_priv_1 = 0; - } - } - } - - if (!devicesfound) { - printk(KERN_NOTICE MSG_PREFIX - "No suppported flash chips found!\n"); - return -ENXIO; - } - -#ifdef CONFIG_MTD_PARTITIONS - sbc8240_part_banks[0].mtd_part = sbc8240_uboot_partitions; - sbc8240_part_banks[0].type = "static image"; - sbc8240_part_banks[0].nums = ARRAY_SIZE(sbc8240_uboot_partitions); - sbc8240_part_banks[1].mtd_part = sbc8240_fs_partitions; - sbc8240_part_banks[1].type = "static file system"; - sbc8240_part_banks[1].nums = ARRAY_SIZE(sbc8240_fs_partitions); - - for (i = 0; i < NUM_FLASH_BANKS; i++) { - - if (!sbc8240_mtd[i]) continue; - if (sbc8240_part_banks[i].nums == 0) { - printk (KERN_NOTICE MSG_PREFIX - "No partition info available, registering whole device\n"); - add_mtd_device(sbc8240_mtd[i]); - } else { - printk (KERN_NOTICE MSG_PREFIX - "Using %s partition definition\n", sbc8240_part_banks[i].mtd_part->name); - add_mtd_partitions (sbc8240_mtd[i], - sbc8240_part_banks[i].mtd_part, - sbc8240_part_banks[i].nums); - } - } -#else - printk(KERN_NOTICE MSG_PREFIX - "Registering %d flash banks at once\n", devicesfound); - - for (i = 0; i < devicesfound; i++) { - add_mtd_device(sbc8240_mtd[i]); - } -#endif /* CONFIG_MTD_PARTITIONS */ - - return devicesfound == 0 ? -ENXIO : 0; -} - -static void __exit cleanup_sbc8240_mtd (void) -{ - int i; - - for (i = 0; i < NUM_FLASH_BANKS; i++) { - if (sbc8240_mtd[i]) { - del_mtd_device (sbc8240_mtd[i]); - map_destroy (sbc8240_mtd[i]); - } - if (sbc8240_map[i].map_priv_1) { - iounmap ((void *) sbc8240_map[i].map_priv_1); - sbc8240_map[i].map_priv_1 = 0; - } - } -} - -module_init (init_sbc8240_mtd); -module_exit (cleanup_sbc8240_mtd); - -MODULE_LICENSE ("GPL"); -MODULE_AUTHOR ("Carolyn Smith "); -MODULE_DESCRIPTION ("MTD map driver for SBC8240 boards"); - -- cgit v1.2.3 From d676c11727815761e41a81b00c054b4bec452ae5 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 14 Jul 2009 22:04:29 +0200 Subject: mtd: mtdblock: introduce mtdblks_lock The mtdblks array and its content are prone to race conditions. Introduce the mutex mtdblks_lock in order to solve this. [Amended by Artem Bityutskiy] Signed-off-by: Matthias Kaehlcke Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdblock.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 208c6faa0358..77db5ce24d92 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -29,6 +29,8 @@ static struct mtdblk_dev { enum { STATE_EMPTY, STATE_CLEAN, STATE_DIRTY } cache_state; } *mtdblks[MAX_MTD_DEVICES]; +static struct mutex mtdblks_lock; + /* * Cache stuff... * @@ -270,15 +272,19 @@ static int mtdblock_open(struct mtd_blktrans_dev *mbd) DEBUG(MTD_DEBUG_LEVEL1,"mtdblock_open\n"); + mutex_lock(&mtdblks_lock); if (mtdblks[dev]) { mtdblks[dev]->count++; + mutex_unlock(&mtdblks_lock); return 0; } /* OK, it's not open. 
Create cache info for it */ mtdblk = kzalloc(sizeof(struct mtdblk_dev), GFP_KERNEL); - if (!mtdblk) + if (!mtdblk) { + mutex_unlock(&mtdblks_lock); return -ENOMEM; + } mtdblk->count = 1; mtdblk->mtd = mtd; @@ -291,6 +297,7 @@ static int mtdblock_open(struct mtd_blktrans_dev *mbd) } mtdblks[dev] = mtdblk; + mutex_unlock(&mtdblks_lock); DEBUG(MTD_DEBUG_LEVEL1, "ok\n"); @@ -304,6 +311,8 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd) DEBUG(MTD_DEBUG_LEVEL1, "mtdblock_release\n"); + mutex_lock(&mtdblks_lock); + mutex_lock(&mtdblk->cache_mutex); write_cached_data(mtdblk); mutex_unlock(&mtdblk->cache_mutex); @@ -316,6 +325,9 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd) vfree(mtdblk->cache_data); kfree(mtdblk); } + + mutex_unlock(&mtdblks_lock); + DEBUG(MTD_DEBUG_LEVEL1, "ok\n"); return 0; @@ -376,6 +388,8 @@ static struct mtd_blktrans_ops mtdblock_tr = { static int __init init_mtdblock(void) { + mutex_init(&mtdblks_lock); + return register_mtd_blktrans(&mtdblock_tr); } -- cgit v1.2.3 From 126b67b8d26f6623d199aa59279f2e3243f2144c Mon Sep 17 00:00:00 2001 From: Doug Thompson Date: Mon, 3 Aug 2009 12:37:06 +0200 Subject: amd64_edac: fix ECC checking On the good path of BIOS enabled ECC and no override, the value returned is 1 by omission and thus is deemed failing by the probe-function. Allow proper module initialization by clearing the retval explicitly. Signed-off-by: Doug Thompson Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 24964c1d0af9..5fa924d61b10 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2977,6 +2977,9 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) "ECC is enabled by BIOS, Proceeding " "with EDAC module initialization\n"); + /* Signal good ECC status */ + ret = 0; + /* CLEAR the override, since BIOS controlled it */ ecc_enable_override = 0; } -- cgit v1.2.3 From 202ff1ec8e53d5dd36e1a5bd4b0a7ed7dbd45087 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Mon, 3 Aug 2009 07:20:38 +0000 Subject: ixgbe: Patch to modify 82598 PCIe completion timeout values The default completion timeout values for 82598 should be in the range of 50us to 50ms, however the hardware default for these parts is 500us to 1ms which is less than the 10ms recommended by the pcie spec. To address this we need to increase the value to either 10ms to 250ms for capability version 1 configuration, or 16ms to 55ms for version 2. Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_82598.c | 67 ++++++++++++++++++++++++++++++++++++++++- drivers/net/ixgbe/ixgbe_type.h | 8 +++++ 2 files changed, 74 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ixgbe/ixgbe_82598.c b/drivers/net/ixgbe/ixgbe_82598.c index b9923047ce11..522c03bc1dad 100644 --- a/drivers/net/ixgbe/ixgbe_82598.c +++ b/drivers/net/ixgbe/ixgbe_82598.c @@ -49,6 +49,51 @@ static s32 ixgbe_setup_copper_link_speed_82598(struct ixgbe_hw *hw, static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data); +/** + * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + * + * The defaults for 82598 should be in the range of 50us to 50ms, + * however the hardware default for these parts is 500us to 1ms which is less + * than the 10ms recommended by the pci-e spec. 
To address this we need to + * increase the value to either 10ms to 250ms for capability version 1 config, + * or 16ms to 55ms for version 2. + **/ +void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) +{ + struct ixgbe_adapter *adapter = hw->back; + u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR); + u16 pcie_devctl2; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK) + goto out; + + /* + * if capababilities version is type 1 we can write the + * timeout of 10ms to 250ms through the GCR register + */ + if (!(gcr & IXGBE_GCR_CAP_VER2)) { + gcr |= IXGBE_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* + * for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + pci_read_config_word(adapter->pdev, + IXGBE_PCI_DEVICE_CONTROL2, &pcie_devctl2); + pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms; + pci_write_config_word(adapter->pdev, + IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND; + IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr); +} + /** * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count * @hw: pointer to hardware structure @@ -152,6 +197,26 @@ out: return ret_val; } +/** + * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function. + * Then set pcie completion timeout + **/ +s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + + /* set the completion timeout for interface */ + if (ret_val == 0) + ixgbe_set_pcie_completion_timeout(hw); + + return ret_val; +} + /** * ixgbe_get_link_capabilities_82598 - Determines link capabilities * @hw: pointer to hardware structure @@ -1085,7 +1150,7 @@ out: static struct ixgbe_mac_operations mac_ops_82598 = { .init_hw = &ixgbe_init_hw_generic, .reset_hw = &ixgbe_reset_hw_82598, - .start_hw = &ixgbe_start_hw_generic, + .start_hw = &ixgbe_start_hw_82598, .clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic, .get_media_type = &ixgbe_get_media_type_82598, .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82598, diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index fa87309dc087..be90eb4575f6 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -718,6 +718,12 @@ #define IXGBE_ECC_STATUS_82599 0x110E0 #define IXGBE_BAR_CTRL_82599 0x110F4 +/* PCI Express Control */ +#define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000 +#define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define IXGBE_GCR_CAP_VER2 0x00040000 + /* Time Sync Registers */ #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ #define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ @@ -1521,6 +1527,7 @@ /* PCI Bus Info */ #define IXGBE_PCI_LINK_STATUS 0xB2 +#define IXGBE_PCI_DEVICE_CONTROL2 0xC8 #define IXGBE_PCI_LINK_WIDTH 0x3F0 #define IXGBE_PCI_LINK_WIDTH_1 0x10 #define IXGBE_PCI_LINK_WIDTH_2 0x20 @@ -1531,6 +1538,7 @@ #define IXGBE_PCI_LINK_SPEED_5000 0x2 #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 +#define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 /* Number of 100 microseconds we wait for PCI Express master disable */ #define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 -- cgit v1.2.3 From 371842448c05b42d11a4be1c8e4e81d62ecc7534 Mon Sep 17 00:00:00 2001 From: "Luis 
R. Rodriguez" Date: Thu, 30 Jul 2009 17:43:48 -0700 Subject: cfg80211: fix regression on beacon world roaming feature A regression was added through patch a4ed90d6: "cfg80211: respect API on orig_flags on channel for beacon hint" We did indeed respect _orig flags but the intention was not clearly stated in the commit log. This patch fixes firmware issues picked up by iwlwifi when we lift passive scan of beaconing restrictions on channels its EEPROM has been configured to always enable. By doing so though we also disallowed beacon hints on devices registering their wiphy with custom world regulatory domains enabled, this happens to be currently ath5k, ath9k and ar9170. The passive scan and beacon restrictions on those devices would never be lifted even if we did find a beacon and the hardware did support such enhancements when world roaming. Since Johannes indicates iwlwifi firmware cannot be changed to allow beacon hinting we set up a flag now to specifically allow drivers to disable beacon hints for devices which cannot use them. We enable the flag on iwlwifi to disable beacon hints and by default enable it for all other drivers. It should be noted beacon hints lift passive scan flags and beacon restrictions when we receive a beacon from an AP on any 5 GHz non-DFS channels, and channels 12-14 on the 2.4 GHz band. We don't bother with channels 1-11 as those channels are allowed world wide. This should fix world roaming for ath5k, ath9k and ar9170, thereby improving scan time when we receive the first beacon from any AP, and also enabling beaconing operation (AP/IBSS/Mesh) on cards which would otherwise not be allowed to do so. Drivers not using custom regulatory stuff (wiphy_apply_custom_regulatory()) were not affected by this as the orig_flags for the channels would have been cleared upon wiphy registration. I tested this with a world roaming ath5k card. Cc: Jouni Malinen Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +++ drivers/net/wireless/iwlwifi/iwl3945-base.c | 3 +++ include/net/cfg80211.h | 5 +++++ net/wireless/reg.c | 9 +++++---- net/wireless/reg.h | 3 ++- 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 6ab07165ea28..18b135f510e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1332,6 +1332,9 @@ int iwl_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 2f50ab60bfdf..523843369ca2 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -3968,6 +3968,9 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1a21895b732b..d1892d66701a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -979,6 +979,10 @@ struct cfg80211_ops { * channels at a later time. This can be used for devices which do not * have calibration information gauranteed for frequencies or settings * outside of its regulatory domain. + * @disable_beacon_hints: enable this if your driver needs to ensure that + * passive scan flags and beaconing flags may not be lifted by cfg80211 + * due to regulatory beacon hints. For more information on beacon + * hints read the documenation for regulatory_hint_found_beacon() * @reg_notifier: the driver's regulatory notification callback * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver @@ -1004,6 +1008,7 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool disable_beacon_hints; enum cfg80211_signal_type signal_type; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5e14371cda70..75a406d33619 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1089,17 +1089,18 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (wiphy->disable_beacon_hints) + return; + chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) { + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; channel_changed = true; } - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) { + if (chan->flags & IEEE80211_CHAN_NO_IBSS) { chan->flags &= ~IEEE80211_CHAN_NO_IBSS; channel_changed = true; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e37829a49dc4..4e167a8e11be 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -30,7 +30,8 @@ int set_regdom(const struct ieee80211_regdomain *rd); * non-radar 5 GHz channels. 
* * Drivers do not need to call this, cfg80211 will do it for after a scan - * on a newly found BSS. + * on a newly found BSS. If you cannot make use of this feature you can + * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, -- cgit v1.2.3 From dbc1eec485625228895ded6baf6bd01ce2475410 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Sun, 2 Aug 2009 02:46:28 -0600 Subject: zd1211rw: fix unaligned access in zd_mac_rx Fix an unaligned memory access in the zd_mac_rx function of zd1211rw that causes problems on SPARC64. Signed-off-by: Patrick Simmons Signed-off-by: John W. Linville --- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 40b07b988224..3bd3c779fff3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -698,7 +698,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) && !mac->pass_ctrl) return 0; - fc = *(__le16 *)buffer; + fc = get_unaligned((__le16*)buffer); need_padding = ieee80211_is_data_qos(fc) ^ ieee80211_has_a4(fc); skb = dev_alloc_skb(length + (need_padding ? 2 : 0)); -- cgit v1.2.3 From c37457e69ffd7d3c94cbfcc1c39be9a45dd7ad21 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 3 Aug 2009 11:11:45 +0200 Subject: drivers/net/wireless/iwlwifi: introduce missing kfree Move orthogonal error handling code up before a kzalloc, so that it doesn't have to free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,f1,l; position p1,p2; expression *ptr != NULL; @@ x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...x...+> } ( x->f1 = E | (x->f1 == NULL || ...) | f(...,x->f1,...) ) ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Acked-by: Zhu Yi Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-debugfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/iwlwifi/iwl-debugfs.c b/drivers/net/wireless/iwlwifi/iwl-debugfs.c index 11e08c068917..ca00cc8ad4c7 100644 --- a/drivers/net/wireless/iwlwifi/iwl-debugfs.c +++ b/drivers/net/wireless/iwlwifi/iwl-debugfs.c @@ -308,18 +308,18 @@ static ssize_t iwl_dbgfs_nvm_read(struct file *file, return -ENODATA; } + ptr = priv->eeprom; + if (!ptr) { + IWL_ERR(priv, "Invalid EEPROM/OTP memory\n"); + return -ENOMEM; + } + /* 4 characters for byte 0xYY */ buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) { IWL_ERR(priv, "Can not allocate Buffer\n"); return -ENOMEM; } - - ptr = priv->eeprom; - if (!ptr) { - IWL_ERR(priv, "Invalid EEPROM/OTP memory\n"); - return -ENOMEM; - } pos += scnprintf(buf + pos, buf_size - pos, "NVM Type: %s\n", (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) ? "OTP" : "EEPROM"); -- cgit v1.2.3 From 9f9857bb5e147b977b9878c46e3dd87c9e8caf50 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 1 Aug 2009 10:55:53 +0200 Subject: drivers/net/wireless: introduce missing kfree Error handling code following a kzalloc should free the allocated data. 
The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,f1,l; position p1,p2; expression *ptr != NULL; @@ x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...x...+> } ( x->f1 = E | (x->f1 == NULL || ...) | f(...,x->f1,...) ) ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: John W. Linville --- drivers/net/wireless/iwmc3200wifi/commands.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/wireless/iwmc3200wifi/commands.c b/drivers/net/wireless/iwmc3200wifi/commands.c index 834a7f544e5d..e2334d123599 100644 --- a/drivers/net/wireless/iwmc3200wifi/commands.c +++ b/drivers/net/wireless/iwmc3200wifi/commands.c @@ -220,6 +220,7 @@ int iwm_store_rxiq_calib_result(struct iwm_priv *iwm) eeprom_rxiq = iwm_eeprom_access(iwm, IWM_EEPROM_CALIB_RXIQ); if (IS_ERR(eeprom_rxiq)) { IWM_ERR(iwm, "Couldn't access EEPROM RX IQ entry\n"); + kfree(rxiq); return PTR_ERR(eeprom_rxiq); } -- cgit v1.2.3 From b929c633b4067be18a335d278a66fd5deef3cabe Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 2 Aug 2009 09:44:12 +0200 Subject: libertas: Read buffer overflow Check whether index is within bounds before testing the element. Signed-off-by: Roel Kluin Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/11d.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/wireless/libertas/11d.c b/drivers/net/wireless/libertas/11d.c index 9a5408e7d94a..5c6968101f0d 100644 --- a/drivers/net/wireless/libertas/11d.c +++ b/drivers/net/wireless/libertas/11d.c @@ -47,7 +47,7 @@ static u8 lbs_region_2_code(u8 *region) { u8 i; - for (i = 0; region[i] && i < COUNTRY_CODE_LEN; i++) + for (i = 0; i < COUNTRY_CODE_LEN && region[i]; i++) region[i] = toupper(region[i]); for (i = 0; i < ARRAY_SIZE(region_code_mapping); i++) { -- cgit v1.2.3 From 99f1b01562b7dcae75b043114f76163fbf84fcab Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 3 Aug 2009 12:10:16 -0700 Subject: iwlagn: do not send key clear commands when rfkill enabled Do all key clearing except sending commands to the device when rfkill is enabled. When rfkill is enabled, the interface is brought down and will be brought back up correctly after rfkill is enabled again. The same change is not needed for iwl3945 as it ignores the return code when sending the key clearing command to the device. This fixes http://bugzilla.kernel.org/show_bug.cgi?id=13742 Signed-off-by: Reinette Chatre Tested-by: Frans Pop Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-sta.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers') diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 2addf735b193..ffd5c61a7553 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -566,6 +566,8 @@ int iwl_remove_default_wep_key(struct iwl_priv *priv, unsigned long flags; spin_lock_irqsave(&priv->sta_lock, flags); + IWL_DEBUG_WEP(priv, "Removing default WEP key: idx=%d\n", + keyconf->keyidx); if (!test_and_clear_bit(keyconf->keyidx, &priv->ucode_key_table)) IWL_ERR(priv, "index %d not used in uCode key table.\n", @@ -573,6 +575,11 @@ int iwl_remove_default_wep_key(struct iwl_priv *priv, priv->default_wep_key--; memset(&priv->wep_keys[keyconf->keyidx], 0, sizeof(priv->wep_keys[0])); + if (iwl_is_rfkill(priv)) { + IWL_DEBUG_WEP(priv, "Not sending REPLY_WEPKEY command due to RFKILL.\n"); + spin_unlock_irqrestore(&priv->sta_lock, flags); + return 0; + } ret = iwl_send_static_wepkey_cmd(priv, 1); IWL_DEBUG_WEP(priv, "Remove default WEP key: idx=%d ret=%d\n", keyconf->keyidx, ret); @@ -853,6 +860,11 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv, priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; + if (iwl_is_rfkill(priv)) { + IWL_DEBUG_WEP(priv, "Not sending REPLY_ADD_STA command because RFKILL enabled. \n"); + spin_unlock_irqrestore(&priv->sta_lock, flags); + return 0; + } ret = iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); spin_unlock_irqrestore(&priv->sta_lock, flags); return ret; -- cgit v1.2.3 From f6caa14aa0b126d4a2933907d1519611b2a8524a Mon Sep 17 00:00:00 2001 From: Mike McCormack Date: Fri, 31 Jul 2009 01:57:42 +0000 Subject: sky2: Avoid transmits during sky2_down() This patch supersedes my previous patch "sky2: Avoid transmitting during sky2_restart". I have reworked the patch to avoid crashes during both sky2_restart() and sky2_set_ringparam(). Without this patch, the sky2 driver can be crashed by doing: # pktgen eth1 & (transmit many packets on eth1) # ethtool -G eth1 tx 510 I am aware you object to storing extra state, but I can't see a way around this. Without remembering that we're restarting, netif_wake_queue() is called in the ISR from sky2_tx_complete(), and netif_tx_lock() is used in sky2_tx_done(). If anybody can see a way around this, please let me know. Signed-off-by: Mike McCormack Signed-off-by: David S. 
Miller --- drivers/net/sky2.c | 14 +++++++++++++- drivers/net/sky2.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 3550c5dcd93c..0a551d8f5d95 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1488,6 +1488,8 @@ static int sky2_up(struct net_device *dev) sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL); #endif + sky2->restarting = 0; + err = sky2_rx_start(sky2); if (err) goto err_out; @@ -1500,6 +1502,9 @@ static int sky2_up(struct net_device *dev) sky2_set_multicast(dev); + /* wake queue incase we are restarting */ + netif_wake_queue(dev); + if (netif_msg_ifup(sky2)) printk(KERN_INFO PFX "%s: enabling interface\n", dev->name); return 0; @@ -1533,6 +1538,8 @@ static inline int tx_dist(unsigned tail, unsigned head) /* Number of list elements available for next tx */ static inline int tx_avail(const struct sky2_port *sky2) { + if (unlikely(sky2->restarting)) + return 0; return sky2->tx_pending - tx_dist(sky2->tx_cons, sky2->tx_prod); } @@ -1818,6 +1825,10 @@ static int sky2_down(struct net_device *dev) if (netif_msg_ifdown(sky2)) printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); + /* explicitly shut off tx incase we're restarting */ + sky2->restarting = 1; + netif_tx_disable(dev); + /* Force flow control off */ sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_PAUSE_OFF); @@ -2359,7 +2370,7 @@ static inline void sky2_tx_done(struct net_device *dev, u16 last) { struct sky2_port *sky2 = netdev_priv(dev); - if (netif_running(dev)) { + if (likely(netif_running(dev) && !sky2->restarting)) { netif_tx_lock(dev); sky2_tx_complete(sky2, last); netif_tx_unlock(dev); @@ -4283,6 +4294,7 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, spin_lock_init(&sky2->phy_lock); sky2->tx_pending = TX_DEF_PENDING; sky2->rx_pending = RX_DEF_PENDING; + sky2->restarting = 0; hw->dev[port] = dev; diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index b5549c9e5107..4486b066b43f 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -2051,6 +2051,7 @@ struct sky2_port { u8 duplex; /* DUPLEX_HALF, DUPLEX_FULL */ u8 rx_csum; u8 wol; + u8 restarting; enum flow_control flow_mode; enum flow_control flow_status; -- cgit v1.2.3 From 7781de74568bddfefbd2d32a934a8c791a2420cd Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 3 Aug 2009 13:43:58 +0100 Subject: drm: Small logic fix in drm_mode_setcrtc Match the logic to the comments in the debug message Signed-off-by: Jakob Bornecrantz Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 8fab7890a363..33be210d6723 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1461,7 +1461,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, goto out; } - if (crtc_req->count_connectors > 0 && !mode && !fb) { + if (crtc_req->count_connectors > 0 && (!mode || !fb)) { DRM_DEBUG("Count connectors is %d but no mode or fb set\n", crtc_req->count_connectors); ret = -EINVAL; -- cgit v1.2.3 From 4cb72b1727140f131b2df5f37c2e54f5965f98c2 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 3 Aug 2009 13:43:59 +0100 Subject: drm: Catch stop possible NULL pointer reference This was caught by Weiss. Also added some comments to the fb_changed and mode_changed variables to explain what they do. 
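As a hedged sketch of what the two flags documented in the diff below mean inside drm_crtc_helper_set_config() (the real function has more branches than shown here):

	bool mode_changed = false;	/* true -> perform a full mode set */
	bool fb_changed = false;	/* true, with !mode_changed -> only flip to the new fb */

	/* a missing framebuffer on either side cannot be handled as a flip: */
	if (set->crtc->fb == NULL || set->fb == NULL)
		mode_changed = true;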
Signed-off-by: Jakob Bornecrantz Tested-by: Thomas White Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 3da9cfa31848..6aaa2cb23365 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -706,8 +706,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) struct drm_encoder **save_encoders, *new_encoder; struct drm_framebuffer *old_fb = NULL; bool save_enabled; - bool mode_changed = false; - bool fb_changed = false; + bool mode_changed = false; /* if true do a full mode set */ + bool fb_changed = false; /* if true and !mode_changed just do a flip */ struct drm_connector *connector; int count = 0, ro, fail = 0; struct drm_crtc_helper_funcs *crtc_funcs; @@ -758,6 +758,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) if (set->crtc->fb == NULL) { DRM_DEBUG("crtc has no fb, full mode set\n"); mode_changed = true; + } else if (set->fb == NULL) { + mode_changed = true; } else if ((set->fb->bits_per_pixel != set->crtc->fb->bits_per_pixel) || set->fb->depth != set->crtc->fb->depth) -- cgit v1.2.3 From 0924d942256ac470c5f7b4ebaf7fe0415fc6fa59 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 3 Aug 2009 12:03:03 +1000 Subject: drm/radeon/kms: fix rv515 VRAM initialisation. This got missed in the VRAM init re-workings. Signed-of-by: Dave Airlie --- drivers/gpu/drm/radeon/rv515.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 551e608702e4..fd8f3ca716ea 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -370,6 +370,7 @@ void rv515_vram_info(struct radeon_device *rdev) rv515_vram_get_type(rdev); + r100_vram_init_sizes(rdev); /* FIXME: we should enforce default clock in case GPU is not in * default setup */ -- cgit v1.2.3 From 6d0897ba58139523d37e97855ee0fe2d78629da6 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 31 Jul 2009 10:47:51 +0200 Subject: drm/ttm: Fix a potential comparison of structs. On some architectures the comparison may cause a compilation failure. 
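A minimal sketch of the portable comparison the patch below switches to, assuming only what the commit message states -- that pgprot_t may be a structure type on some architectures, so the objects cannot be compared directly and the underlying values must be unwrapped with pgprot_val():

	/* sketch: compare protection values, not pgprot_t objects */
	static inline bool prot_is_kernel(pgprot_t prot)
	{
		return pgprot_val(prot) == pgprot_val(PAGE_KERNEL);
	}

The helper name prot_is_kernel() is made up for this sketch; the driver open-codes the comparison, as the diff shows.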
Original partial fix Signed-off-by: Thomas Hellstrom Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ce2e6f38ea01..ad4ada07c6cf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -150,7 +150,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, #ifdef CONFIG_X86 dst = kmap_atomic_prot(d, KM_USER0, prot); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) dst = vmap(&d, 1, 0, prot); else dst = kmap(d); @@ -163,7 +163,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, #ifdef CONFIG_X86 kunmap_atomic(dst, KM_USER0); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) vunmap(dst); else kunmap(d); @@ -186,7 +186,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, #ifdef CONFIG_X86 src = kmap_atomic_prot(s, KM_USER0, prot); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) src = vmap(&s, 1, 0, prot); else src = kmap(s); @@ -199,7 +199,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, #ifdef CONFIG_X86 kunmap_atomic(src, KM_USER0); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) vunmap(src); else kunmap(s); -- cgit v1.2.3 From de05065ff5d6878523317ff4a0b48a1239f80f73 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 3 Aug 2009 12:05:34 +1000 Subject: drm/radeon/kms: fix nomodeset. The ordering was wrong to get the nomodeset parameter to work. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_drv.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 3cfcee17dc56..0bd5879a4957 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -318,6 +318,14 @@ static int __init radeon_init(void) driver = &driver_old; driver->num_ioctls = radeon_max_ioctl; #if defined(CONFIG_DRM_RADEON_KMS) +#ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force() && radeon_modeset == -1) { + DRM_INFO("VGACON disable radeon kernel modesetting.\n"); + driver = &driver_old; + driver->driver_features &= ~DRIVER_MODESET; + radeon_modeset = 0; + } +#endif /* if enabled by default */ if (radeon_modeset == -1) { DRM_INFO("radeon default to kernel modesetting.\n"); @@ -329,17 +337,8 @@ static int __init radeon_init(void) driver->driver_features |= DRIVER_MODESET; driver->num_ioctls = radeon_max_kms_ioctl; } - /* if the vga console setting is enabled still * let modprobe override it */ -#ifdef CONFIG_VGA_CONSOLE - if (vgacon_text_force() && radeon_modeset == -1) { - DRM_INFO("VGACON disable radeon kernel modesetting.\n"); - driver = &driver_old; - driver->driver_features &= ~DRIVER_MODESET; - radeon_modeset = 0; - } -#endif #endif return drm_init(driver); } -- cgit v1.2.3 From c9b7fb54f0a51e587fa09be3a85666b43d36a850 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 29 Jul 2009 21:28:24 +0800 Subject: drm/radeon/kms: fix memory leak in radeon_driver_load_kms This patch fixes following kmemleak report: unreferenced object 0xffff88022cb53000 (size 4096): comm "work_for_cpu", pid 97, jiffies 4294672345 backtrace: [] create_object+0x19f/0x2a0 [] kmemleak_alloc+0x26/0x4c [] __kmalloc+0x187/0x1b0 [] 
kzalloc.clone.0+0x13/0x15 [radeon] [] radeon_driver_load_kms+0x26/0xe1 [radeon] [] drm_get_dev+0x37f/0x480 [drm] [] radeon_pci_probe+0x15/0x269 [radeon] [] local_pci_probe+0x17/0x1b [] do_work_for_cpu+0x18/0x2a [] kthread+0x8a/0x92 [] child_rip+0xa/0x20 [] 0xffffffffffffffff Signed-off-by: Xiaotian Feng Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_kms.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 937a2f1cdb46..3357110e30ce 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -58,6 +58,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) if (r) { DRM_ERROR("Failed to initialize radeon, disabling IOCTL\n"); radeon_device_fini(rdev); + kfree(rdev); + dev->dev_private = NULL; return r; } return 0; -- cgit v1.2.3 From fee280d3fd9bc5247bef9f4ab35a4693bfffdcfd Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 3 Aug 2009 12:39:06 +0200 Subject: drm/ttm: Fix a sync object leak. If there are multiple simultaneous waiters for the same buffer object, a temporary reference to its sync object may be leaked. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6538d4236989..aa82d5370c38 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1575,6 +1575,10 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, driver->sync_obj_unref(&sync_obj); driver->sync_obj_unref(&tmp_obj); spin_lock(&bo->lock); + } else { + spin_unlock(&bo->lock); + driver->sync_obj_unref(&sync_obj); + spin_lock(&bo->lock); } } return 0; -- cgit v1.2.3 From fa99239cb73dbf419bea9f334b85ba94ac88a532 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 3 Aug 2009 14:20:32 +0200 Subject: drm/radeon: Read buffer overflow Check whether index is within bounds before grabbing the element. Signed-off-by: Roel Kluin Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 05a44896dffb..f1ba8ff41130 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -722,13 +722,14 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p, unsigned idx) { struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; - uint32_t header = ib_chunk->kdata[idx]; + uint32_t header; if (idx >= ib_chunk->length_dw) { DRM_ERROR("Can not parse packet at %d after CS end %d !\n", idx, ib_chunk->length_dw); return -EINVAL; } + header = ib_chunk->kdata[idx]; pkt->idx = idx; pkt->type = CP_PACKET_GET_TYPE(header); pkt->count = CP_PACKET_GET_COUNT(header); -- cgit v1.2.3 From c96e7c7a3a79931446ecf9494a8415e4d164ebd8 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 3 Aug 2009 14:22:53 +0200 Subject: drm/ttm: Read buffer overflow Check whether index is within bounds before grabbing the element. 
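Reduced to its core, the fix in the diff that follows has this shape (a sketch reusing the patch's own identifiers, with error handling abbreviated): the array element's address is taken only after the index has been range-checked.

	struct ttm_mem_type_manager *man;	/* no longer initialised from the array */

	if (mem_type >= TTM_NUM_MEM_TYPES) {
		printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type);
		return -EINVAL;
	}
	man = &bdev->man[mem_type];		/* safe: mem_type validated above */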
Signed-off-by: Roel Kluin Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index aa82d5370c38..c2b0d710d10f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1182,13 +1182,14 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) { - struct ttm_mem_type_manager *man = &bdev->man[mem_type]; + struct ttm_mem_type_manager *man; int ret = -EINVAL; if (mem_type >= TTM_NUM_MEM_TYPES) { printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type); return ret; } + man = &bdev->man[mem_type]; if (!man->has_type) { printk(KERN_ERR TTM_PFX "Trying to take down uninitialized " -- cgit v1.2.3 From 0cb13536c3382004bd9b833565e2af33f26ed1fb Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 3 Aug 2009 21:10:01 -0700 Subject: 3c59x: Fix build failure with gcc 3.2 Fix the following build failure with gcc 3.2: CC [M] drivers/net/3c59x.o drivers/net/3c59x.c:2726:1: directives may not be used inside a macro argument drivers/net/3c59x.c:2725:59: unterminated argument list invoking macro "pr_err" drivers/net/3c59x.c: In function `dump_tx_ring': drivers/net/3c59x.c:2727: implicit declaration of function `pr_err' drivers/net/3c59x.c:2731: syntax error before ')' token Apparently gcc 3.2 doesn't like #if interleaved with a macro call. Signed-off-by: Jean Delvare Signed-off-by: David S. Miller --- drivers/net/3c59x.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index c34aee91250b..c20416850948 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -2721,13 +2721,15 @@ dump_tx_ring(struct net_device *dev) &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); issue_and_wait(dev, DownStall); for (i = 0; i < TX_RING_SIZE; i++) { - pr_err(" %d: @%p length %8.8x status %8.8x\n", i, - &vp->tx_ring[i], + unsigned int length; + #if DO_ZEROCOPY - le32_to_cpu(vp->tx_ring[i].frag[0].length), + length = le32_to_cpu(vp->tx_ring[i].frag[0].length); #else - le32_to_cpu(vp->tx_ring[i].length), + length = le32_to_cpu(vp->tx_ring[i].length); #endif + pr_err(" %d: @%p length %8.8x status %8.8x\n", + i, &vp->tx_ring[i], length, le32_to_cpu(vp->tx_ring[i].status)); } if (!stalled) -- cgit v1.2.3 From c2718348b41a8e7646516d9af8bb0231c6a44374 Mon Sep 17 00:00:00 2001 From: Doug Thompson Date: Tue, 4 Aug 2009 12:02:20 +0200 Subject: amd64_edac: print debug statements only on error Add forgotten return calls for the successful cases. 
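A sketch of the control-flow pattern being completed here (function body abbreviated): without an explicit return on the success path, execution falls through into the code after the error label, so the debug message prints even when every read succeeded.

	static void amd64_read_mc_registers(struct amd64_pvt *pvt)
	{
		/* ... several register reads, each doing 'goto err_reg' on failure ... */

		amd64_dump_misc_regs(pvt);

		return;		/* success: skip the error message below */

	err_reg:
		debugf0("Reading an MC register failed\n");
	}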
Signed-off-by: Doug Thompson Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5fa924d61b10..e2a10bcba7a1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -868,6 +868,8 @@ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) goto err_reg; } + return; + err_reg: debugf0("Error reading F2x%03x.\n", reg); } @@ -2634,6 +2636,8 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) amd64_dump_misc_regs(pvt); + return; + err_reg: debugf0("Reading an MC register failed\n"); -- cgit v1.2.3 From 1cef8e41073efe47e809f49670eb461307e52ccc Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jul 2009 11:30:48 +0530 Subject: mfd: twl4030 irq fixes The TWL4030 IRQ handler has a bug which leads to spinlock lock-up. It is calling the 'unmask' function in a process context. :The mask/unmask/ack functions are only designed to be called from the IRQ handler code, or the proper API interfaces found in linux/interrupt.h. Also there is no need to have IRQ chaining mechanism. The right way to handle this is to claim the parent interrupt as a standard interrupt and arrange for handle_twl4030_pih to take care of the rest of the devices. Mail thread on this issue can be found at: http://marc.info/?l=linux-arm-kernel&m=124629940123396&w=2 Signed-off-by: Russell King Tested-by: Santosh Shilimkar Acked-by: Tony Lindgren Signed-off-by: Samuel Ortiz --- drivers/mfd/twl4030-irq.c | 55 +++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index bae61b22501c..7d430835655f 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -180,14 +180,9 @@ static struct completion irq_event; static int twl4030_irq_thread(void *data) { long irq = (long)data; - struct irq_desc *desc = irq_to_desc(irq); static unsigned i2c_errors; static const unsigned max_i2c_errors = 100; - if (!desc) { - pr_err("twl4030: Invalid IRQ: %ld\n", irq); - return -EINVAL; - } current->flags |= PF_NOFREEZE; @@ -240,7 +235,7 @@ static int twl4030_irq_thread(void *data) } local_irq_enable(); - desc->chip->unmask(irq); + enable_irq(irq); } return 0; @@ -255,25 +250,13 @@ static int twl4030_irq_thread(void *data) * thread. All we do here is acknowledge and mask the interrupt and wakeup * the kernel thread. */ -static void handle_twl4030_pih(unsigned int irq, struct irq_desc *desc) +static irqreturn_t handle_twl4030_pih(int irq, void *devid) { /* Acknowledge, clear *AND* mask the interrupt... 
*/ - desc->chip->ack(irq); - complete(&irq_event); -} - -static struct task_struct *start_twl4030_irq_thread(long irq) -{ - struct task_struct *thread; - - init_completion(&irq_event); - thread = kthread_run(twl4030_irq_thread, (void *)irq, "twl4030-irq"); - if (!thread) - pr_err("twl4030: could not create irq %ld thread!\n", irq); - - return thread; + disable_irq_nosync(irq); + complete(devid); + return IRQ_HANDLED; } - /*----------------------------------------------------------------------*/ /* @@ -734,18 +717,28 @@ int twl_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) } /* install an irq handler to demultiplex the TWL4030 interrupt */ - task = start_twl4030_irq_thread(irq_num); - if (!task) { - pr_err("twl4030: irq thread FAIL\n"); - status = -ESRCH; - goto fail; - } - set_irq_data(irq_num, task); - set_irq_chained_handler(irq_num, handle_twl4030_pih); - return status; + init_completion(&irq_event); + status = request_irq(irq_num, handle_twl4030_pih, IRQF_DISABLED, + "TWL4030-PIH", &irq_event); + if (status < 0) { + pr_err("twl4030: could not claim irq%d: %d\n", irq_num, status); + goto fail_rqirq; + } + + task = kthread_run(twl4030_irq_thread, (void *)irq_num, "twl4030-irq"); + if (IS_ERR(task)) { + pr_err("twl4030: could not create irq %d thread!\n", irq_num); + status = PTR_ERR(task); + goto fail_kthread; + } + return status; +fail_kthread: + free_irq(irq_num, &irq_event); +fail_rqirq: + /* clean up twl4030_sih_setup */ fail: for (i = irq_base; i < irq_end; i++) set_irq_chip_and_handler(i, NULL, NULL); -- cgit v1.2.3 From 26d204afa18f7df177f21bdb3759e0098ca8f7d5 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 29 Jul 2009 13:36:10 -0700 Subject: [CPUFREQ] Fix NULL pointer dereference regression in conservative governor Commit ee88415caf736b89500f16e0a545614541a45005 introduced this regression when it removed enable bit in cpu_dbs_info_s. That added a possibility of dbs_cpufreq_notifier getting called for a CPU that is not yet managed by conservative governor. That will happen as the transition notifier is set as soon as one CPU switches to conservative governor and other CPUs can get a NULL pointer dereference without the enable bit check. Add the enable bit back again. Reported-by: Lermytte Christophe Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 57490502b21c..bdea7e2f94ba 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -63,6 +63,7 @@ struct cpu_dbs_info_s { unsigned int down_skip; unsigned int requested_freq; int cpu; + unsigned int enable:1; /* * percpu mutex that serializes governor limit change with * do_dbs_timer invocation. 
We do not want do_dbs_timer to run @@ -141,6 +142,9 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_policy *policy; + if (!this_dbs_info->enable) + return 0; + policy = this_dbs_info->cur_policy; /* @@ -497,6 +501,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; + dbs_info->enable = 1; INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, delay); @@ -504,6 +509,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { + dbs_info->enable = 0; cancel_delayed_work_sync(&dbs_info->work); } -- cgit v1.2.3 From 42c74b84c64633dd3badbfc2abd2ef1728b64b30 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 3 Aug 2009 10:58:11 -0400 Subject: [CPUFREQ] Do not set policy for offline cpus Suspend/Resume fails on multi socket, multi core systems because the cpufreq code erroneously sets the per_cpu policy_cpu value when a logical cpu is offline. This most notably results in missing sysfs files that are used to set the cpu frequencies of the various cpus. Signed-off-by: Prarit Bhargava Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b90eda8b3440..120d236c0ffb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -924,6 +924,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { + if (!cpu_online(j)) + continue; per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(policy_cpu, j) = policy->cpu; } -- cgit v1.2.3 From d5194decd0a6f792b2789eebd4ddf022a248f655 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 29 Jul 2009 11:26:20 +0200 Subject: [CPUFREQ] Fix a kobject reference bug related to managed CPUs The first offline/online cycle is successful, the second not. Doing: echo 0 >cpu1/online echo 1 >cpu1/online echo 0 >cpu1/online The last command will trigger: Jul 22 14:39:50 linux kernel: [ 593.210125] ------------[ cut here ]------------ Jul 22 14:39:50 linux kernel: [ 593.210139] WARNING: at lib/kref.c:43 kref_get+0x23/0x2b() Jul 22 14:39:50 linux kernel: [ 593.210144] Hardware name: To Be Filled By O.E.M. Jul 22 14:39:50 linux kernel: [ 593.210148] Modules linked in: powernow_k8 Jul 22 14:39:50 linux kernel: [ 593.210158] Pid: 378, comm: kondemand/2 Tainted: G W 2.6.31-rc2 #38 Jul 22 14:39:50 linux kernel: [ 593.210163] Call Trace: Jul 22 14:39:50 linux kernel: [ 593.210171] [] ? kref_get+0x23/0x2b Jul 22 14:39:50 linux kernel: [ 593.210181] [] warn_slowpath_common+0x77/0xa4 Jul 22 14:39:50 linux kernel: [ 593.210190] [] warn_slowpath_null+0xf/0x11 Jul 22 14:39:50 linux kernel: [ 593.210198] [] kref_get+0x23/0x2b Jul 22 14:39:50 linux kernel: [ 593.210206] [] kobject_get+0x1a/0x22 Jul 22 14:39:50 linux kernel: [ 593.210214] [] cpufreq_cpu_get+0x8a/0xcb Jul 22 14:39:50 linux kernel: [ 593.210222] [] __cpufreq_driver_getavg+0x1d/0x67 Jul 22 14:39:50 linux kernel: [ 593.210231] [] do_dbs_timer+0x158/0x27f Jul 22 14:39:50 linux kernel: [ 593.210240] [] worker_thread+0x200/0x313 ... The output continues on every do_dbs_timer ondemand freq checking poll. 
This regression was introduced by git commit: 3f4a782b5ce2698b1870b5a7b573cd721d4fce33 The policy is released when the cpufreq device is removed in: __cpufreq_remove_dev(): /* if this isn't the CPU which is the parent of the kobj, we * only need to unlink, put and exit */ Not creating the symlink is not sever at all. As long as: sysfs_remove_link(&sys_dev->kobj, "cpufreq"); handles it gracefully that the symlink did not exist. Possibly no error should be returned at all, because ondemand governor would still provide the same functionality. Userspace in userspace gov case might be confused if the link is missing. Resolves http://bugzilla.kernel.org/show_bug.cgi?id=13903 CC: Mathieu Desnoyers CC: Venkatesh Pallipadi Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 120d236c0ffb..bd74a0b12176 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -858,6 +858,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) /* Check for existing affected CPUs. * They may not be aware of it due to CPU Hotplug. + * cpufreq_cpu_put is called when the device is removed + * in __cpufreq_remove_dev() */ managed_policy = cpufreq_cpu_get(j); if (unlikely(managed_policy)) { @@ -884,7 +886,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) ret = sysfs_create_link(&sys_dev->kobj, &managed_policy->kobj, "cpufreq"); - if (!ret) + if (ret) cpufreq_cpu_put(managed_policy); /* * Success. We only needed to be added to the mask. -- cgit v1.2.3 From 4bc5d34135039566b8d6efa2de7515b2be505da8 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 4 Aug 2009 14:03:25 -0400 Subject: [CPUFREQ] Make cpufreq suspend code conditional on powerpc. The suspend code runs with interrupts disabled, and the powerpc workaround we do in the cpufreq suspend hook calls the drivers ->get method. powernow-k8's ->get does an smp_call_function_single which needs interrupts enabled cpufreq's suspend/resume code was added in 42d4dc3f4e1e to work around a hardware problem on ppc powerbooks. If we make all this code conditional on powerpc, we avoid the issue above. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bd74a0b12176..fd69086d08d5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1248,13 +1248,22 @@ EXPORT_SYMBOL(cpufreq_get); static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) { - int cpu = sysdev->id; int ret = 0; + +#ifdef __powerpc__ + int cpu = sysdev->id; unsigned int cur_freq = 0; struct cpufreq_policy *cpu_policy; dprintk("suspending cpu %u\n", cpu); + /* + * This whole bogosity is here because Powerbooks are made of fail. + * No sane platform should need any of the code below to be run. + * (it's entirely the wrong thing to do, as driver->get may + * reenable interrupts on some architectures). 
+ */ + if (!cpu_online(cpu)) return 0; @@ -1313,6 +1322,7 @@ static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) out: cpufreq_cpu_put(cpu_policy); +#endif /* __powerpc__ */ return ret; } @@ -1326,12 +1336,18 @@ out: */ static int cpufreq_resume(struct sys_device *sysdev) { - int cpu = sysdev->id; int ret = 0; + +#ifdef __powerpc__ + int cpu = sysdev->id; struct cpufreq_policy *cpu_policy; dprintk("resuming cpu %u\n", cpu); + /* As with the ->suspend method, all the code below is + * only necessary because Powerbooks suck. + * See commit 42d4dc3f4e1e for jokes. */ + if (!cpu_online(cpu)) return 0; @@ -1395,6 +1411,7 @@ out: schedule_work(&cpu_policy->update); fail: cpufreq_cpu_put(cpu_policy); +#endif /* __powerpc__ */ return ret; } -- cgit v1.2.3 From e0cff5ed27acd355264b210d9622da801a431e19 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 4 Aug 2009 11:46:41 -0700 Subject: igbvf: Allow VF driver to correctly recognize failure to set mac The VF driver was not correctly recognizing that it did not correctly set it's mac address. As a result the VF driver was unable to receive network traffic until being unloaded and reloaded. The issue was root caused to the fact that the CTS bit was not taken into account when checking for the request being NAKed. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igbvf/vf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/net/igbvf/vf.c b/drivers/net/igbvf/vf.c index 2a4faf9ade69..a9a61efa964c 100644 --- a/drivers/net/igbvf/vf.c +++ b/drivers/net/igbvf/vf.c @@ -274,6 +274,8 @@ static s32 e1000_set_vfta_vf(struct e1000_hw *hw, u16 vid, bool set) err = mbx->ops.read_posted(hw, msgbuf, 2); + msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS; + /* if nacked the vlan was rejected */ if (!err && (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK))) err = -E1000_ERR_MAC_INIT; @@ -317,6 +319,8 @@ static void e1000_rar_set_vf(struct e1000_hw *hw, u8 * addr, u32 index) if (!ret_val) ret_val = mbx->ops.read_posted(hw, msgbuf, 3); + msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS; + /* if nacked the address was rejected, use "perm_addr" */ if (!ret_val && (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK))) -- cgit v1.2.3 From 357eb46d8f275b4e8484541234ea3ba06065e258 Mon Sep 17 00:00:00 2001 From: Hannes Hering Date: Tue, 4 Aug 2009 11:48:39 -0700 Subject: ehea: Fix napi list corruption on ifconfig down This patch fixes the napi list handling when an ehea interface is shut down to avoid corruption of the napi list. Signed-off-by: Hannes Hering Signed-off-by: David S. 
Miller --- drivers/net/ehea/ehea.h | 2 +- drivers/net/ehea/ehea_main.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 78952f8324e2..fa311a950996 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h @@ -40,7 +40,7 @@ #include #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0101" +#define DRV_VERSION "EHEA_0102" /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index e8d46cc1bec2..977c3d358279 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -1545,6 +1545,9 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) { int ret, i; + if (pr->qp) + netif_napi_del(&pr->napi); + ret = ehea_destroy_qp(pr->qp); if (!ret) { -- cgit v1.2.3 From 18eac1cc100fa2afd5f39085aae6b694e417734b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 10:58:29 -0700 Subject: tty-ldisc: make refcount be atomic_t 'users' count This is pure preparation of changing the ldisc reference counting to be a true refcount that defines the lifetime of the ldisc. But this is a purely syntactic change for now to make the next steps easier. This patch should make no semantic changes at all. But I wanted to make the ldisc refcount be an atomic (I will be touching it without locks soon enough), and I wanted to rename it so that there isn't quite as much confusion between 'ldo->refcount' (ldisk operations refcount) and 'ld->refcount' (ldisc refcount itself) in the same file. So it's now an atomic 'ld->users' count. It still starts at zero, despite having a reference from 'tty->ldisc', but that will change once we turn it into a _real_ refcount. 
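To make the counting scheme easier to follow, here is a minimal standalone sketch of the pattern this series is moving towards: an atomic 'users' count with get/put helpers, where dropping the last reference ends the object's lifetime. It is purely illustrative, written against C11 stdatomic rather than the kernel's atomic_t, and every name in it is invented; it is not the tty code.

        #include <stdatomic.h>
        #include <stdlib.h>

        struct obj {
                atomic_int users;               /* outstanding references */
                /* ... payload ... */
        };

        static struct obj *obj_alloc(void)
        {
                struct obj *o = malloc(sizeof(*o));
                if (o)
                        atomic_init(&o->users, 1);      /* creator holds the first reference */
                return o;
        }

        static struct obj *obj_get(struct obj *o)
        {
                atomic_fetch_add(&o->users, 1);         /* take an extra reference */
                return o;
        }

        static void obj_put(struct obj *o)
        {
                /* fetch_sub returns the old value: 1 means we just dropped the last user */
                if (atomic_fetch_sub(&o->users, 1) == 1)
                        free(o);
        }

        int main(void)
        {
                struct obj *o = obj_alloc();
                obj_get(o);     /* a second user appears... */
                obj_put(o);     /* ...and goes away again */
                obj_put(o);     /* creator drops its reference; the object is freed */
                return 0;
        }

The patch at hand only performs the data-type and naming change; the "free on last put" lifetime rule is what the follow-up patch below adds.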
Signed-off-by: Linus Torvalds Tested-by: OGAWA Hirofumi Tested-by: Sergey Senozhatsky Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_ldisc.c | 18 ++++++++---------- include/linux/tty_ldisc.h | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index acd76b767d4c..fd175e60aad5 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -142,7 +142,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc) /* lock it */ ldops->refcount++; ld->ops = ldops; - ld->refcount = 0; + atomic_set(&ld->users, 0); err = 0; } } @@ -206,7 +206,7 @@ static void tty_ldisc_put(struct tty_ldisc *ld) ldo->refcount--; module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); - WARN_ON(ld->refcount); + WARN_ON(atomic_read(&ld->users)); kfree(ld); } @@ -297,7 +297,7 @@ static int tty_ldisc_try(struct tty_struct *tty) spin_lock_irqsave(&tty_ldisc_lock, flags); ld = tty->ldisc; if (test_bit(TTY_LDISC, &tty->flags)) { - ld->refcount++; + atomic_inc(&ld->users); ret = 1; } spin_unlock_irqrestore(&tty_ldisc_lock, flags); @@ -324,7 +324,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(tty->ldisc->refcount == 0); + WARN_ON(atomic_read(&tty->ldisc->users) == 0); return tty->ldisc; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); @@ -365,11 +365,9 @@ void tty_ldisc_deref(struct tty_ldisc *ld) BUG_ON(ld == NULL); spin_lock_irqsave(&tty_ldisc_lock, flags); - if (ld->refcount == 0) + if (atomic_read(&ld->users) == 0) printk(KERN_ERR "tty_ldisc_deref: no references.\n"); - else - ld->refcount--; - if (ld->refcount == 0) + else if (atomic_dec_and_test(&ld->users)) wake_up(&tty_ldisc_wait); spin_unlock_irqrestore(&tty_ldisc_lock, flags); } @@ -536,10 +534,10 @@ static int tty_ldisc_wait_idle(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc->refcount) { + while (atomic_read(&tty->ldisc->users)) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); if (wait_event_timeout(tty_ldisc_wait, - tty->ldisc->refcount == 0, 5 * HZ) == 0) + atomic_read(&tty->ldisc->users) == 0, 5 * HZ) == 0) return -EBUSY; spin_lock_irqsave(&tty_ldisc_lock, flags); } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 40f38d896777..0c4ee9b88f85 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -144,7 +144,7 @@ struct tty_ldisc_ops { struct tty_ldisc { struct tty_ldisc_ops *ops; - int refcount; + atomic_t users; }; #define TTY_LDISC_MAGIC 0x5403 -- cgit v1.2.3 From 65b770468e98941e45e19780dff9283e663e6b8b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 11:11:19 -0700 Subject: tty-ldisc: turn ldisc user count into a proper refcount By using the user count for the actual lifetime rules, we can get rid of the silly "wait_for_idle" logic, because any busy ldisc will automatically stay around until the last user releases it. This avoids a host of odd issues, and simplifies the code. So now, when the last ldisc reference is dropped, we just release the ldisc operations struct reference, and free the ldisc. It looks obvious enough, and it does work for me, but the counting _could_ be off. 
It probably isn't (bad counting in the new version would generally imply that the old code did something really bad, like free an ldisc with a non-zero count), but it does need some testing, and preferably somebody looking at it. With this change, both 'tty_ldisc_put()' and 'tty_ldisc_deref()' are just aliases for the new ref-counting 'put_ldisc()'. Both of them decrement the ldisc user count and free it if it goes down to zero. They're identical functions, in other words. But the reason they still exist as sepate functions is that one of them was exported (tty_ldisc_deref) and had a stupid name (so I don't want to use it as the main name), and the other one was used in multiple places (and I didn't want to make the patch larger just to rename the users). In addition to the refcounting, I did do some minimal cleanup. For example, now "tty_ldisc_try()" actually returns the ldisc it got under the lock, rather than returning true/false and then the caller would look up the ldisc again (now without the protection of the lock). That said, there's tons of dubious use of 'tty->ldisc' without obviously proper locking or refcounting left. I expressly did _not_ want to try to fix it all, keeping the patch minimal. There may or may not be bugs in that kind of code, but they wouldn't be _new_ bugs. That said, even if the bugs aren't new, the timing and lifetime will change. For example, some silly code may depend on the 'tty->ldisc' pointer not changing because they hold a refcount on the 'ldisc'. And that's no longer true - if you hold a ref on the ldisc, the 'ldisc' itself is safe, but tty->ldisc may change. So the proper locking (remains) to hold tty->ldisc_mutex if you expect tty->ldisc to be stable. That's not really a _new_ rule, but it's an example of something that the old code might have unintentionally depended on and hidden bugs. Whatever. The patch _looks_ sensible to me. The only users of ldisc->users are: - get_ldisc() - atomically increment the count - put_ldisc() - atomically decrements the count and releases if zero - tty_ldisc_try_get() - creates the ldisc, and sets the count to 1. The ldisc should then either be released, or be attached to a tty. Signed-off-by: Linus Torvalds Tested-by: OGAWA Hirofumi Tested-by: Sergey Senozhatsky Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_ldisc.c | 143 +++++++++++++++-------------------------------- 1 file changed, 46 insertions(+), 97 deletions(-) (limited to 'drivers') diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index fd175e60aad5..be55dfcf59ac 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -48,6 +48,34 @@ static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); /* Line disc dispatch table */ static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; +static inline struct tty_ldisc *get_ldisc(struct tty_ldisc *ld) +{ + if (ld) + atomic_inc(&ld->users); + return ld; +} + +static inline void put_ldisc(struct tty_ldisc *ld) +{ + if (WARN_ON_ONCE(!ld)) + return; + + /* + * If this is the last user, free the ldisc, and + * release the ldisc ops. 
+ */ + if (atomic_dec_and_test(&ld->users)) { + unsigned long flags; + struct tty_ldisc_ops *ldo = ld->ops; + + kfree(ld); + spin_lock_irqsave(&tty_ldisc_lock, flags); + ldo->refcount--; + module_put(ldo->owner); + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + } +} + /** * tty_register_ldisc - install a line discipline * @disc: ldisc number @@ -142,7 +170,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc) /* lock it */ ldops->refcount++; ld->ops = ldops; - atomic_set(&ld->users, 0); + atomic_set(&ld->users, 1); err = 0; } } @@ -181,35 +209,6 @@ static struct tty_ldisc *tty_ldisc_get(int disc) return ld; } -/** - * tty_ldisc_put - drop ldisc reference - * @ld: ldisc - * - * Drop a reference to a line discipline. Manage refcounts and - * module usage counts. Free the ldisc once the recount hits zero. - * - * Locking: - * takes tty_ldisc_lock to guard against ldisc races - */ - -static void tty_ldisc_put(struct tty_ldisc *ld) -{ - unsigned long flags; - int disc = ld->ops->num; - struct tty_ldisc_ops *ldo; - - BUG_ON(disc < N_TTY || disc >= NR_LDISCS); - - spin_lock_irqsave(&tty_ldisc_lock, flags); - ldo = tty_ldiscs[disc]; - BUG_ON(ldo->refcount == 0); - ldo->refcount--; - module_put(ldo->owner); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - WARN_ON(atomic_read(&ld->users)); - kfree(ld); -} - static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { return (*pos < NR_LDISCS) ? pos : NULL; @@ -234,7 +233,7 @@ static int tty_ldiscs_seq_show(struct seq_file *m, void *v) if (IS_ERR(ld)) return 0; seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); - tty_ldisc_put(ld); + put_ldisc(ld); return 0; } @@ -288,20 +287,17 @@ static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) * Locking: takes tty_ldisc_lock */ -static int tty_ldisc_try(struct tty_struct *tty) +static struct tty_ldisc *tty_ldisc_try(struct tty_struct *tty) { unsigned long flags; struct tty_ldisc *ld; - int ret = 0; spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = tty->ldisc; - if (test_bit(TTY_LDISC, &tty->flags)) { - atomic_inc(&ld->users); - ret = 1; - } + ld = NULL; + if (test_bit(TTY_LDISC, &tty->flags)) + ld = get_ldisc(tty->ldisc); spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return ret; + return ld; } /** @@ -322,10 +318,11 @@ static int tty_ldisc_try(struct tty_struct *tty) struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { + struct tty_ldisc *ld; + /* wait_event is a macro */ - wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(atomic_read(&tty->ldisc->users) == 0); - return tty->ldisc; + wait_event(tty_ldisc_wait, (ld = tty_ldisc_try(tty)) != NULL); + return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); @@ -342,9 +339,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { - if (tty_ldisc_try(tty)) - return tty->ldisc; - return NULL; + return tty_ldisc_try(tty); } EXPORT_SYMBOL_GPL(tty_ldisc_ref); @@ -360,19 +355,15 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref); void tty_ldisc_deref(struct tty_ldisc *ld) { - unsigned long flags; - - BUG_ON(ld == NULL); - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if (atomic_read(&ld->users) == 0) - printk(KERN_ERR "tty_ldisc_deref: no references.\n"); - else if (atomic_dec_and_test(&ld->users)) - wake_up(&tty_ldisc_wait); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); + put_ldisc(ld); } EXPORT_SYMBOL_GPL(tty_ldisc_deref); +static inline void tty_ldisc_put(struct tty_ldisc *ld) +{ + put_ldisc(ld); +} + /** * tty_ldisc_enable - allow ldisc use * @tty: terminal 
to activate ldisc on @@ -520,31 +511,6 @@ static int tty_ldisc_halt(struct tty_struct *tty) return cancel_delayed_work(&tty->buf.work); } -/** - * tty_ldisc_wait_idle - wait for the ldisc to become idle - * @tty: tty to wait for - * - * Wait for the line discipline to become idle. The discipline must - * have been halted for this to guarantee it remains idle. - * - * tty_ldisc_lock protects the ref counts currently. - */ - -static int tty_ldisc_wait_idle(struct tty_struct *tty) -{ - unsigned long flags; - spin_lock_irqsave(&tty_ldisc_lock, flags); - while (atomic_read(&tty->ldisc->users)) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - if (wait_event_timeout(tty_ldisc_wait, - atomic_read(&tty->ldisc->users) == 0, 5 * HZ) == 0) - return -EBUSY; - spin_lock_irqsave(&tty_ldisc_lock, flags); - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return 0; -} - /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -640,14 +606,6 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc) flush_scheduled_work(); - /* Let any existing reference holders finish */ - retval = tty_ldisc_wait_idle(tty); - if (retval < 0) { - clear_bit(TTY_LDISC_CHANGING, &tty->flags); - tty_ldisc_put(new_ldisc); - return retval; - } - mutex_lock(&tty->ldisc_mutex); if (test_bit(TTY_HUPPED, &tty->flags)) { /* We were raced by the hangup method. It will have stomped @@ -793,7 +751,6 @@ void tty_ldisc_hangup(struct tty_struct *tty) if (tty->ldisc) { /* Not yet closed */ /* Switch back to N_TTY */ tty_ldisc_halt(tty); - tty_ldisc_wait_idle(tty); tty_ldisc_reinit(tty); /* At this point we have a closed ldisc and we want to reopen it. We could defer this to the next open but @@ -858,14 +815,6 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) tty_ldisc_halt(tty); flush_scheduled_work(); - /* - * Wait for any short term users (we know they are just driver - * side waiters as the file is closing so user count on the file - * side is zero. - */ - - tty_ldisc_wait_idle(tty); - mutex_lock(&tty->ldisc_mutex); /* * Now kill off the ldisc -- cgit v1.2.3 From cbe9352fa08f90aa03b4dbf1bbabfc95d196e562 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 14:54:56 -0700 Subject: tty-ldisc: be more careful in 'put_ldisc' locking Use 'atomic_dec_and_lock()' to make sure that we always hold the tty_ldisc_lock when the ldisc count goes to zero. That way we can never race against 'tty_ldisc_try()' increasing the count again. Reported-by: OGAWA Hirofumi Signed-off-by: Linus Torvalds Tested-by: Sergey Senozhatsky Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_ldisc.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index be55dfcf59ac..1733d3439ad2 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -55,25 +55,32 @@ static inline struct tty_ldisc *get_ldisc(struct tty_ldisc *ld) return ld; } -static inline void put_ldisc(struct tty_ldisc *ld) +static void put_ldisc(struct tty_ldisc *ld) { + unsigned long flags; + if (WARN_ON_ONCE(!ld)) return; /* * If this is the last user, free the ldisc, and * release the ldisc ops. + * + * We really want an "atomic_dec_and_lock_irqsave()", + * but we don't have it, so this does it by hand. 
*/ - if (atomic_dec_and_test(&ld->users)) { - unsigned long flags; + local_irq_save(flags); + if (atomic_dec_and_lock(&ld->users, &tty_ldisc_lock)) { struct tty_ldisc_ops *ldo = ld->ops; - kfree(ld); - spin_lock_irqsave(&tty_ldisc_lock, flags); ldo->refcount--; module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + kfree(ld); + return; } + local_irq_restore(flags); } /** -- cgit v1.2.3 From 6502fbfaf81b09b3f474bb7b3796257e9450273e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Aug 2009 11:24:24 -0400 Subject: drm/radeon: Add support for RS880 chips These are new AMD IGP chips Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cp.c | 22 +++++++++++++++------- drivers/gpu/drm/radeon/radeon_drv.h | 1 + include/drm/drm_pciids.h | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 146f3570af8e..20f17908b036 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -384,8 +384,9 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) DRM_INFO("Loading RV670 PFP Microcode\n"); for (i = 0; i < PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV670_pfp_microcode[i]); - } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { - DRM_INFO("Loading RS780 CP Microcode\n"); + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { + DRM_INFO("Loading RS780/RS880 CP Microcode\n"); for (i = 0; i < PM4_UCODE_SIZE; i++) { RADEON_WRITE(R600_CP_ME_RAM_DATA, RS780_cp_microcode[i][0]); @@ -396,7 +397,7 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) } RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); - DRM_INFO("Loading RS780 PFP Microcode\n"); + DRM_INFO("Loading RS780/RS880 PFP Microcode\n"); for (i = 0; i < PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RS780_pfp_microcode[i]); } @@ -783,6 +784,7 @@ static void r600_gfx_init(struct drm_device *dev, break; case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: dev_priv->r600_max_pipes = 1; dev_priv->r600_max_tile_pipes = 1; @@ -917,7 +919,8 @@ static void r600_gfx_init(struct drm_device *dev, ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE); else RADEON_WRITE(R600_DB_DEBUG, 0); @@ -935,7 +938,8 @@ static void r600_gfx_init(struct drm_device *dev, sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES); if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) | R600_FETCH_FIFO_HIWATER(0xa) | R600_DONE_FIFO_HIWATER(0xe0) | @@ -978,7 +982,8 @@ static void r600_gfx_init(struct drm_device *dev, R600_NUM_ES_STACK_ENTRIES(0)); } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & 
RADEON_FAMILY_MASK) == CHIP_RS780)) { + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { /* no vertex cache */ sq_config &= ~R600_VC_ENABLE; @@ -1035,7 +1040,8 @@ static void r600_gfx_init(struct drm_device *dev, if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY)); else RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC)); @@ -1078,6 +1084,7 @@ static void r600_gfx_init(struct drm_device *dev, break; case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: gs_prim_buffer_depth = 32; break; @@ -1123,6 +1130,7 @@ static void r600_gfx_init(struct drm_device *dev, switch (dev_priv->flags & RADEON_FAMILY_MASK) { case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: tc_cntl = R600_TC_L2_SIZE(8); break; diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 127d0456f628..3933f8216a34 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -143,6 +143,7 @@ enum radeon_family { CHIP_RV635, CHIP_RV670, CHIP_RS780, + CHIP_RS880, CHIP_RV770, CHIP_RV730, CHIP_RV710, diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 9d4c00491547..853508499d20 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -370,6 +370,11 @@ {0x1002, 0x9614, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9615, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9616, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9710, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9711, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9712, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9713, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9714, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0, 0, 0} #define r128_PCI_IDS \ -- cgit v1.2.3 From 685aaca751271b2c5191901931a19ccaceeae1ce Mon Sep 17 00:00:00 2001 From: "Jory A. Pratt" Date: Mon, 3 Aug 2009 09:32:34 -0700 Subject: Input: i8042 - add Asus G1S to noloop exception list The synaptic touchpad on the Asus G1S is not properly detected when rebooting machine or on cold boot from time to time. Adding the Asus G1S to the noloop exception table resolves the issue. # dmidecode 2.10 SMBIOS 2.4 present. Handle 0x0001, DMI type 1, 27 bytes System Information Manufacturer: ASUSTeK Computer Inc. Product Name: G1S Version: 1.0 Wake-up Type: Power Switch SKU Number: Family: Signed-off-by: Jory A. 
Pratt Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 924e8ed7f2cf..ae04d8a494e5 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -77,6 +77,14 @@ static struct dmi_system_id __initdata i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_BOARD_VERSION, "Rev E"), }, }, + { + .ident = "ASUS G1S", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."), + DMI_MATCH(DMI_BOARD_NAME, "G1S"), + DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + }, + }, { /* AUX LOOP command does not raise AUX IRQ */ .ident = "ASUS P65UP5", -- cgit v1.2.3 From f532959b77e5e567c84c914cb7c7b07d2582448b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 4 Aug 2009 15:09:37 -0700 Subject: intel-iommu: Correct sglist size calculation. In domain_sg_mapping(), use aligned_nrpages() instead of hand-coded rounding code for calculating the size of each sg elem. This means that on IA64 we correctly round up to the MM page size, not just to the VT-d page size. Also remove the incorrect mm_to_dma_pfn() when intel_map_sg() calls domain_sg_mapping() -- the 'size' variable is in VT-d pages already. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ebc9b8dca881..11b317a78b49 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1648,6 +1648,14 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } +/* Returns a number of VTD pages, but aligned to MM page size */ +static inline unsigned long aligned_nrpages(unsigned long host_addr, + size_t size) +{ + host_addr &= ~PAGE_MASK; + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1675,7 +1683,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, uint64_t tmp; if (!sg_res) { - sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; + sg_res = aligned_nrpages(sg->offset, sg->length); sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; @@ -2415,14 +2423,6 @@ error: return ret; } -/* Returns a number of VTD pages, but aligned to MM page size */ -static inline unsigned long aligned_nrpages(unsigned long host_addr, - size_t size) -{ - host_addr &= ~PAGE_MASK; - return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; -} - /* This takes a number of _MM_ pages, not VTD pages */ static struct iova *intel_alloc_iova(struct device *dev, struct dmar_domain *domain, @@ -2875,7 +2875,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne start_vpfn = mm_to_dma_pfn(iova->pfn_lo); - ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot); + ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); if (unlikely(ret)) { /* clear the page */ dma_pte_clear_range(domain, start_vpfn, -- cgit v1.2.3 From 33041ec049d39a6e0463c7edc7b6f631d24559e3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 4 Aug 2009 15:10:59 -0700 Subject: intel-iommu: Mask physical address to 
correct page size in intel_map_single() The physical address passed to domain_pfn_mapping() should be rounded down to the start of the MM page, not the VT-d page. This issue causes kernel panic on PAGE_SIZE>VTD_PAGE_SIZE platforms e.g. ia64 platforms. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 11b317a78b49..af7ff9b5aed8 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2551,6 +2551,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, int prot = 0; int ret; struct intel_iommu *iommu; + unsigned long paddr_pfn = paddr >> PAGE_SHIFT; BUG_ON(dir == DMA_NONE); @@ -2585,7 +2586,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, * is not a big problem */ ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), - paddr >> VTD_PAGE_SHIFT, size, prot); + mm_to_dma_pfn(paddr_pfn), size, prot); if (ret) goto error; -- cgit v1.2.3 From 9c9fe1f841745184bbc5460c6f3909fded3b929b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Aug 2009 16:09:16 -0700 Subject: drm/i915: Use our own workqueue to avoid wedging the system along with the GPU. Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 15 +++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 5 +++-- 4 files changed, 19 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8c4783180bf6..50d1f782768c 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1186,6 +1186,13 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (ret) goto out_iomapfree; + dev_priv->wq = create_workqueue("i915"); + if (dev_priv->wq == NULL) { + DRM_ERROR("Failed to create our workqueue.\n"); + ret = -ENOMEM; + goto out_iomapfree; + } + /* enable GEM by default */ dev_priv->has_gem = 1; @@ -1211,7 +1218,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!I915_NEED_GFX_HWS(dev)) { ret = i915_init_phys_hws(dev); if (ret != 0) - goto out_iomapfree; + goto out_workqueue_free; } i915_get_mem_freq(dev); @@ -1245,7 +1252,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) ret = i915_load_modeset_init(dev, prealloc_size, agp_size); if (ret < 0) { DRM_ERROR("failed to init modeset\n"); - goto out_rmmap; + goto out_workqueue_free; } } @@ -1256,6 +1263,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return 0; +out_workqueue_free: + destroy_workqueue(dev_priv->wq); out_iomapfree: io_mapping_free(dev_priv->mm.gtt_mapping); out_rmmap: @@ -1269,6 +1278,8 @@ int i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + destroy_workqueue(dev_priv->wq); + io_mapping_free(dev_priv->mm.gtt_mapping); if (dev_priv->mm.gtt_mtrr >= 0) { mtrr_del(dev_priv->mm.gtt_mtrr, dev->agp->base, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5f3a259d95e9..7537f57d8a87 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -231,6 +231,7 @@ typedef struct drm_i915_private { spinlock_t error_lock; struct drm_i915_error_state *first_error; struct work_struct error_work; + struct workqueue_struct *wq; /* Register state */ u8 saveLBB; 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5bf420378b6d..140bee142fc2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1570,7 +1570,7 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, } if (was_empty && !dev_priv->mm.suspended) - schedule_delayed_work(&dev_priv->mm.retire_work, HZ); + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); return seqno; } @@ -1719,7 +1719,7 @@ i915_gem_retire_work_handler(struct work_struct *work) i915_gem_retire_requests(dev); if (!dev_priv->mm.suspended && !list_empty(&dev_priv->mm.request_list)) - schedule_delayed_work(&dev_priv->mm.retire_work, HZ); + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); mutex_unlock(&dev->struct_mutex); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f340b3fd54e6..83aee80e77a6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -482,7 +482,7 @@ static void i915_handle_error(struct drm_device *dev) I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); } - schedule_work(&dev_priv->error_work); + queue_work(dev_priv->wq, &dev_priv->error_work); } irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) @@ -560,7 +560,8 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) DRM_DEBUG("hotplug event received, stat 0x%08x\n", hotplug_status); if (hotplug_status & dev_priv->hotplug_supported_mask) - schedule_work(&dev_priv->hotplug_work); + queue_work(dev_priv->wq, + &dev_priv->hotplug_work); I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status); I915_READ(PORT_HOTPLUG_STAT); -- cgit v1.2.3 From 819e0064634f580ab618189e657ea58341d214b7 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 26 Jul 2009 00:50:38 +0200 Subject: drm/i915: Fix read outside array bounds in restoring the SWF10 range. dev_priv->saveSWF1 is a 16 element array, but this reads up to index 22, and restored values from the wrong registers. Signed-off-by: Roel Kluin Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_suspend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 9e1d16e5c3ea..1d04e1904ac6 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -598,7 +598,7 @@ int i915_restore_state(struct drm_device *dev) for (i = 0; i < 16; i++) { I915_WRITE(SWF00 + (i << 2), dev_priv->saveSWF0[i]); - I915_WRITE(SWF10 + (i << 2), dev_priv->saveSWF1[i+7]); + I915_WRITE(SWF10 + (i << 2), dev_priv->saveSWF1[i]); } for (i = 0; i < 3; i++) I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]); -- cgit v1.2.3 From 0a51810aa058a0a4ac76dd6f87f4d10bee774e2e Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Tue, 4 Aug 2009 19:55:56 +0100 Subject: ARM: 5637/1: [KS8695] Don't reference CLOCK_TICK_RATE in drivers Stop referencing CLOCK_TICK_RATE in the KS8695 drivers, rather refer to a KS8695_CLOCK_RATE. Issue pointed out by Russell King on arm-linux-kernel mailing list. 
Signed-off-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-ks8695/include/mach/hardware.h | 5 +++++ arch/arm/mach-ks8695/include/mach/timex.h | 5 +++-- drivers/serial/serial_ks8695.c | 2 +- drivers/watchdog/ks8695_wdt.c | 4 ++-- 4 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/arch/arm/mach-ks8695/include/mach/hardware.h b/arch/arm/mach-ks8695/include/mach/hardware.h index 1d640d075b7e..e0f911d9e021 100644 --- a/arch/arm/mach-ks8695/include/mach/hardware.h +++ b/arch/arm/mach-ks8695/include/mach/hardware.h @@ -16,6 +16,11 @@ #include +/* + * Clocks are derived from MCLK, which is 25Mhz + */ +#define KS8695_CLOCK_RATE 25000000 + /* * Physical RAM address. */ diff --git a/arch/arm/mach-ks8695/include/mach/timex.h b/arch/arm/mach-ks8695/include/mach/timex.h index 4682e350369b..10f716371bd3 100644 --- a/arch/arm/mach-ks8695/include/mach/timex.h +++ b/arch/arm/mach-ks8695/include/mach/timex.h @@ -14,7 +14,8 @@ #ifndef __ASM_ARCH_TIMEX_H #define __ASM_ARCH_TIMEX_H -/* timers are derived from MCLK, which is 25MHz */ -#define CLOCK_TICK_RATE 25000000 +#include + +#define CLOCK_TICK_RATE KS8695_CLOCK_RATE #endif diff --git a/drivers/serial/serial_ks8695.c b/drivers/serial/serial_ks8695.c index 998e89dc5aaf..e0665630e4da 100644 --- a/drivers/serial/serial_ks8695.c +++ b/drivers/serial/serial_ks8695.c @@ -549,7 +549,7 @@ static struct uart_port ks8695uart_ports[SERIAL_KS8695_NR] = { .mapbase = KS8695_UART_VA, .iotype = SERIAL_IO_MEM, .irq = KS8695_IRQ_UART_TX, - .uartclk = CLOCK_TICK_RATE * 16, + .uartclk = KS8695_CLOCK_RATE * 16, .fifosize = 16, .ops = &ks8695uart_pops, .flags = ASYNC_BOOT_AUTOCONF, diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c index 00b03eb43bf0..e1c82769b08e 100644 --- a/drivers/watchdog/ks8695_wdt.c +++ b/drivers/watchdog/ks8695_wdt.c @@ -66,7 +66,7 @@ static inline void ks8695_wdt_stop(void) static inline void ks8695_wdt_start(void) { unsigned long tmcon; - unsigned long tval = wdt_time * CLOCK_TICK_RATE; + unsigned long tval = wdt_time * KS8695_CLOCK_RATE; spin_lock(&ks8695_lock); /* disable timer0 */ @@ -103,7 +103,7 @@ static inline void ks8695_wdt_reload(void) static int ks8695_wdt_settimeout(int new_time) { /* - * All counting occurs at SLOW_CLOCK / 128 = 0.256 Hz + * All counting occurs at KS8695_CLOCK_RATE / 128 = 0.256 Hz * * Since WDV is a 16-bit counter, the maximum period is * 65536 / 0.256 = 256 seconds. -- cgit v1.2.3 From 985fe845aea9cd56fd351800302270444556e45a Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 29 Jul 2009 18:55:53 +0200 Subject: drm/radeon/kms: Fix caching mode selection for GTT object GTT object can either be cached,uncached or wc just let core ttm pick the best mode according to how the bo driver and GTT memory type was initialized. 
Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index dd9ac2fed6d6..e98cae3bf4a6 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -106,7 +106,7 @@ static inline uint32_t radeon_object_flags_from_domain(uint32_t domain) flags |= TTM_PL_FLAG_VRAM | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; } if (domain & RADEON_GEM_DOMAIN_GTT) { - flags |= TTM_PL_FLAG_TT | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; + flags |= TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING; } if (domain & RADEON_GEM_DOMAIN_CPU) { flags |= TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING; -- cgit v1.2.3 From 194934785a846e0a7b1b674b7b475a9d0125d2f8 Mon Sep 17 00:00:00 2001 From: TJ Date: Mon, 3 Aug 2009 13:39:09 -0700 Subject: Input: wistron_btns - support Prestigio Wifi RF kill button The Prestigio 157, an old no-name clone laptop uses input keys very similar to the Wistron 1557/MS2141 with the addition of BIOS-controlled wireless radio frequency kill switch. This patch adds support for the RF kill switch control and adds manual identification of the model. The Prestigio does not expose any recognisable identity via dmidecode and so requires manual selection at module init using force=1 keymap=prestigio Signed-off-by: TJ Signed-off-by: Dmitry Torokhov --- drivers/input/misc/wistron_btns.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index 26e17a9a22eb..27ee976eb54c 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -611,6 +611,20 @@ static struct key_entry keymap_wistron_generic[] __initdata = { { KE_END, 0 } }; +static struct key_entry keymap_prestigio[] __initdata = { + { KE_KEY, 0x11, {KEY_PROG1} }, + { KE_KEY, 0x12, {KEY_PROG2} }, + { KE_WIFI, 0x30 }, + { KE_KEY, 0x22, {KEY_REWIND} }, + { KE_KEY, 0x23, {KEY_FORWARD} }, + { KE_KEY, 0x24, {KEY_PLAYPAUSE} }, + { KE_KEY, 0x25, {KEY_STOPCD} }, + { KE_KEY, 0x31, {KEY_MAIL} }, + { KE_KEY, 0x36, {KEY_WWW} }, + { KE_END, 0 } +}; + + /* * If your machine is not here (which is currently rather likely), please send * a list of buttons and their key codes (reported when loading this module @@ -971,6 +985,8 @@ static int __init select_keymap(void) if (keymap_name != NULL) { if (strcmp (keymap_name, "1557/MS2141") == 0) keymap = keymap_wistron_ms2141; + else if (strcmp (keymap_name, "prestigio") == 0) + keymap = keymap_prestigio; else if (strcmp (keymap_name, "generic") == 0) keymap = keymap_wistron_generic; else { -- cgit v1.2.3 From d82f1c35348cebe2fb2d4a4d31ce0ab0769e3d93 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 5 Aug 2009 01:24:41 -0700 Subject: Input: matrix_keypad - make matrix keymap size dynamic Remove assumption on the shift and size of rows/columns form matrix_keypad driver. 
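To see what the dynamic sizing buys, the standalone sketch below works through the new scan-code arithmetic with made-up numbers (a 4x6 matrix); only the (row << row_shift) + col encoding and the idea of deriving the shift from the column count come from the patch, and count_order() here is a local stand-in for the kernel's get_count_order().

        #include <stdio.h>

        /* stand-in for get_count_order(): smallest s such that (1 << s) >= n */
        static unsigned int count_order(unsigned int n)
        {
                unsigned int s = 0;

                while ((1u << s) < n)
                        s++;
                return s;
        }

        #define MATRIX_SCAN_CODE(row, col, row_shift) (((row) << (row_shift)) + (col))

        int main(void)
        {
                unsigned int num_rows = 4, num_cols = 6;
                unsigned int row_shift = count_order(num_cols);         /* 6 columns -> shift 3 */

                /* the keycode table needs num_rows << row_shift slots (32 here),
                 * and every (row, col) pair maps to its own slot */
                printf("row_shift = %u, keycodemax = %u\n", row_shift, num_rows << row_shift);
                printf("scan code for row 2, col 5 = %d\n", MATRIX_SCAN_CODE(2, 5, row_shift));
                return 0;
        }

This is why the driver can now size the keycode array as num_row_gpios << row_shift itself rather than requiring a max_keymap_size from platform data, and why the old fixed '(row << 4) + col' encoding, which only works for at most 16 columns, can go away.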
Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 18 +++++++++--------- include/linux/input/matrix_keypad.h | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index e9b2e7cb05be..541b981ff075 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -27,6 +27,7 @@ struct matrix_keypad { const struct matrix_keypad_platform_data *pdata; struct input_dev *input_dev; unsigned short *keycodes; + unsigned int row_shift; uint32_t last_key_state[MATRIX_MAX_COLS]; struct delayed_work work; @@ -136,7 +137,7 @@ static void matrix_keypad_scan(struct work_struct *work) if ((bits_changed & (1 << row)) == 0) continue; - code = (row << 4) + col; + code = MATRIX_SCAN_CODE(row, col, keypad->row_shift); input_event(input_dev, EV_MSC, MSC_SCAN, code); input_report_key(input_dev, keypad->keycodes[code], @@ -317,6 +318,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) struct matrix_keypad *keypad; struct input_dev *input_dev; unsigned short *keycodes; + unsigned int row_shift; int i; int err; @@ -332,14 +334,11 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) return -EINVAL; } - if (!keymap_data->max_keymap_size) { - dev_err(&pdev->dev, "invalid keymap data supplied\n"); - return -EINVAL; - } + row_shift = get_count_order(pdata->num_col_gpios); keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL); - keycodes = kzalloc(keymap_data->max_keymap_size * - sizeof(keypad->keycodes), + keycodes = kzalloc((pdata->num_row_gpios << row_shift) * + sizeof(*keycodes), GFP_KERNEL); input_dev = input_allocate_device(); if (!keypad || !keycodes || !input_dev) { @@ -350,6 +349,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) keypad->input_dev = input_dev; keypad->pdata = pdata; keypad->keycodes = keycodes; + keypad->row_shift = row_shift; keypad->stopped = true; INIT_DELAYED_WORK(&keypad->work, matrix_keypad_scan); spin_lock_init(&keypad->lock); @@ -363,7 +363,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) input_dev->keycode = keycodes; input_dev->keycodesize = sizeof(*keycodes); - input_dev->keycodemax = keymap_data->max_keymap_size; + input_dev->keycodemax = pdata->num_row_gpios << keypad->row_shift; for (i = 0; i < keymap_data->keymap_size; i++) { unsigned int key = keymap_data->keymap[i]; @@ -371,7 +371,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) unsigned int col = KEY_COL(key); unsigned short code = KEY_VAL(key); - keycodes[(row << 4) + col] = code; + keycodes[MATRIX_SCAN_CODE(row, col, row_shift)] = code; __set_bit(code, input_dev->keybit); } __clear_bit(KEY_RESERVED, input_dev->keybit); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 7964516c6954..15d5903af2dd 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -15,12 +15,13 @@ #define KEY_COL(k) (((k) >> 16) & 0xff) #define KEY_VAL(k) ((k) & 0xffff) +#define MATRIX_SCAN_CODE(row, col, row_shift) (((row) << (row_shift)) + (col)) + /** * struct matrix_keymap_data - keymap for matrix keyboards * @keymap: pointer to array of uint32 values encoded with KEY() macro * representing keymap * @keymap_size: number of entries (initialized) in this keymap - * @max_keymap_size: maximum size of keymap supported by the device * * This 
structure is supposed to be used by platform code to supply * keymaps to drivers that implement matrix-like keypads/keyboards. @@ -28,14 +29,13 @@ struct matrix_keymap_data { const uint32_t *keymap; unsigned int keymap_size; - unsigned int max_keymap_size; }; /** * struct matrix_keypad_platform_data - platform-dependent keypad data * @keymap_data: pointer to &matrix_keymap_data - * @row_gpios: array of gpio numbers reporesenting rows - * @col_gpios: array of gpio numbers reporesenting colums + * @row_gpios: pointer to array of gpio numbers representing rows + * @col_gpios: pointer to array of gpio numbers reporesenting colums * @num_row_gpios: actual number of row gpios used by device * @num_col_gpios: actual number of col gpios used by device * @col_scan_delay_us: delay, measured in microseconds, that is @@ -48,8 +48,9 @@ struct matrix_keymap_data { struct matrix_keypad_platform_data { const struct matrix_keymap_data *keymap_data; - unsigned int row_gpios[MATRIX_MAX_ROWS]; - unsigned int col_gpios[MATRIX_MAX_COLS]; + const unsigned int *row_gpios; + const unsigned int *col_gpios; + unsigned int num_row_gpios; unsigned int num_col_gpios; -- cgit v1.2.3 From c5b1525533c484238015c48c78f86d49a1bfb86b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 6 Aug 2009 13:31:56 +0800 Subject: intel-iommu: Fix enabling snooping feature by mistake Two defects work together result in KVM device passthrough randomly can't work: 1. iommu_snooping is not initialized to zero when vm_iommu_init() called. So it is possible to get a random value. 2. One line added by commit 2c2e2c38("IOMMU Identity Mapping Support") change the code path, let it bypass domain_update_iommu_cap(), as well as missing the increment of domain iommu reference count. The latter is also likely to cause a leak of domains on repeated VMM assignment and deassignment. Signed-off-by: Sheng Yang Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index af7ff9b5aed8..2314ad7ee5fe 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1505,7 +1505,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, } set_bit(num, iommu->domain_ids); - set_bit(iommu->seq_id, &domain->iommu_bmp); iommu->domains[num] = domain; id = num; } @@ -3409,6 +3408,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; + domain->iommu_snooping = 0; domain->max_addr = 0; /* always allocate the top pgd */ -- cgit v1.2.3 From 909f10de5de81668e4d0a401f3cb5ca6b8a3d20d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 6 Aug 2009 14:28:12 +0000 Subject: sh: LCDC SYS bus access wait fix Update the SuperH Mobile LCDC driver to wait for SYS bus to become idle after reading or writing. This is needed by the kfr2r09 board, but also fixes potential problems on other boards making use of the LCDC in a SYS configuration. 
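The fix is easiest to read as "poll the status register until the SYS bus reports idle, both before and after touching LDDWAR/LDDRAR". The fragment below sketches what such a bounded wait helper generally looks like; the function name, the 100-iteration budget and the error handling are invented for illustration and this is not the driver's actual lcdc_wait_bit().

        #include <linux/io.h>
        #include <linux/delay.h>
        #include <linux/errno.h>

        /* poll *reg until the bits under 'mask' read back as 'value',
         * giving up after a fixed budget */
        static int wait_bit(void __iomem *reg, unsigned long mask, unsigned long value)
        {
                int i;

                for (i = 0; i < 100; i++) {
                        if ((readl(reg) & mask) == value)
                                return 0;               /* bus is idle, safe to continue */
                        udelay(1);
                }
                return -ETIMEDOUT;                      /* still busy; caller decides what to do */
        }

The extra waits this patch adds after the LDDWAR/LDDRAR accesses mean a triggered SYS-bus transaction has finished before the data register is read back or the next access is issued, not merely that it did not start too early.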
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_lcdcfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index d1eb9656ca55..cff406de3d15 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -177,7 +177,7 @@ static unsigned long lcdc_sys_read_data(void *handle) udelay(1); lcdc_wait_bit(ch->lcdc, _LDSR, 2, 0); - return lcdc_read(ch->lcdc, _LDDRDR) & 0xffff; + return lcdc_read(ch->lcdc, _LDDRDR) & 0x3ffff; } struct sh_mobile_lcdc_sys_bus_ops sh_mobile_lcdc_sys_bus_ops = { -- cgit v1.2.3 From 17332925d7b11bb6c2d0c49450ae58dd836005da Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 7 Aug 2009 11:03:26 +1000 Subject: drm/radeon/kms: setup MC/VRAM the same way for suspend/resume We should align the GTT after VRAM no matter what, as we can come back from resume and be put in a different place, and bad things happen.
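The placement rule being established here is plain power-of-two alignment: put the GTT at the VRAM size rounded up to a GTT-size boundary, so the layout comes out the same on every init, including resume. The standalone sketch below works the arithmetic with made-up sizes (384 MiB of VRAM, a 512 MiB GTT); the numbers are not taken from any particular board.

        #include <stdio.h>

        /* round addr up to the next multiple of align (align must be a power of two) */
        static unsigned long long align_up(unsigned long long addr, unsigned long long align)
        {
                return (addr + align - 1) & ~(align - 1);
        }

        int main(void)
        {
                unsigned long long vram_size = 384ULL << 20;    /* 384 MiB, made up */
                unsigned long long gtt_size  = 512ULL << 20;    /* 512 MiB, made up */

                /* VRAM sits at 0; the GTT starts at the first gtt_size-aligned
                 * address at or above the end of VRAM, here 512 MiB */
                printf("gtt_location = %llu MiB\n", align_up(vram_size, gtt_size) >> 20);
                return 0;
        }

Note that the mask form of the round-up only behaves as intended when the alignment is a power of two; for an arbitrary size it would have to be computed with division instead.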
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a162ade74b7f..9ff6dcb97f9d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -152,7 +152,9 @@ int radeon_mc_setup(struct radeon_device *rdev) } } else { rdev->mc.vram_location = 0; - rdev->mc.gtt_location = rdev->mc.mc_vram_size; + tmp = rdev->mc.mc_vram_size; + tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1); + rdev->mc.gtt_location = tmp; } DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20); DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n", -- cgit v1.2.3 From d25f14389a65c7f95512b01415d8d4a8d62855ab Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 27 Jul 2009 12:05:38 +0900 Subject: PCI hotplug: SGI hotplug: fix build failure The commit bd3d99c17039fd05a29587db3f4a180c48da115a ("PCI: Remove untested Electromechanical Interlock (EMI) support in pciehp."), which removes the definition of "struct hotplug_slot_attr", broke SGI hotplug driver. By this commit, we get the following compile error. drivers/pci/hotplug/sgi_hotplug.c:106: error: variable 'sn_slot_path_attr' has initializer but incomplete type drivers/pci/hotplug/sgi_hotplug.c:106: error: unknown field 'attr' specified in initializer drivers/pci/hotplug/sgi_hotplug.c:106: error: extra brace group at end of initializer drivers/pci/hotplug/sgi_hotplug.c:106: error: (near initialization for 'sn_slot_path_attr') drivers/pci/hotplug/sgi_hotplug.c:106: warning: excess elements in struct initializer drivers/pci/hotplug/sgi_hotplug.c:106: warning: (near initialization for 'sn_slot_path_attr') drivers/pci/hotplug/sgi_hotplug.c:106: error: unknown field 'show' specified in initializer drivers/pci/hotplug/sgi_hotplug.c:106: warning: excess elements in struct initializer drivers/pci/hotplug/sgi_hotplug.c:106: warning: (near initialization for 'sn_slot_path_attr') drivers/pci/hotplug/sgi_hotplug.c: In function 'sn_hp_destroy': drivers/pci/hotplug/sgi_hotplug.c:203: error: invalid use of undefined type 'struct hotplug_slot_attribute' drivers/pci/hotplug/sgi_hotplug.c: In function 'sn_hotplug_slot_register': drivers/pci/hotplug/sgi_hotplug.c:655: error: invalid use of undefined type 'struct hotplug_slot_attribute' This patch fixes this regression by adding the definition of struct hotplug_slot_attr into sgi_hotplug.c. 
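The compile errors quoted above all stem from one thing: __ATTR_RO(path) produces a designated initializer that needs a visible wrapper type with an embedded struct attribute and a show() callback of matching signature. The fragment below spells that out by hand; the struct and the sn_slot_path_attr/path_show names are the ones from the patch, while the stub body and the written-out initializer are only a rough, from-memory illustration of what the macro builds.

        #include <linux/sysfs.h>
        #include <linux/types.h>

        struct hotplug_slot;                    /* only used through pointers here */

        /* the wrapper type whose removal broke the build */
        struct hotplug_slot_attribute {
                struct attribute attr;
                ssize_t (*show)(struct hotplug_slot *, char *);
                ssize_t (*store)(struct hotplug_slot *, const char *, size_t);
        };

        static ssize_t path_show(struct hotplug_slot *slot, char *buf)
        {
                return 0;                       /* stub; the real one prints the SN path */
        }

        /* roughly what __ATTR_RO(path) expands to: a read-only "path" file
         * wired to path_show(), with no store method */
        static struct hotplug_slot_attribute sn_slot_path_attr = {
                .attr = { .name = "path", .mode = 0444 },
                .show = path_show,
        };

Once that shared definition went away, the initializer had nothing to match its .attr and .show fields against, hence the "incomplete type" and "unknown field" errors; the follow-up patch below moves the driver to the pci_slot_attribute type instead.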
Tested-by: Mike Habeck Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/sgi_hotplug.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index a4494d78e7c2..0a724bb6ba10 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -103,6 +103,12 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, return retval; } +struct hotplug_slot_attribute { + struct attribute attr; + ssize_t (*show)(struct hotplug_slot *, char *); + ssize_t (*store)(struct hotplug_slot *, const char *, size_t); +}; + static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path); static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) -- cgit v1.2.3 From 94f81a47c4a7a2d7a16fcfdd6d81da381732c101 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 27 Jul 2009 12:06:46 +0900 Subject: PCI hotplug: SGI hotplug: do not use hotplug_slot_attr By the pci slot changes, callbacks of attributes under slot directory (/sys/bus/pci/slots) had been changed to get the pointer to struct pci_slot instead of struct hotplug_slot. So the path_show() that assumes the parameter is a pointer to struct hotplug_slot seems broken. Tested-by: Mike Habeck Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/sgi_hotplug.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index 0a724bb6ba10..8aebe1e9d3d6 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -90,11 +90,10 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = { static DEFINE_MUTEX(sn_hotplug_mutex); -static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, - char *buf) +static ssize_t path_show(struct pci_slot *pci_slot, char *buf) { int retval = -ENOENT; - struct slot *slot = bss_hotplug_slot->private; + struct slot *slot = pci_slot->hotplug->private; if (!slot) return retval; @@ -103,13 +102,7 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, return retval; } -struct hotplug_slot_attribute { - struct attribute attr; - ssize_t (*show)(struct hotplug_slot *, char *); - ssize_t (*store)(struct hotplug_slot *, const char *, size_t); -}; - -static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path); +static struct pci_slot_attribute sn_slot_path_attr = __ATTR_RO(path); static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) { -- cgit v1.2.3 From 2020002a878403a6858868d85a43623f74859dba Mon Sep 17 00:00:00 2001 From: Stoyan Gaydarov Date: Thu, 6 Aug 2009 15:07:28 -0700 Subject: drivers/w1/masters/omap_hdq.c: fix missing mutex unlock This was found using a semantic patch, more info can be found at: http://www.emn.fr/x-info/coccinelle/ Signed-off-by: Stoyan Gaydarov Acked-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/masters/omap_hdq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c index a7e3b706b9d3..0d92969404c3 100644 --- a/drivers/w1/masters/omap_hdq.c +++ b/drivers/w1/masters/omap_hdq.c @@ -687,6 +687,7 @@ static int omap_hdq_remove(struct platform_device *pdev) if (hdq_data->hdq_usecount) { dev_dbg(&pdev->dev, "removed when use count is not zero\n"); + mutex_unlock(&hdq_data->hdq_mutex); return -EBUSY; } -- cgit 
v1.2.3 From 93274e4d4e9416ad1fa47e2f26011e2c483fe5fe Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Thu, 6 Aug 2009 15:07:30 -0700 Subject: fbcon: fix rotate upside down crash Fix the rotate_ud() function not to crash in case of a font which has not a width of multiple by 8: The inner loop of the font pixel copy should not access a bit outside the font memory area. Subtract the shift offset from the font width will prevent this. Signed-off-by: Stefani Seibold Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/console/fbcon_rotate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/video/console/fbcon_rotate.h b/drivers/video/console/fbcon_rotate.h index 75be5ce53dc5..e233444cda66 100644 --- a/drivers/video/console/fbcon_rotate.h +++ b/drivers/video/console/fbcon_rotate.h @@ -45,7 +45,7 @@ static inline void rotate_ud(const char *in, char *out, u32 width, u32 height) width = (width + 7) & ~7; for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { + for (j = 0; j < width - shift; j++) { if (pattern_test_bit(j, i, width, in)) pattern_set_bit(width - (1 + j + shift), height - (1 + i), -- cgit v1.2.3 From 521594442cc62d1c2af8436a05ab5918b7730b19 Mon Sep 17 00:00:00 2001 From: Florian Tobias Schandinat Date: Thu, 6 Aug 2009 15:07:34 -0700 Subject: viafb: fix rmmod bug This fixes a bug caused by changing pointers (viafb_mode, viafb_mode1) assigned by module_param. It reduces driver complexity by not needlessly changing these vars as they are only read once and removing now superfluous code. On unpatched kernels loading viafb with viafb_mode or viafb_mode1 option used and afterwards unloading it results in: kernel BUG at mm/slub.c:2926! invalid opcode: 0000 [#1] PREEMPT last sysfs file: /sys/devices/virtual/block/loop0/removable Modules linked in: snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_pcm rtl8187 snd_timer eeprom_93cx6 mmc_block snd soundcore via_sdmmc fb snd_page_alloc i2c_algo_bit i2c_viapro ehci_hcd uhci_hcd cfbcopyarea mmc_core cfbimgblt cfbfillrect video output [last unloaded: viafb] Pid: 3355, comm: rmmod Not tainted (2.6.31-rc1 #0) EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at kfree+0x80/0xda EAX: c17c2da0 EBX: dc7edbdc ECX: 0000010f EDX: 00000000 ESI: c102c700 EDI: dc7ed8fa EBP: d703ff2c ESP: d703ff20 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 Process rmmod (pid: 3355, ti=d703e000 task=db1412c0 task.ti=d703e000) Stack: dc7edbdc 00000014 00000016 d703ff40 c102c700 dc7f45d4 dc7f45d4 00000880 d703ff4c c103e571 00000000 d703ffac c103e751 66616976 da140062 db89ba80 00000328 d702edf8 db89ba80 d703ff9c c105d0f0 00000200 da14f898 00000014 Call Trace: [] ? destroy_params+0x1e/0x2b [] ? free_module+0xa2/0xd7 [] ? sys_delete_module+0x1ab/0x1da [] ? do_munmap+0x20a/0x225 [] ? sysenter_do_call+0x12/0x26 Code: 10 76 7a 8d 87 00 00 00 40 c1 e8 0c c1 e0 05 03 05 1c 87 41 c1 66 83 38 00 79 03 8b 40 0c 8b 10 84 d2 78 12 66 f7 c2 00 c0 75 04 <0f> 0b eb fe e8 6f 5a fe ff eb 47 8b 55 04 8b 58 0c 9c 5e fa 3b EIP: [] kfree+0x80/0xda SS:ESP 0068:d703ff20 This is caused by the current code changing the pointers assigned by module_param. During unload it tries to free the memory the pointers point at which is now part of an internal structure. The patch simply avoids changing the pointers. This is okay as they are read only once during the initialization process. 
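Put differently, the anti-pattern was to hand a charp pointer to module_param() and then point it somewhere else later. A simplified sketch of the old behaviour (not the exact driver code; the parameter permission and the index variable are placeholders):

	static char *viafb_mode = "640x480";
	module_param(viafb_mode, charp, S_IRUGO);

	/* BAD: redirects the parameter at a string inside the static
	 * viafb_modentry[] table.  On rmmod the module-parameter teardown
	 * tries to free whatever viafb_mode now points to, which was never
	 * allocated for it; hence the kfree() BUG quoted above. */
	viafb_mode = viafb_modentry[i].mode_res;

With the patch the mode strings are only parsed, never written back, so the pointers keep referencing the memory the module-parameter code set up.
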
Signed-off-by: Florian Tobias Schandinat Cc: Scott Fang Cc: Joseph Chan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/via/hw.c | 4 +- drivers/video/via/lcd.c | 15 ++----- drivers/video/via/viafbdev.c | 101 ++++++++++++++++++++----------------------- drivers/video/via/viafbdev.h | 3 +- 4 files changed, 53 insertions(+), 70 deletions(-) (limited to 'drivers') diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c index fcd53ceb88fa..c8960003f47d 100644 --- a/drivers/video/via/hw.c +++ b/drivers/video/via/hw.c @@ -2407,14 +2407,14 @@ int viafb_setmode(int vmode_index, int hor_res, int ver_res, int video_bpp, viafb_dvi_set_mode(viafb_get_mode_index (viaparinfo->tmds_setting_info->h_active, viaparinfo->tmds_setting_info-> - v_active, 1), + v_active), video_bpp1, viaparinfo-> tmds_setting_info->iga_path); } else { viafb_dvi_set_mode(viafb_get_mode_index (viaparinfo->tmds_setting_info->h_active, viaparinfo-> - tmds_setting_info->v_active, 0), + tmds_setting_info->v_active), video_bpp, viaparinfo-> tmds_setting_info->iga_path); } diff --git a/drivers/video/via/lcd.c b/drivers/video/via/lcd.c index 6c7290a6a447..78c6b3387947 100644 --- a/drivers/video/via/lcd.c +++ b/drivers/video/via/lcd.c @@ -580,10 +580,7 @@ static void load_lcd_k400_patch_tbl(int set_hres, int set_vres, int reg_num = 0; struct io_reg *lcd_patch_reg = NULL; - if (viaparinfo->lvds_setting_info->iga_path == IGA2) - vmode_index = viafb_get_mode_index(set_hres, set_vres, 1); - else - vmode_index = viafb_get_mode_index(set_hres, set_vres, 0); + vmode_index = viafb_get_mode_index(set_hres, set_vres); switch (panel_id) { /* LCD 800x600 */ case LCD_PANEL_ID1_800X600: @@ -761,10 +758,7 @@ static void load_lcd_p880_patch_tbl(int set_hres, int set_vres, int reg_num = 0; struct io_reg *lcd_patch_reg = NULL; - if (viaparinfo->lvds_setting_info->iga_path == IGA2) - vmode_index = viafb_get_mode_index(set_hres, set_vres, 1); - else - vmode_index = viafb_get_mode_index(set_hres, set_vres, 0); + vmode_index = viafb_get_mode_index(set_hres, set_vres); switch (panel_id) { case LCD_PANEL_ID5_1400X1050: @@ -832,10 +826,7 @@ static void load_lcd_patch_regs(int set_hres, int set_vres, { int vmode_index; - if (viaparinfo->lvds_setting_info->iga_path == IGA2) - vmode_index = viafb_get_mode_index(set_hres, set_vres, 1); - else - vmode_index = viafb_get_mode_index(set_hres, set_vres, 0); + vmode_index = viafb_get_mode_index(set_hres, set_vres); viafb_unlock_crt(); diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c index a0fec298216e..72833f3334b5 100644 --- a/drivers/video/via/viafbdev.c +++ b/drivers/video/via/viafbdev.c @@ -32,7 +32,6 @@ static u32 pseudo_pal[17]; /* video mode */ static char *viafb_mode = "640x480"; static char *viafb_mode1 = "640x480"; -static int viafb_resMode = VIA_RES_640X480; /* Added for specifying active devices.*/ char *viafb_active_dev = ""; @@ -56,47 +55,47 @@ static void viafb_get_video_device(u32 *video_dev_info); /* Mode information */ static const struct viafb_modeinfo viafb_modentry[] = { - {480, 640, VIA_RES_480X640, "480x640"}, - {640, 480, VIA_RES_640X480, "640x480"}, - {800, 480, VIA_RES_800X480, "800x480"}, - {800, 600, VIA_RES_800X600, "800x600"}, - {1024, 768, VIA_RES_1024X768, "1024x768"}, - {1152, 864, VIA_RES_1152X864, "1152x864"}, - {1280, 1024, VIA_RES_1280X1024, "1280x1024"}, - {1600, 1200, VIA_RES_1600X1200, "1600x1200"}, - {1440, 1050, VIA_RES_1440X1050, "1440x1050"}, - {1280, 768, VIA_RES_1280X768, "1280x768"}, - {1280, 800, VIA_RES_1280X800, 
"1280x800"}, - {1280, 960, VIA_RES_1280X960, "1280x960"}, - {1920, 1440, VIA_RES_1920X1440, "1920x1440"}, - {848, 480, VIA_RES_848X480, "848x480"}, - {1400, 1050, VIA_RES_1400X1050, "1400x1050"}, - {720, 480, VIA_RES_720X480, "720x480"}, - {720, 576, VIA_RES_720X576, "720x576"}, - {1024, 512, VIA_RES_1024X512, "1024x512"}, - {1024, 576, VIA_RES_1024X576, "1024x576"}, - {1024, 600, VIA_RES_1024X600, "1024x600"}, - {1280, 720, VIA_RES_1280X720, "1280x720"}, - {1920, 1080, VIA_RES_1920X1080, "1920x1080"}, - {1366, 768, VIA_RES_1368X768, "1368x768"}, - {1680, 1050, VIA_RES_1680X1050, "1680x1050"}, - {960, 600, VIA_RES_960X600, "960x600"}, - {1000, 600, VIA_RES_1000X600, "1000x600"}, - {1024, 576, VIA_RES_1024X576, "1024x576"}, - {1024, 600, VIA_RES_1024X600, "1024x600"}, - {1088, 612, VIA_RES_1088X612, "1088x612"}, - {1152, 720, VIA_RES_1152X720, "1152x720"}, - {1200, 720, VIA_RES_1200X720, "1200x720"}, - {1280, 600, VIA_RES_1280X600, "1280x600"}, - {1360, 768, VIA_RES_1360X768, "1360x768"}, - {1440, 900, VIA_RES_1440X900, "1440x900"}, - {1600, 900, VIA_RES_1600X900, "1600x900"}, - {1600, 1024, VIA_RES_1600X1024, "1600x1024"}, - {1792, 1344, VIA_RES_1792X1344, "1792x1344"}, - {1856, 1392, VIA_RES_1856X1392, "1856x1392"}, - {1920, 1200, VIA_RES_1920X1200, "1920x1200"}, - {2048, 1536, VIA_RES_2048X1536, "2048x1536"}, - {0, 0, VIA_RES_INVALID, "640x480"} + {480, 640, VIA_RES_480X640}, + {640, 480, VIA_RES_640X480}, + {800, 480, VIA_RES_800X480}, + {800, 600, VIA_RES_800X600}, + {1024, 768, VIA_RES_1024X768}, + {1152, 864, VIA_RES_1152X864}, + {1280, 1024, VIA_RES_1280X1024}, + {1600, 1200, VIA_RES_1600X1200}, + {1440, 1050, VIA_RES_1440X1050}, + {1280, 768, VIA_RES_1280X768,}, + {1280, 800, VIA_RES_1280X800}, + {1280, 960, VIA_RES_1280X960}, + {1920, 1440, VIA_RES_1920X1440}, + {848, 480, VIA_RES_848X480}, + {1400, 1050, VIA_RES_1400X1050}, + {720, 480, VIA_RES_720X480}, + {720, 576, VIA_RES_720X576}, + {1024, 512, VIA_RES_1024X512}, + {1024, 576, VIA_RES_1024X576}, + {1024, 600, VIA_RES_1024X600}, + {1280, 720, VIA_RES_1280X720}, + {1920, 1080, VIA_RES_1920X1080}, + {1366, 768, VIA_RES_1368X768}, + {1680, 1050, VIA_RES_1680X1050}, + {960, 600, VIA_RES_960X600}, + {1000, 600, VIA_RES_1000X600}, + {1024, 576, VIA_RES_1024X576}, + {1024, 600, VIA_RES_1024X600}, + {1088, 612, VIA_RES_1088X612}, + {1152, 720, VIA_RES_1152X720}, + {1200, 720, VIA_RES_1200X720}, + {1280, 600, VIA_RES_1280X600}, + {1360, 768, VIA_RES_1360X768}, + {1440, 900, VIA_RES_1440X900}, + {1600, 900, VIA_RES_1600X900}, + {1600, 1024, VIA_RES_1600X1024}, + {1792, 1344, VIA_RES_1792X1344}, + {1856, 1392, VIA_RES_1856X1392}, + {1920, 1200, VIA_RES_1920X1200}, + {2048, 1536, VIA_RES_2048X1536}, + {0, 0, VIA_RES_INVALID} }; static struct fb_ops viafb_ops; @@ -177,7 +176,7 @@ static int viafb_check_var(struct fb_var_screeninfo *var, if (var->vmode & FB_VMODE_INTERLACED || var->vmode & FB_VMODE_DOUBLE) return -EINVAL; - vmode_index = viafb_get_mode_index(var->xres, var->yres, 0); + vmode_index = viafb_get_mode_index(var->xres, var->yres); if (vmode_index == VIA_RES_INVALID) { DEBUG_MSG(KERN_INFO "viafb: Mode %dx%dx%d not supported!!\n", @@ -233,14 +232,14 @@ static int viafb_set_par(struct fb_info *info) viafb_update_device_setting(info->var.xres, info->var.yres, info->var.bits_per_pixel, viafb_refresh, 0); - vmode_index = viafb_get_mode_index(info->var.xres, info->var.yres, 0); + vmode_index = viafb_get_mode_index(info->var.xres, info->var.yres); if (viafb_SAMM_ON == 1) { DEBUG_MSG(KERN_INFO "viafb_second_xres = %d, viafb_second_yres = 
%d, bpp = %d\n", viafb_second_xres, viafb_second_yres, viafb_bpp1); vmode_index1 = viafb_get_mode_index(viafb_second_xres, - viafb_second_yres, 1); + viafb_second_yres); DEBUG_MSG(KERN_INFO "->viafb_SAMM_ON: index=%d\n", vmode_index1); @@ -1262,7 +1261,7 @@ static int viafb_sync(struct fb_info *info) return 0; } -int viafb_get_mode_index(int hres, int vres, int flag) +int viafb_get_mode_index(int hres, int vres) { u32 i; DEBUG_MSG(KERN_INFO "viafb_get_mode_index!\n"); @@ -1272,13 +1271,7 @@ int viafb_get_mode_index(int hres, int vres, int flag) viafb_modentry[i].yres == vres) break; - viafb_resMode = viafb_modentry[i].mode_index; - if (flag) - viafb_mode1 = viafb_modentry[i].mode_res; - else - viafb_mode = viafb_modentry[i].mode_res; - - return viafb_resMode; + return viafb_modentry[i].mode_index; } static void check_available_device_to_enable(int device_id) @@ -2199,7 +2192,7 @@ static int __devinit via_pci_probe(void) strict_strtoul(tmpc, 0, &default_xres); strict_strtoul(tmpm, 0, &default_yres); - vmode_index = viafb_get_mode_index(default_xres, default_yres, 0); + vmode_index = viafb_get_mode_index(default_xres, default_yres); DEBUG_MSG(KERN_INFO "0->index=%d\n", vmode_index); if (viafb_SAMM_ON == 1) { diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h index a4158e872878..227b000feb38 100644 --- a/drivers/video/via/viafbdev.h +++ b/drivers/video/via/viafbdev.h @@ -81,7 +81,6 @@ struct viafb_modeinfo { u32 xres; u32 yres; int mode_index; - char *mode_res; }; extern unsigned int viafb_second_virtual_yres; extern unsigned int viafb_second_virtual_xres; @@ -102,7 +101,7 @@ extern int strict_strtoul(const char *cp, unsigned int base, void viafb_memory_pitch_patch(struct fb_info *info); void viafb_fill_var_timing_info(struct fb_var_screeninfo *var, int refresh, int mode_index); -int viafb_get_mode_index(int hres, int vres, int flag); +int viafb_get_mode_index(int hres, int vres); u8 viafb_gpio_i2c_read_lvds(struct lvds_setting_information *plvds_setting_info, struct lvds_chip_information *plvds_chip_info, u8 index); -- cgit v1.2.3 From 0035fe00f77d2b0a1a2d001f7442136d1ec5aefa Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Aug 2009 15:07:36 -0700 Subject: fbcon: don't use vc_resize() on initialization Catalin and kmemleak spotted a leak of a VC screen buffer in vc_allocate() due to the following chain of events: vc_allocate() visual_init(init=1) vc->vc_sw->con_init(init=1) fbcon_init() vc_resize() vc->screen_buf = kmalloc() vc->screen_buf = kmalloc() The common way for the VC drivers is to set the screen dimension parameters manually in the init case and only call vc_resize() for !init - which allocates a screen buffer according to the new dimensions. fbcon instead would do vc_resize() unconditionally and afterwards set the dimensions manually (again) for !init - i.e. completely upside down. The vc_resize() allocated buffer would then get lost by vc_allocate() allocating a fresh one. Use vc_resize() only for actual resizing to close the leak. Set the dimensions manually only in initialization mode to remove the redundant setting in resize mode. The kmemleak trace from Catalin: unreferenced object 0xde158000 (size 12288): comm "Xorg", pid 1439, jiffies 4294961016 hex dump (first 32 bytes): 20 00 20 00 20 00 20 00 20 00 20 00 20 00 20 00 . . . . . . . . 20 00 20 00 20 00 20 00 20 00 20 00 20 00 20 00 . . . . . . . . 
backtrace: [] __save_stack_trace+0x17/0x1c [] create_object+0xcd/0x188 [] kmemleak_alloc+0x1b/0x3c [] __kmalloc+0xdb/0xe8 [] vc_do_resize+0x73/0x1e0 [] vc_resize+0x15/0x18 [] fbcon_init+0x1f9/0x2b8 [] visual_init+0x9f/0xdc [] vc_allocate+0x7f/0xfc [] con_open+0x17/0x80 [] tty_open+0x1f7/0x2e4 [] chrdev_open+0x101/0x118 [] __dentry_open+0x105/0x1cc [] nameidata_to_filp+0x2d/0x38 [] do_filp_open+0x2c1/0x54c [] do_sys_open+0x3b/0xb4 Reported-by: Catalin Marinas Signed-off-by: Johannes Weiner Tested-by: Catalin Marinas Cc: Pekka Enberg Cc: Krzysztof Helt Tested-by: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/console/fbcon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 471a9a60376a..3a44695b9c09 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -1082,7 +1082,6 @@ static void fbcon_init(struct vc_data *vc, int init) new_rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); new_cols /= vc->vc_font.width; new_rows /= vc->vc_font.height; - vc_resize(vc, new_cols, new_rows); /* * We must always set the mode. The mode of the previous console @@ -1111,10 +1110,11 @@ static void fbcon_init(struct vc_data *vc, int init) * vc_{cols,rows}, but we must not set those if we are only * resizing the console. */ - if (!init) { + if (init) { vc->vc_cols = new_cols; vc->vc_rows = new_rows; - } + } else + vc_resize(vc, new_cols, new_rows); if (logo) fbcon_prepare_logo(vc, info, cols, rows, new_cols, new_rows); -- cgit v1.2.3 From 20de03dae54e10271ffd308654dfb4a117f4789d Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 6 Aug 2009 15:07:40 -0700 Subject: i.MX31: fix framebuffer locking regressions Recent framebuffer locking patches first made affected systems unbootable, then the dead-lock has been fixed but as of 2.6.31-rc4 the framebuffer on mx3 machines doesn't work. Fix this. Signed-off-by: Guennadi Liakhovetski Cc: Sascha Hauer Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/mx3fb.c | 86 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c index f8778cde2183..054ef29be479 100644 --- a/drivers/video/mx3fb.c +++ b/drivers/video/mx3fb.c @@ -669,7 +669,8 @@ static uint32_t bpp_to_pixfmt(int bpp) } static int mx3fb_blank(int blank, struct fb_info *fbi); -static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len); +static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len, + bool lock); static int mx3fb_unmap_video_memory(struct fb_info *fbi); /** @@ -711,12 +712,7 @@ static void mx3fb_dma_done(void *arg) complete(&mx3_fbi->flip_cmpl); } -/** - * mx3fb_set_par() - set framebuffer parameters and change the operating mode. - * @fbi: framebuffer information pointer. - * @return: 0 on success or negative error code on failure. - */ -static int mx3fb_set_par(struct fb_info *fbi) +static int __set_par(struct fb_info *fbi, bool lock) { u32 mem_len; struct ipu_di_signal_cfg sig_cfg; @@ -727,10 +723,6 @@ static int mx3fb_set_par(struct fb_info *fbi) struct idmac_video_param *video = &ichan->params.video; struct scatterlist *sg = mx3_fbi->sg; - dev_dbg(mx3fb->dev, "%s [%c]\n", __func__, list_empty(&ichan->queue) ? 
'-' : '+'); - - mutex_lock(&mx3_fbi->mutex); - /* Total cleanup */ if (mx3_fbi->txd) sdc_disable_channel(mx3_fbi); @@ -742,10 +734,8 @@ static int mx3fb_set_par(struct fb_info *fbi) if (fbi->fix.smem_start) mx3fb_unmap_video_memory(fbi); - if (mx3fb_map_video_memory(fbi, mem_len) < 0) { - mutex_unlock(&mx3_fbi->mutex); + if (mx3fb_map_video_memory(fbi, mem_len, lock) < 0) return -ENOMEM; - } } sg_init_table(&sg[0], 1); @@ -791,7 +781,6 @@ static int mx3fb_set_par(struct fb_info *fbi) fbi->var.vsync_len, fbi->var.lower_margin + fbi->var.vsync_len, sig_cfg) != 0) { - mutex_unlock(&mx3_fbi->mutex); dev_err(fbi->device, "mx3fb: Error initializing panel.\n"); return -EINVAL; @@ -810,9 +799,30 @@ static int mx3fb_set_par(struct fb_info *fbi) if (mx3_fbi->blank == FB_BLANK_UNBLANK) sdc_enable_channel(mx3_fbi); + return 0; +} + +/** + * mx3fb_set_par() - set framebuffer parameters and change the operating mode. + * @fbi: framebuffer information pointer. + * @return: 0 on success or negative error code on failure. + */ +static int mx3fb_set_par(struct fb_info *fbi) +{ + struct mx3fb_info *mx3_fbi = fbi->par; + struct mx3fb_data *mx3fb = mx3_fbi->mx3fb; + struct idmac_channel *ichan = mx3_fbi->idmac_channel; + int ret; + + dev_dbg(mx3fb->dev, "%s [%c]\n", __func__, list_empty(&ichan->queue) ? '-' : '+'); + + mutex_lock(&mx3_fbi->mutex); + + ret = __set_par(fbi, true); + mutex_unlock(&mx3_fbi->mutex); - return 0; + return ret; } /** @@ -966,21 +976,11 @@ static int mx3fb_setcolreg(unsigned int regno, unsigned int red, return ret; } -/** - * mx3fb_blank() - blank the display. - */ -static int mx3fb_blank(int blank, struct fb_info *fbi) +static void __blank(int blank, struct fb_info *fbi) { struct mx3fb_info *mx3_fbi = fbi->par; struct mx3fb_data *mx3fb = mx3_fbi->mx3fb; - dev_dbg(fbi->device, "%s, blank = %d, base %p, len %u\n", __func__, - blank, fbi->screen_base, fbi->fix.smem_len); - - if (mx3_fbi->blank == blank) - return 0; - - mutex_lock(&mx3_fbi->mutex); mx3_fbi->blank = blank; switch (blank) { @@ -999,6 +999,23 @@ static int mx3fb_blank(int blank, struct fb_info *fbi) sdc_set_brightness(mx3fb, mx3fb->backlight_level); break; } +} + +/** + * mx3fb_blank() - blank the display. + */ +static int mx3fb_blank(int blank, struct fb_info *fbi) +{ + struct mx3fb_info *mx3_fbi = fbi->par; + + dev_dbg(fbi->device, "%s, blank = %d, base %p, len %u\n", __func__, + blank, fbi->screen_base, fbi->fix.smem_len); + + if (mx3_fbi->blank == blank) + return 0; + + mutex_lock(&mx3_fbi->mutex); + __blank(blank, fbi); mutex_unlock(&mx3_fbi->mutex); return 0; @@ -1198,6 +1215,7 @@ static int mx3fb_resume(struct platform_device *pdev) * mx3fb_map_video_memory() - allocates the DRAM memory for the frame buffer. * @fbi: framebuffer information pointer * @mem_len: length of mapped memory + * @lock: do not lock during initialisation * @return: Error code indicating success or failure * * This buffer is remapped into a non-cached, non-buffered, memory region to @@ -1205,7 +1223,8 @@ static int mx3fb_resume(struct platform_device *pdev) * area is remapped, all virtual memory access to the video memory should occur * at the new region. 
*/ -static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len) +static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len, + bool lock) { int retval = 0; dma_addr_t addr; @@ -1221,10 +1240,12 @@ static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len) goto err0; } - mutex_lock(&fbi->mm_lock); + if (lock) + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = addr; fbi->fix.smem_len = mem_len; - mutex_unlock(&fbi->mm_lock); + if (lock) + mutex_unlock(&fbi->mm_lock); dev_dbg(fbi->device, "allocated fb @ p=0x%08x, v=0x%p, size=%d.\n", (uint32_t) fbi->fix.smem_start, fbi->screen_base, fbi->fix.smem_len); @@ -1365,6 +1386,11 @@ static int init_fb_chan(struct mx3fb_data *mx3fb, struct idmac_channel *ichan) init_completion(&mx3fbi->flip_cmpl); disable_irq(ichan->eof_irq); dev_dbg(mx3fb->dev, "disabling irq %d\n", ichan->eof_irq); + ret = __set_par(fbi, false); + if (ret < 0) + goto esetpar; + + __blank(FB_BLANK_UNBLANK, fbi); dev_info(dev, "registered, using mode %s\n", fb_mode); -- cgit v1.2.3 From 2198a64a7487aba036f71998ade8a6528070d32c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 6 Aug 2009 15:07:41 -0700 Subject: drivers/mmc: correct error-handling code sdhci_alloc_host returns an ERR_PTR value in an error case instead of NULL. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @match exists@ expression x, E; statement S1, S2; @@ x = sdhci_alloc_host(...) ... when != x = E ( * if (x == NULL || ...) S1 else S2 | * if (x == NULL && ...) S1 else S2 ) // Signed-off-by: Julia Lawall Acked-by: Anton Vorontsov Cc: Matt Fleming Cc: Ian Molton Cc: "Roberto A. Foglietta" Cc: Philip Langdale Cc: Pierre Ossman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/sdhci-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c index 908844327db0..1e8aa590bb39 100644 --- a/drivers/mmc/host/sdhci-of.c +++ b/drivers/mmc/host/sdhci-of.c @@ -234,7 +234,7 @@ static int __devinit sdhci_of_probe(struct of_device *ofdev, return -ENODEV; host = sdhci_alloc_host(&ofdev->dev, sizeof(*of_host)); - if (!host) + if (IS_ERR(host)) return -ENOMEM; of_host = sdhci_priv(host); -- cgit v1.2.3 From 49276560c9004fce24c42e3c0ad75f34d956fc63 Mon Sep 17 00:00:00 2001 From: Khanh-Dang Nguyen Thu Lam Date: Tue, 28 Jul 2009 19:41:17 +0200 Subject: USB: pl2303: New vendor and product id I am submitting a patch for the pl2303 driver. This patch adds support for the "Sony QN-3USB" cable (vendor=0x054c, product=0x0437). This USB cable is a so-called data cable used to connect a Sony mobile phone to a computer. Supported models are Sony CMD-J5, J6, J7, J16, J26, J70 and Z7. I have used this patch with my Sony CMD-J70 for several days and I haven't encountered any kernel/hardware issue. 
From: Khanh-Dang Nguyen Thu Lam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 7d15bfa7c2db..3e86815b2705 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -95,6 +95,7 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, + { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 12aac7d2462d..ee9505e1dd92 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -126,3 +126,7 @@ /* Cressi Edy (diving computer) PC interface */ #define CRESSI_VENDOR_ID 0x04b8 #define CRESSI_EDY_PRODUCT_ID 0x0521 + +/* Sony, USB data cable for CMD-Jxx mobile phones */ +#define SONY_VENDOR_ID 0x054c +#define SONY_QN3USB_PRODUCT_ID 0x0437 -- cgit v1.2.3 From 18753ebc8a98efe0e8ff6167afb31cef220c8e50 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 29 Jul 2009 11:39:03 +0200 Subject: USB: devio: Properly do access_ok() checks access_ok() checks must be done on every part of the userspace structure that is accessed. If access_ok() on one part of the struct succeeded, it does not imply it will succeed on other parts of the struct. (Does depend on the architecture implementation of access_ok()). This changes the __get_user() users to first check access_ok() on the data structure. Signed-off-by: Michael Buesch Cc: stable Cc: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 38b8bce782d6..e192fa05f8a1 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1321,7 +1321,8 @@ static int get_urb32(struct usbdevfs_urb *kurb, struct usbdevfs_urb32 __user *uurb) { __u32 uptr; - if (get_user(kurb->type, &uurb->type) || + if (!access_ok(VERIFY_READ, uurb, sizeof(*uurb)) || + __get_user(kurb->type, &uurb->type) || __get_user(kurb->endpoint, &uurb->endpoint) || __get_user(kurb->status, &uurb->status) || __get_user(kurb->flags, &uurb->flags) || @@ -1536,8 +1537,9 @@ static int proc_ioctl_compat(struct dev_state *ps, compat_uptr_t arg) u32 udata; uioc = compat_ptr((long)arg); - if (get_user(ctrl.ifno, &uioc->ifno) || - get_user(ctrl.ioctl_code, &uioc->ioctl_code) || + if (!access_ok(VERIFY_READ, uioc, sizeof(*uioc)) || + __get_user(ctrl.ifno, &uioc->ifno) || + __get_user(ctrl.ioctl_code, &uioc->ioctl_code) || __get_user(udata, &uioc->data)) return -EFAULT; ctrl.data = compat_ptr(udata); -- cgit v1.2.3 From e8e2ff462dd92693f29eb848f42d3eb720390d59 Mon Sep 17 00:00:00 2001 From: "Gupta, Ajay Kumar" Date: Wed, 29 Jul 2009 11:58:57 +0530 Subject: USB: musb: fix the nop registration for OMAP3EVM OMAP3EVM uses ISP1504 phy which doesn't require any programming and thus has to use NOP otg transceiver. Cleanups being done: - Remove unwanted code in usb-musb.c file - Register NOP in OMAP3EVM board file using usb_nop_xceiv_register(). - Select NOP_USB_XCEIV for OMAP3EVM boards. 
- Don't enable TWL4030_USB in omap3_evm_defconfig Signed-off-by: Ajay Kumar Gupta Signed-off-by: Eino-Ville Talvala Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/omap3_evm_defconfig | 2 +- arch/arm/mach-omap2/board-omap3evm.c | 5 +++++ arch/arm/mach-omap2/usb-musb.c | 21 --------------------- drivers/usb/musb/Kconfig | 1 + 4 files changed, 7 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/arch/arm/configs/omap3_evm_defconfig b/arch/arm/configs/omap3_evm_defconfig index 28be17fbc157..d5ff4776cd0a 100644 --- a/arch/arm/configs/omap3_evm_defconfig +++ b/arch/arm/configs/omap3_evm_defconfig @@ -1107,7 +1107,7 @@ CONFIG_USB_ZERO=m CONFIG_USB_OTG_UTILS=y # CONFIG_USB_GPIO_VBUS is not set # CONFIG_ISP1301_OMAP is not set -CONFIG_TWL4030_USB=y +# CONFIG_TWL4030_USB is not set # CONFIG_NOP_USB_XCEIV is not set CONFIG_MMC=y # CONFIG_MMC_DEBUG is not set diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index d3cc145814d0..cf3dd771a678 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -307,6 +308,10 @@ static void __init omap3_evm_init(void) ARRAY_SIZE(omap3evm_spi_board_info)); omap_serial_init(); +#ifdef CONFIG_NOP_USB_XCEIV + /* OMAP3EVM uses ISP1504 phy and so register nop transceiver */ + usb_nop_xceiv_register(); +#endif usb_musb_init(); ads7846_dev_init(); } diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c index d85296dc896c..739e59e8025c 100644 --- a/arch/arm/mach-omap2/usb-musb.c +++ b/arch/arm/mach-omap2/usb-musb.c @@ -155,20 +155,6 @@ static struct platform_device musb_device = { .resource = musb_resources, }; -#ifdef CONFIG_NOP_USB_XCEIV -static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); - -static struct platform_device nop_xceiv_device = { - .name = "nop_usb_xceiv", - .id = -1, - .dev = { - .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = NULL, - }, -}; -#endif - void __init usb_musb_init(void) { if (cpu_is_omap243x()) @@ -183,13 +169,6 @@ void __init usb_musb_init(void) */ musb_plat.clock = "ick"; -#ifdef CONFIG_NOP_USB_XCEIV - if (platform_device_register(&nop_xceiv_device) < 0) { - printk(KERN_ERR "Unable to register NOP-XCEIV device\n"); - return; - } -#endif - if (platform_device_register(&musb_device) < 0) { printk(KERN_ERR "Unable to register HS-USB (MUSB) device\n"); return; diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 70073b157f0a..803adcb5ac1d 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -12,6 +12,7 @@ config USB_MUSB_HDRC depends on !SUPERH select NOP_USB_XCEIV if ARCH_DAVINCI select TWL4030_USB if MACH_OMAP_3430SDP + select NOP_USB_XCEIV if MACH_OMAP3EVM select USB_OTG_UTILS tristate 'Inventra Highspeed Dual Role Controller (TI, ADI, ...)' help -- cgit v1.2.3 From 01105a246345f011fde64d24a601090b646e9e4c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 30 Jul 2009 15:28:14 -0400 Subject: USB: usbfs: fix -ENOENT error code to be -ENODEV This patch (as1272) changes the error code returned when an open call for a USB device node fails to locate the corresponding device. The appropriate error code is -ENODEV, not -ENOENT. 
Signed-off-by: Alan Stern CC: Kay Sievers Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e192fa05f8a1..4247eccf858c 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -595,7 +595,7 @@ static int usbdev_open(struct inode *inode, struct file *file) if (!ps) goto out; - ret = -ENOENT; + ret = -ENODEV; /* usbdev device-node */ if (imajor(inode) == USB_DEVICE_MAJOR) -- cgit v1.2.3 From 7a0f0d951273eee889c2441846842348ebc00a2a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 31 Jul 2009 10:40:22 -0400 Subject: USB: EHCI: fix two new bugs related to Clear-TT-Buffer This patch (as1273) fixes two(!) bugs introduced by the new Clear-TT-Buffer implementation in ehci-hcd. It is now possible for an idle QH to have some URBs on its queue -- this will happen if a Clear-TT-Buffer is pending for the QH's endpoint. Consequently we should not issue a warning when someone tries to unlink an URB from an idle QH; instead we should process the request immediately. The refcounts for QHs could get messed up, because submit_async() would increment the refcount when calling qh_link_async() and qh_link_async() would then refuse to link the QH into the schedule if a Clear-TT-Buffer was pending. Instead we should increment the refcount only when the QH actually is added to the schedule. The current code tries to be clever by leaving the refcount alone if an unlink is immediately followed by a relink; the patch changes this to an unconditional decrement and increment (although they occur in the opposite order). Signed-off-by: Alan Stern CC: David Brownell Tested-by: Manuel Lauss Tested-by: Matthijs Kooijman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 3 ++- drivers/usb/host/ehci-q.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 7d03549c3339..11c627ce6022 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -903,7 +903,8 @@ static int ehci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* already started */ break; case QH_STATE_IDLE: - WARN_ON(1); + /* QH might be waiting for a Clear-TT-Buffer */ + qh_completions(ehci, qh); break; } break; diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 9a1384747f3b..b27380505576 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -940,6 +940,7 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh) head->qh_next.qh = qh; head->hw_next = dma; + qh_get(qh); qh->xacterrs = QH_XACTERR_MAX; qh->qh_state = QH_STATE_LINKED; /* qtd completions reported later by interrupt */ @@ -1080,7 +1081,7 @@ submit_async ( * the HC and TT handle it when the TT has a buffer ready. */ if (likely (qh->qh_state == QH_STATE_IDLE)) - qh_link_async (ehci, qh_get (qh)); + qh_link_async(ehci, qh); done: spin_unlock_irqrestore (&ehci->lock, flags); if (unlikely (qh == NULL)) @@ -1115,8 +1116,6 @@ static void end_unlink_async (struct ehci_hcd *ehci) && HC_IS_RUNNING (ehci_to_hcd(ehci)->state)) qh_link_async (ehci, qh); else { - qh_put (qh); // refcount from async list - /* it's not free to turn the async schedule on/off; leave it * active but idle for a while once it empties. 
*/ @@ -1124,6 +1123,7 @@ static void end_unlink_async (struct ehci_hcd *ehci) && ehci->async->qh_next.qh == NULL) timer_action (ehci, TIMER_ASYNC_OFF); } + qh_put(qh); /* refcount from async list */ if (next) { ehci->reclaim = NULL; -- cgit v1.2.3 From ef4638f955f2c4a667c8af20769d03f5ed3781ca Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 31 Jul 2009 10:41:40 -0400 Subject: USB: EHCI: fix counting of transaction error retries This patch (as1274) simplifies the counting of transaction-error retries. Now we will count up from 0 to QH_XACTERR_MAX instead of down from QH_XACTERR_MAX to 0. The patch also fixes a small bug: qh->xacterr was not getting initialized for interrupt endpoints. Signed-off-by: Alan Stern Tested-by: Matthijs Kooijman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-q.c | 9 ++++----- drivers/usb/host/ehci-sched.c | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index b27380505576..7673554fa64d 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -375,12 +375,11 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh) */ if ((token & QTD_STS_XACT) && QTD_CERR(token) == 0 && - --qh->xacterrs > 0 && + ++qh->xacterrs < QH_XACTERR_MAX && !urb->unlinked) { ehci_dbg(ehci, "detected XactErr len %zu/%zu retry %d\n", - qtd->length - QTD_LENGTH(token), qtd->length, - QH_XACTERR_MAX - qh->xacterrs); + qtd->length - QTD_LENGTH(token), qtd->length, qh->xacterrs); /* reset the token in the qtd and the * qh overlay (which still contains @@ -494,7 +493,7 @@ halt: last = qtd; /* reinit the xacterr counter for the next qtd */ - qh->xacterrs = QH_XACTERR_MAX; + qh->xacterrs = 0; } /* last urb's completion might still need calling */ @@ -941,7 +940,7 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh) head->hw_next = dma; qh_get(qh); - qh->xacterrs = QH_XACTERR_MAX; + qh->xacterrs = 0; qh->qh_state = QH_STATE_LINKED; /* qtd completions reported later by interrupt */ } diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index 74f7f83b29ad..edd61ee90323 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -542,6 +542,7 @@ static int qh_link_periodic (struct ehci_hcd *ehci, struct ehci_qh *qh) } } qh->qh_state = QH_STATE_LINKED; + qh->xacterrs = 0; qh_get (qh); /* update per-qh bandwidth for usbfs */ -- cgit v1.2.3 From c47aacc67a3d26dfab9c9b8965975ed2b2010b30 Mon Sep 17 00:00:00 2001 From: Marko Hänninen Date: Fri, 31 Jul 2009 22:32:39 +0300 Subject: USB: ftdi_sio: add vendor and product id for Bayer glucose meter serial converter cable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attached patch adds USB vendor and product IDs for Bayer's USB to serial converter cable used by Bayer blood glucose meters. It seems to be a FT232RL based device and works without any problem with ftdi_sio driver when this patch is applied. 
See: http://winglucofacts.com/cables/ Signed-off-by: Marko Hänninen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index b574878c78b2..500087ea58a5 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -699,6 +699,7 @@ static struct usb_device_id id_table_combined [] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, + { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, { }, /* Optional parameter entry */ { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h index 24dbd99e87d7..f203b2a51302 100644 --- a/drivers/usb/serial/ftdi_sio.h +++ b/drivers/usb/serial/ftdi_sio.h @@ -953,6 +953,13 @@ #define GN_OTOMETRICS_VID 0x0c33 /* Vendor ID */ #define AURICAL_USB_PID 0x0010 /* Aurical USB Audiometer */ +/* + * Bayer Ascensia Contour blood glucose meter USB-converter cable. + * http://winglucofacts.com/cables/ + */ +#define BAYER_VID 0x1A79 +#define BAYER_CONTOUR_CABLE_PID 0x6001 + /* * BmRequestType: 1100 0000b * bRequest: FTDI_E2_READ -- cgit v1.2.3 From 50d0678e2026c18e4147f0b16b5853113659b82d Mon Sep 17 00:00:00 2001 From: Dhaval Vasa Date: Fri, 7 Aug 2009 17:26:49 +0530 Subject: USB: ftdi_sio: add product_id for Marvell OpenRD Base, Client reference: http://www.open-rd.org Signed-off-by: Dhaval Vasa Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 500087ea58a5..8fec5d4455c9 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -700,6 +700,8 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, + { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { }, /* Optional parameter entry */ { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio.h b/drivers/usb/serial/ftdi_sio.h index f203b2a51302..8c92b88166ae 100644 --- a/drivers/usb/serial/ftdi_sio.h +++ b/drivers/usb/serial/ftdi_sio.h @@ -960,6 +960,13 @@ #define BAYER_VID 0x1A79 #define BAYER_CONTOUR_CABLE_PID 0x6001 +/* + * Marvell OpenRD Base, Client + * http://www.open-rd.org + * OpenRD Base, Client use VID 0x0403 + */ +#define MARVELL_OPENRD_PID 0x9e90 + /* * BmRequestType: 1100 0000b * bRequest: FTDI_E2_READ -- cgit v1.2.3 From c15e3ca1d822abba78c00b1ffc3e7b382a50396e Mon Sep 17 00:00:00 2001 From: Rogerio Brito Date: Thu, 6 Aug 2009 15:20:19 -0700 Subject: USB: storage: include Prolific Technology USB drive in unusual_devs list Add a quirk entry for the Leading Driver UD-11 usb flash drive. As Alan Stern told me, the device doesn't deal correctly with the locking media feature of the device, and this patch incorporates it. Compiled, tested, working. 
Signed-off-by: Rogerio Brito Cc: Phil Dibowitz Cc: Alan Stern Cc: Robert Hancock Cc: stable Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1b9c5dd0fb27..7477d411959f 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -838,6 +838,13 @@ UNUSUAL_DEV( 0x066f, 0x8000, 0x0001, 0x0001, US_SC_DEVICE, US_PR_DEVICE, NULL, US_FL_FIX_CAPACITY ), +/* Reported by Rogerio Brito */ +UNUSUAL_DEV( 0x067b, 0x2317, 0x0001, 0x001, + "Prolific Technology, Inc.", + "Mass Storage Device", + US_SC_DEVICE, US_PR_DEVICE, NULL, + US_FL_NOT_LOCKABLE ), + /* Reported by Richard -=[]=- */ /* Change to bcdDeviceMin (0x0100 to 0x0001) reported by * Thomas Bartosik */ -- cgit v1.2.3 From cf7fdd57f978d40ceb9a0f58a25f5cf9c84d6f33 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 4 Aug 2009 23:52:09 +0200 Subject: USB: fix oops on disconnect in cdc-acm This patch fixes an oops caused when during an unplug a device's table of endpoints is zeroed before the driver is notified. A pointer to the endpoint must be cached. this fixes a regression caused by commit 5186ffee2320942c3dc9745f7930e0eb15329ca6 Therefore it should go into 2.6.31 Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 10 +++++----- drivers/usb/class/cdc-acm.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e1f89416ef8c..2bfc41ece0e1 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -387,7 +387,6 @@ static void acm_rx_tasklet(unsigned long _acm) struct acm_ru *rcv; unsigned long flags; unsigned char throttled; - struct usb_host_endpoint *ep; dbg("Entering acm_rx_tasklet"); @@ -463,14 +462,12 @@ urbs: rcv->buffer = buf; - ep = (usb_pipein(acm->rx_endpoint) ? 
acm->dev->ep_in : acm->dev->ep_out) - [usb_pipeendpoint(acm->rx_endpoint)]; - if (usb_endpoint_xfer_int(&ep->desc)) + if (acm->is_int_ep) usb_fill_int_urb(rcv->urb, acm->dev, acm->rx_endpoint, buf->base, acm->readsize, - acm_read_bulk, rcv, ep->desc.bInterval); + acm_read_bulk, rcv, acm->bInterval); else usb_fill_bulk_urb(rcv->urb, acm->dev, acm->rx_endpoint, @@ -1183,6 +1180,9 @@ made_compressed_probe: spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); + acm->is_int_ep = usb_endpoint_xfer_int(epread); + if (acm->is_int_ep) + acm->bInterval = epread->bInterval; tty_port_init(&acm->port); acm->port.ops = &acm_port_ops; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 1602324808ba..c4a0ee8ffccf 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -126,6 +126,8 @@ struct acm { unsigned int ctrl_caps; /* control capabilities from the class specific header */ unsigned int susp_count; /* number of suspended interfaces */ int combined_interfaces:1; /* control and data collapsed */ + int is_int_ep:1; /* interrupt endpoints contrary to spec used */ + u8 bInterval; struct acm_wb *delayed_wb; /* write queued for a device about to be woken */ }; -- cgit v1.2.3 From 4d2da07bc876fc5bc455e0721df388a3db7e4ba5 Mon Sep 17 00:00:00 2001 From: Jakob Gruber Date: Thu, 30 Jul 2009 20:37:36 +0200 Subject: Staging: rt2870: Add USB ID for Linksys, Planex Communications, Belkin Linksys WUSB100, Belkin F5D8053 N, Planex Communications unknown model. Signed-off-by: Jakob Gruber Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rt2870/rt2870.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/staging/rt2870/rt2870.h b/drivers/staging/rt2870/rt2870.h index 29e3b53e52a1..10e1640d61e5 100644 --- a/drivers/staging/rt2870/rt2870.h +++ b/drivers/staging/rt2870/rt2870.h @@ -79,6 +79,7 @@ { \ {USB_DEVICE(0x148F,0x2770)}, /* Ralink */ \ {USB_DEVICE(0x1737,0x0071)}, /* Linksys WUSB600N */ \ + {USB_DEVICE(0x1737,0x0070)}, /* Linksys */ \ {USB_DEVICE(0x148F,0x2870)}, /* Ralink */ \ {USB_DEVICE(0x148F,0x3070)}, /* Ralink */ \ {USB_DEVICE(0x0B05,0x1731)}, /* Asus */ \ @@ -93,12 +94,14 @@ {USB_DEVICE(0x14B2,0x3C06)}, /* Conceptronic */ \ {USB_DEVICE(0x14B2,0x3C28)}, /* Conceptronic */ \ {USB_DEVICE(0x2019,0xED06)}, /* Planex Communications, Inc. */ \ + {USB_DEVICE(0x2019,0xED14)}, /* Planex Communications, Inc. */ \ {USB_DEVICE(0x2019,0xAB25)}, /* Planex Communications, Inc. RT3070 */ \ {USB_DEVICE(0x07D1,0x3C09)}, /* D-Link */ \ {USB_DEVICE(0x07D1,0x3C11)}, /* D-Link */ \ {USB_DEVICE(0x14B2,0x3C07)}, /* AL */ \ {USB_DEVICE(0x14B2,0x3C12)}, /* AL */ \ {USB_DEVICE(0x050D,0x8053)}, /* Belkin */ \ + {USB_DEVICE(0x050D,0x815C)}, /* Belkin */ \ {USB_DEVICE(0x14B2,0x3C23)}, /* Airlink */ \ {USB_DEVICE(0x14B2,0x3C27)}, /* Airlink */ \ {USB_DEVICE(0x07AA,0x002F)}, /* Corega */ \ -- cgit v1.2.3 From 2c63abf9e8a51dec886da482dfd8ae752581a61c Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 31 Jul 2009 07:14:04 +0200 Subject: Staging: rt2870: Revert d44ca7 Removal of kernel_thread() API Staging: rt2870: Revert d44ca7 Removal of kernel_thread() API The sanity check this patch introduced triggers on shutdown, apparently due to threads having already exited by the time BUG_ON() is reached. 
Signed-off-by: Mike Galbraith Cc: Peter Teoh Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rt2860/rt_linux.h | 13 ++--- drivers/staging/rt2870/2870_main_dev.c | 67 +++++++++++++++++--------- drivers/staging/rt2870/common/2870_rtmp_init.c | 33 ++++++------- drivers/staging/rt2870/common/rtusb_io.c | 3 +- drivers/staging/rt2870/rt2870.h | 6 +-- 5 files changed, 68 insertions(+), 54 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/rt2860/rt_linux.h b/drivers/staging/rt2860/rt_linux.h index 85175c182432..25b53ac3f820 100644 --- a/drivers/staging/rt2860/rt_linux.h +++ b/drivers/staging/rt2860/rt_linux.h @@ -43,9 +43,6 @@ #include "rtmp_type.h" #include #include -#if !defined(RT2860) && !defined(RT30xx) -#include -#endif #include #include @@ -166,9 +163,7 @@ typedef int (*HARD_START_XMIT_FUNC)(struct sk_buff *skb, struct net_device *net_ #ifndef RT30xx typedef struct pid * THREAD_PID; -#ifdef RT2860 #define THREAD_PID_INIT_VALUE NULL -#endif #define GET_PID(_v) find_get_pid(_v) #define GET_PID_NUMBER(_v) pid_nr(_v) #define CHECK_PID_LEGALITY(_pid) if (pid_nr(_pid) >= 0) @@ -188,12 +183,12 @@ struct os_cookie { dma_addr_t pAd_pa; #endif #ifdef RT2870 - struct usb_device *pUsb_Dev; + struct usb_device *pUsb_Dev; #ifndef RT30xx - struct task_struct *MLMEThr_task; - struct task_struct *RTUSBCmdThr_task; - struct task_struct *TimerQThr_task; + THREAD_PID MLMEThr_pid; + THREAD_PID RTUSBCmdThr_pid; + THREAD_PID TimerQThr_pid; #endif #ifdef RT30xx struct pid *MLMEThr_pid; diff --git a/drivers/staging/rt2870/2870_main_dev.c b/drivers/staging/rt2870/2870_main_dev.c index dd01c64fbf61..a4e8696ca39c 100644 --- a/drivers/staging/rt2870/2870_main_dev.c +++ b/drivers/staging/rt2870/2870_main_dev.c @@ -235,7 +235,7 @@ INT MlmeThread( DBGPRINT(RT_DEBUG_TRACE,( "<---%s\n",__func__)); #ifndef RT30xx - pObj->MLMEThr_task = NULL; + pObj->MLMEThr_pid = THREAD_PID_INIT_VALUE; #endif #ifdef RT30xx pObj->MLMEThr_pid = NULL; @@ -348,7 +348,7 @@ INT RTUSBCmdThread( DBGPRINT(RT_DEBUG_TRACE,( "<---RTUSBCmdThread\n")); #ifndef RT30xx - pObj->RTUSBCmdThr_task = NULL; + pObj->RTUSBCmdThr_pid = THREAD_PID_INIT_VALUE; #endif #ifdef RT30xx pObj->RTUSBCmdThr_pid = NULL; @@ -447,7 +447,7 @@ INT TimerQThread( DBGPRINT(RT_DEBUG_TRACE,( "<---%s\n",__func__)); #ifndef RT30xx - pObj->TimerQThr_task = NULL; + pObj->TimerQThr_pid = THREAD_PID_INIT_VALUE; #endif #ifdef RT30xx pObj->TimerQThr_pid = NULL; @@ -883,46 +883,69 @@ VOID RT28xxThreadTerminate( // Terminate Threads #ifndef RT30xx - BUG_ON(pObj->TimerQThr_task == NULL); - CHECK_PID_LEGALITY(task_pid(pObj->TimerQThr_task)) + CHECK_PID_LEGALITY(pObj->TimerQThr_pid) { POS_COOKIE pObj = (POS_COOKIE)pAd->OS_Cookie; - printk(KERN_DEBUG "Terminate the TimerQThr pid=%d!\n", - pid_nr(task_pid(pObj->TimerQThr_task))); + printk("Terminate the TimerQThr_pid=%d!\n", GET_PID_NUMBER(pObj->TimerQThr_pid)); mb(); pAd->TimerFunc_kill = 1; mb(); - kthread_stop(pObj->TimerQThr_task); - pObj->TimerQThr_task = NULL; + ret = KILL_THREAD_PID(pObj->TimerQThr_pid, SIGTERM, 1); + if (ret) + { + printk(KERN_WARNING "%s: unable to stop TimerQThread, pid=%d, ret=%d!\n", + pAd->net_dev->name, GET_PID_NUMBER(pObj->TimerQThr_pid), ret); + } + else + { + wait_for_completion(&pAd->TimerQComplete); + pObj->TimerQThr_pid = THREAD_PID_INIT_VALUE; + } } - BUG_ON(pObj->MLMEThr_task == NULL); - CHECK_PID_LEGALITY(task_pid(pObj->MLMEThr_task)) + CHECK_PID_LEGALITY(pObj->MLMEThr_pid) { - printk(KERN_DEBUG "Terminate the MLMEThr pid=%d!\n", - pid_nr(task_pid(pObj->MLMEThr_task))); + 
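The rule being tripped over is that kthread_stop() is only safe for threads that exit solely in response to kthread_should_stop(); these threads return on their own once their kill flags are set and clear their own task pointers, so the shutdown path can find a stale or NULL pointer and the BUG_ON() fires. A minimal sketch of a thread that would be safe to manage with kthread_stop() (illustrative only, not the driver's code; needs <linux/kthread.h> and <linux/sched.h>):

	static int worker_thread(void *data)
	{
		while (!kthread_should_stop()) {
			/* wait for work, handle it, then loop */
			schedule_timeout_interruptible(HZ);
		}
		return 0;	/* exits only when kthread_stop() asks */
	}

Converting the rt2870 threads to that model is a larger job, so the revert returns to the kernel_thread()/completion scheme in which each thread signals a completion before exiting and the teardown path waits for it.
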
printk("Terminate the MLMEThr_pid=%d!\n", GET_PID_NUMBER(pObj->MLMEThr_pid)); mb(); pAd->mlme_kill = 1; //RT28XX_MLME_HANDLER(pAd); mb(); - kthread_stop(pObj->MLMEThr_task); - pObj->MLMEThr_task = NULL; + ret = KILL_THREAD_PID(pObj->MLMEThr_pid, SIGTERM, 1); + if (ret) + { + printk (KERN_WARNING "%s: unable to Mlme thread, pid=%d, ret=%d!\n", + pAd->net_dev->name, GET_PID_NUMBER(pObj->MLMEThr_pid), ret); + } + else + { + //wait_for_completion (&pAd->notify); + wait_for_completion (&pAd->mlmeComplete); + pObj->MLMEThr_pid = THREAD_PID_INIT_VALUE; + } } - BUG_ON(pObj->RTUSBCmdThr_task == NULL); - CHECK_PID_LEGALITY(task_pid(pObj->RTUSBCmdThr_task)) + CHECK_PID_LEGALITY(pObj->RTUSBCmdThr_pid) { - printk(KERN_DEBUG "Terminate the RTUSBCmdThr pid=%d!\n", - pid_nr(task_pid(pObj->RTUSBCmdThr_task))); + printk("Terminate the RTUSBCmdThr_pid=%d!\n", GET_PID_NUMBER(pObj->RTUSBCmdThr_pid)); mb(); NdisAcquireSpinLock(&pAd->CmdQLock); pAd->CmdQ.CmdQState = RT2870_THREAD_STOPED; NdisReleaseSpinLock(&pAd->CmdQLock); mb(); //RTUSBCMDUp(pAd); - kthread_stop(pObj->RTUSBCmdThr_task); - pObj->RTUSBCmdThr_task = NULL; + ret = KILL_THREAD_PID(pObj->RTUSBCmdThr_pid, SIGTERM, 1); + if (ret) + { + printk(KERN_WARNING "%s: unable to RTUSBCmd thread, pid=%d, ret=%d!\n", + pAd->net_dev->name, GET_PID_NUMBER(pObj->RTUSBCmdThr_pid), ret); + } + else + { + //wait_for_completion (&pAd->notify); + wait_for_completion (&pAd->CmdQComplete); + pObj->RTUSBCmdThr_pid = THREAD_PID_INIT_VALUE; + } } #endif #ifdef RT30xx @@ -1045,7 +1068,7 @@ BOOLEAN RT28XXChipsetCheck( dev_p->descriptor.idProduct == rtusb_usb_id[i].idProduct) { #ifndef RT30xx - printk(KERN_DEBUG "rt2870: idVendor = 0x%x, idProduct = 0x%x\n", + printk("rt2870: idVendor = 0x%x, idProduct = 0x%x\n", #endif #ifdef RT30xx printk("rt2870: idVendor = 0x%x, idProduct = 0x%x\n", diff --git a/drivers/staging/rt2870/common/2870_rtmp_init.c b/drivers/staging/rt2870/common/2870_rtmp_init.c index 0f4c8af97e47..80909e9ab5ae 100644 --- a/drivers/staging/rt2870/common/2870_rtmp_init.c +++ b/drivers/staging/rt2870/common/2870_rtmp_init.c @@ -700,8 +700,8 @@ NDIS_STATUS AdapterBlockAllocateMemory( usb_dev = pObj->pUsb_Dev; #ifndef RT30xx - pObj->MLMEThr_task = NULL; - pObj->RTUSBCmdThr_task = NULL; + pObj->MLMEThr_pid = THREAD_PID_INIT_VALUE; + pObj->RTUSBCmdThr_pid = THREAD_PID_INIT_VALUE; #endif #ifdef RT30xx pObj->MLMEThr_pid = NULL; @@ -743,7 +743,7 @@ NDIS_STATUS CreateThreads( PRTMP_ADAPTER pAd = net_dev->ml_priv; POS_COOKIE pObj = (POS_COOKIE) pAd->OS_Cookie; #ifndef RT30xx - struct task_struct *tsk; + pid_t pid_number = -1; #endif #ifdef RT30xx pid_t pid_number; @@ -762,10 +762,10 @@ NDIS_STATUS CreateThreads( // Creat MLME Thread #ifndef RT30xx - pObj->MLMEThr_task = NULL; - tsk = kthread_run(MlmeThread, pAd, "%s", pAd->net_dev->name); - - if (IS_ERR(tsk)) { + pObj->MLMEThr_pid= THREAD_PID_INIT_VALUE; + pid_number = kernel_thread(MlmeThread, pAd, CLONE_VM); + if (pid_number < 0) + { #endif #ifdef RT30xx pObj->MLMEThr_pid = NULL; @@ -778,7 +778,7 @@ NDIS_STATUS CreateThreads( } #ifndef RT30xx - pObj->MLMEThr_task = tsk; + pObj->MLMEThr_pid = GET_PID(pid_number); #endif #ifdef RT30xx pObj->MLMEThr_pid = find_get_pid(pid_number); @@ -788,10 +788,9 @@ NDIS_STATUS CreateThreads( // Creat Command Thread #ifndef RT30xx - pObj->RTUSBCmdThr_task = NULL; - tsk = kthread_run(RTUSBCmdThread, pAd, "%s", pAd->net_dev->name); - - if (IS_ERR(tsk) < 0) + pObj->RTUSBCmdThr_pid= THREAD_PID_INIT_VALUE; + pid_number = kernel_thread(RTUSBCmdThread, pAd, CLONE_VM); + if (pid_number < 0) #endif 
#ifdef RT30xx pObj->RTUSBCmdThr_pid = NULL; @@ -804,7 +803,7 @@ NDIS_STATUS CreateThreads( } #ifndef RT30xx - pObj->RTUSBCmdThr_task = tsk; + pObj->RTUSBCmdThr_pid = GET_PID(pid_number); #endif #ifdef RT30xx pObj->RTUSBCmdThr_pid = find_get_pid(pid_number); @@ -812,9 +811,9 @@ NDIS_STATUS CreateThreads( wait_for_completion(&(pAd->CmdQComplete)); #ifndef RT30xx - pObj->TimerQThr_task = NULL; - tsk = kthread_run(TimerQThread, pAd, "%s", pAd->net_dev->name); - if (IS_ERR(tsk) < 0) + pObj->TimerQThr_pid= THREAD_PID_INIT_VALUE; + pid_number = kernel_thread(TimerQThread, pAd, CLONE_VM); + if (pid_number < 0) #endif #ifdef RT30xx pObj->TimerQThr_pid = NULL; @@ -826,7 +825,7 @@ NDIS_STATUS CreateThreads( return NDIS_STATUS_FAILURE; } #ifndef RT30xx - pObj->TimerQThr_task = tsk; + pObj->TimerQThr_pid = GET_PID(pid_number); #endif #ifdef RT30xx pObj->TimerQThr_pid = find_get_pid(pid_number); diff --git a/drivers/staging/rt2870/common/rtusb_io.c b/drivers/staging/rt2870/common/rtusb_io.c index fd1b0c18f2a0..704b5c2d5091 100644 --- a/drivers/staging/rt2870/common/rtusb_io.c +++ b/drivers/staging/rt2870/common/rtusb_io.c @@ -984,8 +984,7 @@ NDIS_STATUS RTUSBEnqueueCmdFromNdis( POS_COOKIE pObj = (POS_COOKIE) pAd->OS_Cookie; #ifndef RT30xx - BUG_ON(pObj->RTUSBCmdThr_task == NULL); - CHECK_PID_LEGALITY(task_pid(pObj->RTUSBCmdThr_task)) + CHECK_PID_LEGALITY(pObj->RTUSBCmdThr_pid) #endif #ifdef RT30xx if (pObj->RTUSBCmdThr_pid < 0) diff --git a/drivers/staging/rt2870/rt2870.h b/drivers/staging/rt2870/rt2870.h index 10e1640d61e5..2b8872b2fd9d 100644 --- a/drivers/staging/rt2870/rt2870.h +++ b/drivers/staging/rt2870/rt2870.h @@ -590,16 +590,14 @@ VOID RTUSBBulkRxComplete(purbb_t pUrb, struct pt_regs *pt_regs); #define RTUSBMlmeUp(pAd) \ { \ POS_COOKIE pObj = (POS_COOKIE) pAd->OS_Cookie; \ - BUG_ON(pObj->MLMEThr_task == NULL); \ - CHECK_PID_LEGALITY(task_pid(pObj->MLMEThr_task)) \ + CHECK_PID_LEGALITY(pObj->MLMEThr_pid) \ up(&(pAd->mlme_semaphore)); \ } #define RTUSBCMDUp(pAd) \ { \ POS_COOKIE pObj = (POS_COOKIE) pAd->OS_Cookie; \ - BUG_ON(pObj->RTUSBCmdThr_task == NULL); \ - CHECK_PID_LEGALITY(task_pid(pObj->RTUSBCmdThr_task)) \ + CHECK_PID_LEGALITY(pObj->RTUSBCmdThr_pid) \ up(&(pAd->RTUSBCmd_semaphore)); \ } #endif -- cgit v1.2.3 From ce9c010c5c39df05ed81a06aba492c45ee0c6f19 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Aug 2009 16:53:36 -0700 Subject: Staging: rtl8192su: fix build error This fixes a build error when selecting the rtl8192su driver as a module. This has been reported by me, and the opensuse kernel developer team, and I finally tracked it down. 
Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192su/ieee80211.h | 2 +- drivers/staging/rtl8192su/ieee80211/ieee80211.h | 2 +- drivers/staging/rtl8192su/ieee80211/ieee80211_tx.c | 3 ++- drivers/staging/rtl8192su/r8192U_core.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/rtl8192su/ieee80211.h b/drivers/staging/rtl8192su/ieee80211.h index 0edb09a536f9..ea9739318037 100644 --- a/drivers/staging/rtl8192su/ieee80211.h +++ b/drivers/staging/rtl8192su/ieee80211.h @@ -2645,7 +2645,7 @@ extern int ieee80211_encrypt_fragment( struct sk_buff *frag, int hdr_len); -extern int ieee80211_xmit(struct sk_buff *skb, +extern int rtl8192_ieee80211_xmit(struct sk_buff *skb, struct net_device *dev); extern void ieee80211_txb_free(struct ieee80211_txb *); diff --git a/drivers/staging/rtl8192su/ieee80211/ieee80211.h b/drivers/staging/rtl8192su/ieee80211/ieee80211.h index 720bfcbfadc1..5e3a2cbed2b1 100644 --- a/drivers/staging/rtl8192su/ieee80211/ieee80211.h +++ b/drivers/staging/rtl8192su/ieee80211/ieee80211.h @@ -2645,7 +2645,7 @@ extern int ieee80211_encrypt_fragment( struct sk_buff *frag, int hdr_len); -extern int ieee80211_xmit(struct sk_buff *skb, +extern int rtl8192_ieee80211_xmit(struct sk_buff *skb, struct net_device *dev); extern void ieee80211_txb_free(struct ieee80211_txb *); diff --git a/drivers/staging/rtl8192su/ieee80211/ieee80211_tx.c b/drivers/staging/rtl8192su/ieee80211/ieee80211_tx.c index 7294572b990f..cba12b84be5c 100644 --- a/drivers/staging/rtl8192su/ieee80211/ieee80211_tx.c +++ b/drivers/staging/rtl8192su/ieee80211/ieee80211_tx.c @@ -618,7 +618,7 @@ void ieee80211_query_seqnum(struct ieee80211_device*ieee, struct sk_buff* skb, u } } -int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) +int rtl8192_ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) { #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct ieee80211_device *ieee = netdev_priv(dev); @@ -943,5 +943,6 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) return 1; } +EXPORT_SYMBOL(rtl8192_ieee80211_xmit); EXPORT_SYMBOL(ieee80211_txb_free); diff --git a/drivers/staging/rtl8192su/r8192U_core.c b/drivers/staging/rtl8192su/r8192U_core.c index 4ab250743e81..70f81a8f1291 100644 --- a/drivers/staging/rtl8192su/r8192U_core.c +++ b/drivers/staging/rtl8192su/r8192U_core.c @@ -12142,7 +12142,7 @@ static const struct net_device_ops rtl8192_netdev_ops = { .ndo_set_mac_address = r8192_set_mac_adr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = eth_change_mtu, - .ndo_start_xmit = ieee80211_xmit, + .ndo_start_xmit = rtl8192_ieee80211_xmit, }; #if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) -- cgit v1.2.3 From 5fb4d2525b6dcffbb8bc26a7dfc7ed17ad323a06 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 7 Aug 2009 16:12:03 -0700 Subject: staging: add dependencies on PCI for drivers that require it This patch adds PCI dependencies to staging drivers that require it. 
Signed-off-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- drivers/staging/b3dfg/Kconfig | 1 + drivers/staging/heci/Kconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/staging/b3dfg/Kconfig b/drivers/staging/b3dfg/Kconfig index 524231047de5..9e6573cf97d3 100644 --- a/drivers/staging/b3dfg/Kconfig +++ b/drivers/staging/b3dfg/Kconfig @@ -1,5 +1,6 @@ config B3DFG tristate "Brontes 3d Frame Framegrabber" + depends on PCI default n ---help--- This driver provides support for the Brontes 3d Framegrabber diff --git a/drivers/staging/heci/Kconfig b/drivers/staging/heci/Kconfig index ae8d588d3a27..c7206f8bcd93 100644 --- a/drivers/staging/heci/Kconfig +++ b/drivers/staging/heci/Kconfig @@ -1,5 +1,6 @@ config HECI tristate "Intel Management Engine Interface (MEI) Support" + depends on PCI ---help--- The Intel Management Engine Interface (Intel MEI) driver allows applications to access the Active Management Technology -- cgit v1.2.3 From 749d00dbf154fc2f9ac59df669205039de0d5b45 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 7 Aug 2009 21:00:10 +0200 Subject: Staging: rspiusb: Fix buffer overflow usb_buffer_map_sg() may return -1. This will result in a read from pdx->PixelUrb[frameInfo][-1] Signed-off-by: Roel Kluin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rspiusb/rspiusb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/staging/rspiusb/rspiusb.c b/drivers/staging/rspiusb/rspiusb.c index 2f8155c1968b..04e2f92c0f62 100644 --- a/drivers/staging/rspiusb/rspiusb.c +++ b/drivers/staging/rspiusb/rspiusb.c @@ -716,6 +716,8 @@ static int MapUserBuffer(struct ioctl_struct *io, struct device_extension *pdx) pdx->PixelUrb[frameInfo][i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP | URB_NO_INTERRUPT; } + if (i == 0) + return -EINVAL; /* only interrupt when last URB completes */ pdx->PixelUrb[frameInfo][--i]->transfer_flags &= ~URB_NO_INTERRUPT; pdx->pendedPixelUrbs[frameInfo] = -- cgit v1.2.3 From 38d5487db7f289be1d56ac7df704ee49ed3213b9 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 20 Jul 2009 14:49:17 -0700 Subject: drm: When adding probed modes, preserve duplicate mode types The code which takes probed modes and adds them to a connector eliminates duplicate modes by comparing them using drm_mode_equal. That function doesn't consider the type bits, which means that any modes which differ only in the type field will be lost. One of the bits in the mode->type field is the DRM_MODE_TYPE_PREFERRED bit. If the mode with that bit is lost, then higher level code will not know which mode to select, causing a random mode to be used instead. This patch simply merges the two mode type bits together; that seems reasonable to me, but perhaps only a subset of the bits should be used? None of these can be user defined as they all come from looking at just the hardware. 
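A stand-alone sketch of that merge, using simplified stand-ins (the struct, macros and values below are invented, not the real drm_display_mode): because the equality test ignores the type field, OR-ing the two type masks before discarding the duplicate is what keeps the PREFERRED bit from being lost.

#include <stdio.h>

/* Simplified stand-ins for drm_display_mode and its type flags. */
#define MODE_TYPE_DRIVER	(1 << 0)
#define MODE_TYPE_PREFERRED	(1 << 3)

struct mode {
	int hdisplay, vdisplay, clock;
	unsigned int type;
};

/* Like drm_mode_equal(), the comparison deliberately ignores ->type. */
static int mode_equal(const struct mode *a, const struct mode *b)
{
	return a->hdisplay == b->hdisplay &&
	       a->vdisplay == b->vdisplay &&
	       a->clock == b->clock;
}

int main(void)
{
	struct mode kept   = { 1280, 800, 71000, MODE_TYPE_DRIVER };
	struct mode probed = { 1280, 800, 71000,
			       MODE_TYPE_DRIVER | MODE_TYPE_PREFERRED };

	if (mode_equal(&kept, &probed)) {
		kept.type |= probed.type;	/* merge before dropping the duplicate */
		/* the probed copy would now be freed; PREFERRED survives in kept */
	}

	printf("preferred kept: %s\n",
	       (kept.type & MODE_TYPE_PREFERRED) ? "yes" : "no");
	return 0;
}

Compiled as an ordinary C program this prints "preferred kept: yes"; dropping the OR reproduces exactly the loss the patch describes.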
Signed-off-by: Keith Packard Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 54f492a488a9..7914097b09c6 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -566,6 +566,8 @@ void drm_mode_connector_list_update(struct drm_connector *connector) found_it = 1; /* if equal delete the probed mode */ mode->status = pmode->status; + /* Merge type bits together */ + mode->type |= pmode->type; list_del(&pmode->head); drm_mode_destroy(connector->dev, pmode); break; -- cgit v1.2.3 From 8d3457ec3198a569dd14dc9e3ae8b6163bcaa0b5 Mon Sep 17 00:00:00 2001 From: Paul Rolland Date: Sun, 9 Aug 2009 12:24:01 +1000 Subject: drm: silence pointless vblank warning. Some applications/hardware combinations are triggering the message "failed to acquire vblank counter" to be issued up to 20 times a second, which makes it both useless and dangerous, as this may hide other important messages. This changes makes it only appear when people are debugging. Signed-off-by: Paul Rolland Reviewed-by: Jesse Barnes Lost-twice-by: Dave Airlie Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b4a3dbcebe9b..f85aaf21e783 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -566,7 +566,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data, ret = drm_vblank_get(dev, crtc); if (ret) { - DRM_ERROR("failed to acquire vblank counter, %d\n", ret); + DRM_DEBUG("failed to acquire vblank counter, %d\n", ret); return ret; } seq = drm_vblank_count(dev, crtc); -- cgit v1.2.3 From 6cb504c29b1338925c83e4430e42a51eaa43781e Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Sun, 9 Aug 2009 12:25:29 +1000 Subject: drm/i915: silence vblank warnings these errors are pretty pointless Reviewed-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 83aee80e77a6..7ebc84c2881e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -190,7 +190,7 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe) low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL; if (!i915_pipe_enabled(dev, pipe)) { - DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); + DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe); return 0; } @@ -219,7 +219,7 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe) int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45; if (!i915_pipe_enabled(dev, pipe)) { - DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); + DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe); return 0; } -- cgit v1.2.3 From c8c00a6915a2e3d10416e8bdd3138429beb96210 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Aug 2009 12:50:52 +1000 Subject: Remove deadlock potential in md_open A recent commit: commit 449aad3e25358812c43afc60918c5ad3819488e7 introduced the possibility of an A-B/B-A deadlock between bd_mutex and reconfig_mutex. 
__blkdev_get holds bd_mutex while calling md_open which takes reconfig_mutex, do_md_run is always called with reconfig_mutex held, and it now takes bd_mutex in the call the revalidate_disk. This potential deadlock was not caught by lockdep due to the use of mutex_lock_interruptible_nexted which was introduced by commit d63a5a74dee87883fda6b7d170244acaac5b05e8 do avoid a warning of an impossible deadlock. It is quite possible to split reconfig_mutex in to two locks. One protects the array data structures while it is being reconfigured, the other ensures that an array is never even partially open while it is being deactivated. In particular, the second lock prevents an open from completing between the time when do_md_stop checks if there are any active opens, and the time when the array is either set read-only, or when ->pers is set to NULL. So we can be certain that no IO is in flight as the array is being destroyed. So create a new lock, open_mutex, just to ensure exclusion between 'open' and 'stop'. This avoids the deadlock and also avoids the lockdep warning mentioned in commit d63a5a74d Reported-by: "Mike Snitzer" Reported-by: "H. Peter Anvin" Signed-off-by: NeilBrown --- drivers/md/md.c | 18 ++++++++++-------- drivers/md/md.h | 10 ++++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 5b98bea4ff9b..5614500092e3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) else new->md_minor = MINOR(unit) >> MdpMinorShift; + mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); @@ -4304,12 +4305,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) struct gendisk *disk = mddev->gendisk; mdk_rdev_t *rdev; + mutex_lock(&mddev->open_mutex); if (atomic_read(&mddev->openers) > is_open) { printk("md: %s still in use.\n",mdname(mddev)); - return -EBUSY; - } - - if (mddev->pers) { + err = -EBUSY; + } else if (mddev->pers) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -4367,7 +4367,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) set_disk_ro(disk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } - +out: + mutex_unlock(&mddev->open_mutex); + if (err) + return err; /* * Free resources if final stop */ @@ -4433,7 +4436,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) blk_integrity_unregister(disk); md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); -out: return err; } @@ -5518,12 +5520,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) } BUG_ON(mddev != bdev->bd_disk->private_data); - if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) + if ((err = mutex_lock_interruptible(&mddev->open_mutex))) goto out; err = 0; atomic_inc(&mddev->openers); - mddev_unlock(mddev); + mutex_unlock(&mddev->open_mutex); check_disk_change(bdev); out: diff --git a/drivers/md/md.h b/drivers/md/md.h index 78f03168baf9..f8fc188bc762 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -223,6 +223,16 @@ struct mddev_s * so we don't loop trying */ int in_sync; /* know to not need resync */ + /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so + * that we are never stopping an array while it is open. + * 'reconfig_mutex' protects all other reconfiguration. + * These locks are separate due to conflicting interactions + * with bdev->bd_mutex. 
+ * Lock ordering is: + * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk + * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open + */ + struct mutex open_mutex; struct mutex reconfig_mutex; atomic_t active; /* general refcount */ atomic_t openers; /* number of active opens */ -- cgit v1.2.3 From 85dfd81dc57e8183a277ddd7a56aa65c96f3f487 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 10 Aug 2009 13:21:19 -0700 Subject: pty: fix data loss when stopped (^S/^Q) Commit d945cb9cc ("pty: Rework the pty layer to use the normal buffering logic") dropped the test for 'tty->stopped' in pty_write_room(), which then causes the n_tty line discipline thing to not throttle the data properly when the tty is stopped. So instead of pausing the write due to the tty being stopped, the ldisc layer would go ahead and push it down to the pty. The pty write() routine would then refuse to take the data (because it _did_ check 'stopped'), and the data wouldn't actually be written. This whole stopped test should eventually be moved into the tty ldisc layer rather than have low-level tty drivers care about these things, but right now the fix is to just re-instate the missing pty 'stopped' handling. Reported-and-tested-by: Artur Skawina Cc: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/pty.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 6e6942c45f5b..d083c73d784a 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -144,6 +144,8 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, static int pty_write_room(struct tty_struct *tty) { + if (tty->stopped) + return 0; return pty_space(tty->link); } -- cgit v1.2.3 From 651b1f125c7e3806bbd635739d009433dc07372d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 10 Aug 2009 23:41:18 +0200 Subject: PM / Driver Core: Kill dev_pm_ops platform warning for now Commit 783ea7d4eeefe895f2731fe73ac951e94418927b (Driver Core: Rework platform suspend/resume, print warning) added a warning message printed for platform drivers that use the legacy PM callbacks rather than struct dev_pm_ops. Unfortunately, this resulted in some confusion and made some people try to convert drivers by replacing the old callbacks with struct dev_pm_ops in automatic way, which generally is not a good idea. Remove the platform device runtime dev_pm_ops warning for now, because it's annoying to users and it's not really necessary right now. [rjw: Modified the changelog to be more informative.] Signed-off-by: Magnus Damm Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/platform.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 81cb01bfc356..456594bd97bc 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -483,9 +483,6 @@ int platform_driver_register(struct platform_driver *drv) drv->driver.remove = platform_drv_remove; if (drv->shutdown) drv->driver.shutdown = platform_drv_shutdown; - if (drv->suspend || drv->resume) - pr_warning("Platform driver '%s' needs updating - please use " - "dev_pm_ops\n", drv->driver.name); return driver_register(&drv->driver); } -- cgit v1.2.3 From df9eba8c9febf53782ef896518e7177999d98188 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Aug 2009 11:15:20 +0900 Subject: pata_at91: fix resource release Julias Lawall discovered that pata_at91 wasn't freeing a memory region allocated with kzalloc() on init failure paths. Upon review, pata_at91 also seems to be doing unnecessary explicit resource releases for managed resources too. Convert memory allocation to managed one and drop unnecessary explicit resource releases. Signed-off-by: Tejun Heo Cc: Julia Lawall Cc: Sergey Matyukevich Signed-off-by: Jeff Garzik --- drivers/ata/pata_at91.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c index 5702affcb325..41c94b1ae493 100644 --- a/drivers/ata/pata_at91.c +++ b/drivers/ata/pata_at91.c @@ -250,7 +250,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) ata_port_desc(ap, "no IRQ, using PIO polling"); } - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) { dev_err(dev, "failed to allocate memory for private data\n"); @@ -275,7 +275,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) if (!info->ide_addr) { dev_err(dev, "failed to map IO base\n"); ret = -ENOMEM; - goto err_ide_ioremap; + goto err_put; } info->alt_addr = devm_ioremap(dev, @@ -284,7 +284,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) if (!info->alt_addr) { dev_err(dev, "failed to map CTL base\n"); ret = -ENOMEM; - goto err_alt_ioremap; + goto err_put; } ap->ioaddr.cmd_addr = info->ide_addr; @@ -303,13 +303,8 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) irq ? ata_sff_interrupt : NULL, irq_flags, &pata_at91_sht); -err_alt_ioremap: - devm_iounmap(dev, info->ide_addr); - -err_ide_ioremap: +err_put: clk_put(info->mck); - kfree(info); - return ret; } @@ -317,7 +312,6 @@ static int __devexit pata_at91_remove(struct platform_device *pdev) { struct ata_host *host = dev_get_drvdata(&pdev->dev); struct at91_ide_info *info; - struct device *dev = &pdev->dev; if (!host) return 0; @@ -328,11 +322,8 @@ static int __devexit pata_at91_remove(struct platform_device *pdev) if (!info) return 0; - devm_iounmap(dev, info->ide_addr); - devm_iounmap(dev, info->alt_addr); clk_put(info->mck); - kfree(info); return 0; } -- cgit v1.2.3 From 1fd4bbec8c0d6db96b02141f324066afa2e77e89 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 6 Aug 2009 17:47:05 +0200 Subject: pata_atiixp: fix second channel support PIO and MWDMA timings are never programmed for the second channel because timing registers are treated as 16-bit long ones. 
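A rough stand-alone illustration of the truncation (register layout simplified to one timing byte per drive with the secondary channel in the upper 16 bits, as implied by the driver's timing_shift = (16 * port_no) + 8 * (devno ^ 1)): a shift of 24 is simply lost through a 16-bit read-modify-write.

#include <stdio.h>
#include <stdint.h>

/* Same shift the driver computes; port 1 is the secondary channel. */
static unsigned int timing_shift(int port, int devno)
{
	return 16 * port + 8 * (devno ^ 1);
}

int main(void)
{
	unsigned int shift = timing_shift(1, 0);	/* secondary master -> 24 */
	uint32_t value = (uint32_t)0x22 << shift;	/* timing byte for that drive */

	uint32_t dword_reg = value;		/* 32-bit access: byte lands in bits 24..31 */
	uint16_t word_reg = (uint16_t)value;	/* 16-bit access: byte is truncated away */

	printf("shift=%u dword=0x%08x word=0x%04x\n",
	       shift, (unsigned int)dword_reg, (unsigned int)word_reg);
	return 0;
}

Switching the config-space accessors from word to dword size, as the patch does, makes the upper half of the register reachable again.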
The bug is an attixp -> pata_atiixp regression and goes back to: commit 669a5db411d85a14f86cd92bc16bf7ab5b8aa235 Author: Jeff Garzik Date: Tue Aug 29 18:12:40 2006 -0400 [libata] Add a bunch of PATA drivers. Cc: Krystian Juskowiak Cc: Andrew Morton Cc: Borislav Petkov Cc: Robert Hancock Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jeff Garzik --- drivers/ata/pata_atiixp.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index bec0b8ade66d..45915566e4e9 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -1,6 +1,7 @@ /* * pata_atiixp.c - ATI PATA for new ATA layer * (C) 2005 Red Hat Inc + * (C) 2009 Bartlomiej Zolnierkiewicz * * Based on * @@ -61,20 +62,19 @@ static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev, struct pci_dev *pdev = to_pci_dev(ap->host->dev); int dn = 2 * ap->port_no + adev->devno; - - /* Check this is correct - the order is odd in both drivers */ int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); - u16 pio_mode_data, pio_timing_data; + u32 pio_timing_data; + u16 pio_mode_data; pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); pio_mode_data &= ~(0x7 << (4 * dn)); pio_mode_data |= pio << (4 * dn); pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data); - pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); + pci_read_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); pio_timing_data &= ~(0xFF << timing_shift); pio_timing_data |= (pio_timings[pio] << timing_shift); - pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); + pci_write_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); } /** @@ -119,16 +119,17 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev) udma_mode_data |= dma << (4 * dn); pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data); } else { - u16 mwdma_timing_data; - /* Check this is correct - the order is odd in both drivers */ int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); + u32 mwdma_timing_data; dma -= XFER_MW_DMA_0; - pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data); + pci_read_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING, + &mwdma_timing_data); mwdma_timing_data &= ~(0xFF << timing_shift); mwdma_timing_data |= (mwdma_timings[dma] << timing_shift); - pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data); + pci_write_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING, + mwdma_timing_data); } /* * We must now look at the PIO mode situation. We may need to -- cgit v1.2.3 From 7831387bda72af3059be48d39846d3eb6d8ce2f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Aug 2009 01:59:15 +0900 Subject: libata: OCZ Vertex can't do HPA OCZ Vertex SSD can't do HPA and not in a usual way. It reports HPA, allows unlocking but then fails all IOs which fall in the unlocked area. Quirk it so that HPA unlocking is not used for the device. Reported by Daniel Perup in bnc#522414. 
https://bugzilla.novell.com/show_bug.cgi?id=522414 Signed-off-by: Tejun Heo Reported-by: Daniel Perup Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8ac98ff16d7d..072ba5ea138f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4302,6 +4302,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA }, { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA }, + /* this one allows HPA unlocking but fails IOs on the area */ + { "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA }, + /* Devices which report 1 sector over size HPA */ { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, }, { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, }, -- cgit v1.2.3 From 51c8949950647afeeb897e08dd75ad99078adb50 Mon Sep 17 00:00:00 2001 From: Tony Vroon Date: Thu, 6 Aug 2009 00:50:09 +0100 Subject: sata_nv: MSI support, disabled by default At least the nVidia MCP55 controller quite happily supports MSI. This adds an option to use it. It is disabled by default. As per feedback by Robert Hancock, it will honour the user request as the kernel will not enable MSI where the controller or the specific system configuration do not support it. Signed-off-by: Tony Vroon Cc: Robert Hancock Signed-off-by: Jeff Garzik --- drivers/ata/sata_nv.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index b2d11f300c39..86a40582999c 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -602,6 +602,7 @@ MODULE_VERSION(DRV_VERSION); static int adma_enabled; static int swncq_enabled = 1; +static int msi_enabled; static void nv_adma_register_mode(struct ata_port *ap) { @@ -2459,6 +2460,11 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } else if (type == SWNCQ) nv_swncq_host_init(host); + if (msi_enabled) { + dev_printk(KERN_NOTICE, &pdev->dev, "Using MSI\n"); + pci_enable_msi(pdev); + } + pci_set_master(pdev); return ata_host_activate(host, pdev->irq, ipriv->irq_handler, IRQF_SHARED, ipriv->sht); @@ -2558,4 +2564,6 @@ module_param_named(adma, adma_enabled, bool, 0444); MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)"); module_param_named(swncq, swncq_enabled, bool, 0444); MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)"); +module_param_named(msi, msi_enabled, bool, 0444); +MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)"); -- cgit v1.2.3 From b6931c1fbaf7fda9ea7f120228a96600d7090049 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Wed, 5 Aug 2009 10:10:41 +0800 Subject: ahci: Soften up the dmesg on SB600 PMP softreset failure recovery Too strong words led to spurious bug reports: Novell bugzilla #527748, RedHat bugzilla #468800. This patch is used to soften up the dmesg on SB600 PMP softreset failure recovery, so as to remove the scariness and concern from community. 
Reported-by: pgnet Dev Signed-off-by: Shane Huang Cc: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 958c1fa41900..838ff73b08e1 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1773,7 +1773,8 @@ static int ahci_sb600_softreset(struct ata_link *link, unsigned int *class, irq_sts = readl(port_mmio + PORT_IRQ_STAT); if (irq_sts & PORT_IRQ_BAD_PMP) { ata_link_printk(link, KERN_WARNING, - "failed due to HW bug, retry pmp=0\n"); + "applying SB600 PMP SRST workaround " + "and retrying\n"); rc = ahci_do_softreset(link, class, 0, deadline, ahci_check_ready); } -- cgit v1.2.3 From 5594639aab8b5614cb27a3e5b2b627505cbcd137 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Aug 2009 14:30:08 +0900 Subject: ahci: add workaround for on-board 5723s on some gigabyte boards Some gigabytes have on-board SIMG5723s connected to JMB ahcis. These are used to implement hardware raid. Unfortunately some firmware revisions on these 5723s don't bring the link down when all the downstream ports are unoccupied while not responding to reset protocol which makes libata think that there's device attached to the port but is not responding and retry. This results in painfully wrong boot detection time for these ports when they're empty. This patch quirks those boards such that ahci gives up after the initial timeout. Combined with parallel probing, this gives quick enough probing and also is safe because SIMG5723 will respond to the first try if any of the downstream ports is occupied. Signed-off-by: Tejun Heo Reported-by: Marc Bowes Reported-by: Nicolas Mailhot Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 838ff73b08e1..fe3eba5d6b3e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -219,6 +219,8 @@ enum { AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ + AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as + link offline */ /* ap->flags bits */ @@ -1663,6 +1665,7 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class, int (*check_ready)(struct ata_link *link)) { struct ata_port *ap = link->ap; + struct ahci_host_priv *hpriv = ap->host->private_data; const char *reason = NULL; unsigned long now, msecs; struct ata_taskfile tf; @@ -1701,12 +1704,21 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class, /* wait for link to become ready */ rc = ata_wait_after_reset(link, deadline, check_ready); - /* link occupied, -ENODEV too is an error */ - if (rc) { + if (rc == -EBUSY && hpriv->flags & AHCI_HFLAG_SRST_TOUT_IS_OFFLINE) { + /* + * Workaround for cases where link online status can't + * be trusted. Treat device readiness timeout as link + * offline. 
+ */ + ata_link_printk(link, KERN_INFO, + "device not ready, treating as offline\n"); + *class = ATA_DEV_NONE; + } else if (rc) { + /* link occupied, -ENODEV too is an error */ reason = "device not ready"; goto fail; - } - *class = ahci_dev_classify(ap); + } else + *class = ahci_dev_classify(ap); DPRINTK("EXIT, class=%u\n", *class); return 0; @@ -2727,6 +2739,56 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) return !ver || strcmp(ver, dmi->driver_data) < 0; } +static bool ahci_broken_online(struct pci_dev *pdev) +{ +#define ENCODE_BUSDEVFN(bus, slot, func) \ + (void *)(unsigned long)(((bus) << 8) | PCI_DEVFN((slot), (func))) + static const struct dmi_system_id sysids[] = { + /* + * There are several gigabyte boards which use + * SIMG5723s configured as hardware RAID. Certain + * 5723 firmware revisions shipped there keep the link + * online but fail to answer properly to SRST or + * IDENTIFY when no device is attached downstream + * causing libata to retry quite a few times leading + * to excessive detection delay. + * + * As these firmwares respond to the second reset try + * with invalid device signature, considering unknown + * sig as offline works around the problem acceptably. + */ + { + .ident = "EP45-DQ6", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "EP45-DQ6"), + }, + .driver_data = ENCODE_BUSDEVFN(0x0a, 0x00, 0), + }, + { + .ident = "EP45-DS5", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "EP45-DS5"), + }, + .driver_data = ENCODE_BUSDEVFN(0x03, 0x00, 0), + }, + { } /* terminate list */ + }; +#undef ENCODE_BUSDEVFN + const struct dmi_system_id *dmi = dmi_first_match(sysids); + unsigned int val; + + if (!dmi) + return false; + + val = (unsigned long)dmi->driver_data; + + return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff); +} + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version; @@ -2842,6 +2904,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "BIOS update required for suspend/resume\n"); } + if (ahci_broken_online(pdev)) { + hpriv->flags |= AHCI_HFLAG_SRST_TOUT_IS_OFFLINE; + dev_info(&pdev->dev, + "online status unreliable, applying workaround\n"); + } + /* CAP.NP sometimes indicate the index of the last enabled * port, at other times, that of the last possible port, so * determining the maximum port number requires looking at -- cgit v1.2.3 From 67fe0688082509c52bd451d10a61b3565169c23e Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 12 Aug 2009 06:29:57 -0400 Subject: Remove zero-length file drivers/mtd/maps/sbc8240.c It was "deleted" in commit 2bf961b7ccd69e108ac435c67e2b0522b403c578 Signed-off-by: Jeff Garzik --- drivers/mtd/maps/sbc8240.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 drivers/mtd/maps/sbc8240.c (limited to 'drivers') diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.3 From 51d5668cb2e3fd1827a55184e48606fff054c5be Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 09:54:02 +1000 Subject: md: never advance 'events' counter by more than 1. When assembling arrays, md allows two devices to have different event counts as long as the difference is only '1'. This is to cope with a system failure between updating the metadata on two difference devices. 
However there are currently times when we update the event count by 2. This was done to keep the event count even when the array is clean and odd when it is dirty, which allows us to avoid writing common update to spare devices and so allow those spares to go to sleep. This is bad for the above reason. So change it to never increase by two. This means that the alignment between 'odd/even' and 'clean/dirty' might take a little longer to attain, but that is only a small cost. The spares will get a few more updates but that will still be spared (;-) most updates and can still go to sleep. Prior to this patch there was a small chance that after a crash an array would fail to assemble due to the overly large event count mismatch. Signed-off-by: NeilBrown --- drivers/md/md.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 5614500092e3..d18805fea111 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1975,17 +1975,14 @@ repeat: /* otherwise we have to go forward and ... */ mddev->events ++; if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ - /* .. if the array isn't clean, insist on an odd 'events' */ - if ((mddev->events&1)==0) { - mddev->events++; + /* .. if the array isn't clean, an 'even' event must also go + * to spares. */ + if ((mddev->events&1)==0) nospares = 0; - } } else { - /* otherwise insist on an even 'events' (for clean states) */ - if ((mddev->events&1)) { - mddev->events++; + /* otherwise an 'odd' event must go to spares */ + if ((mddev->events&1)) nospares = 0; - } } } -- cgit v1.2.3 From 67ac6011db5d2b0c853d573ff474b25c85dfb644 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:06:24 +1000 Subject: md/raid5: allow new reshape modes to be restarted in the middle. md/raid5 doesn't allow a reshape to restart if it involves writing over the same part of disk that it would be reading from. This happens at the beginning of a reshape that increases the number of devices, at the end of a reshape that decreases the number of devices, and continuously for a reshape that does not change the number of devices. The current code is correct for the "increase number of devices" case as the critical section at the start is handled by userspace performing a backup. It does not work for reducing the number of devices, or the no-change case. For 'reducing', we need to invert the test. For no-change we cannot really be sure things will be safe, so simply require the array to be read-only, which is how the user-space code which carefully starts such arrays works. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2b521ee67dfa..b8a22a2205cf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev) (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ - if (here_new >= here_old) { + if (mddev->delta_disks == 0) { + /* We cannot be sure it is safe to start an in-place + * reshape. It is only safe if user-space if monitoring + * and taking constant backups. + * mdadm always starts a situation like this in + * readonly mode so it can take control before + * allowing any writes. So just check for that. 
+ */ + if ((here_new * mddev->new_chunk_sectors != + here_old * mddev->chunk_sectors) || + mddev->ro == 0) { + printk(KERN_ERR "raid5: in-place reshape must be started" + " in read-only mode - aborting\n"); + return -EINVAL; + } + } else if (mddev->delta_disks < 0 + ? (here_new * mddev->new_chunk_sectors <= + here_old * mddev->chunk_sectors) + : (here_new * mddev->new_chunk_sectors >= + here_old * mddev->chunk_sectors)) { /* Reading from the same stripe as writing to - bad */ printk(KERN_ERR "raid5: reshape_position too early for " "auto-recovery - aborting.\n"); -- cgit v1.2.3 From a639755cf885e437b2fe4168d35157fa90d530ab Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:13:00 +1000 Subject: md/raid5: make sure a reshape restarts at the correct address. This "if" don't allow for the possibility that the number of devices doesn't change, and so sector_nr isn't set correctly in that case. So change '>' to '>='. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b8a22a2205cf..94a74cb5cccb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_progress < raid5_size(mddev, 0, 0)) { sector_nr = raid5_size(mddev, 0, 0) - conf->reshape_progress; - } else if (mddev->delta_disks > 0 && + } else if (mddev->delta_disks >= 0 && conf->reshape_progress > 0) sector_nr = conf->reshape_progress; sector_div(sector_nr, new_data_disks); -- cgit v1.2.3 From 1a67dde0abba36421a1257d01ba9de2f6d1c160a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:41:49 +1000 Subject: md/raid5: Properly remove excess drives after shrinking a raid5/6 We were removing the drives, from the array, but not removing symlinks from /sys/.... and not marking the device as having been removed. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 94a74cb5cccb..b8a2c5dc67ba 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5097,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev) mddev->degraded--; for (d = conf->raid_disks ; d < conf->raid_disks - mddev->delta_disks; - d++) - raid5_remove_disk(mddev, d); + d++) { + mdk_rdev_t *rdev = conf->disks[d].rdev; + if (rdev && raid5_remove_disk(mddev, d) == 0) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } } mddev->layout = conf->algorithm; mddev->chunk_sectors = conf->chunk_sectors; -- cgit v1.2.3 From 4d484a4a7a5126410eed5f8dd329a33f6eeed068 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Aug 2009 10:41:50 +1000 Subject: md: allow upper limit for resync/reshape to be set when array is read-only Normally we only allow the upper limit for a reshape to be decreased when the array not performing a sync/recovery/reshape, otherwise there could be races. But if an array is part-way through a reshape when it is assembled the reshape is started immediately leaving no window to set an upper bound. If the array is started read-only, the reshape will be suspended until the array becomes writable, so that provides a window during which it is perfectly safe to reduce the upper limit of a reshape. 
So: allow the upper limit (sync_max) to be reduced even if the reshape thread is running, as long as the array is still read-only. Signed-off-by: NeilBrown --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index d18805fea111..103f2d33fa89 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3599,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) if (max < mddev->resync_min) return -EINVAL; if (max < mddev->resync_max && + mddev->ro == 0 && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; -- cgit v1.2.3 From 21bc1f024d0d4ea43fc0f2a43504e759261c7b18 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sat, 15 Aug 2009 02:53:16 +0000 Subject: sh: skip disabled LCDC channels This patch updates the SuperH Mobile LCDC driver to skip over disabled channels. Without this patch suspend-to-ram operation will crash if deferred io is enabled. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_lcdcfb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index 8f24564f77b0..07f22b625632 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -481,6 +481,9 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv) /* tell the board code to enable the panel */ for (k = 0; k < ARRAY_SIZE(priv->ch); k++) { ch = &priv->ch[k]; + if (!ch->enabled) + continue; + board_cfg = &ch->cfg.board_cfg; if (board_cfg->display_on) board_cfg->display_on(board_cfg->board_data); @@ -498,6 +501,8 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv) /* clean up deferred io and ask board code to disable panel */ for (k = 0; k < ARRAY_SIZE(priv->ch); k++) { ch = &priv->ch[k]; + if (!ch->enabled) + continue; /* deferred io mode: * flush frame, and wait for frame end interrupt -- cgit v1.2.3 From f6431732f128a241b149c0aa85dfec852455ebf9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sat, 15 Aug 2009 02:53:25 +0000 Subject: sh: CMT suspend/resume This patch updates the SuperH CMT driver with suspend and resume callbacks for the suspend-to-ram case. This patch stops the CMT channel at suspend time to avoid unwanted wake up events. 
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/clocksource/sh_cmt.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers') diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 2964f5f4a7ef..6b3e0c2f33e2 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -40,6 +40,7 @@ struct sh_cmt_priv { struct platform_device *pdev; unsigned long flags; + unsigned long flags_suspend; unsigned long match_value; unsigned long next_match_value; unsigned long max_match_value; @@ -667,11 +668,38 @@ static int __devexit sh_cmt_remove(struct platform_device *pdev) return -EBUSY; /* cannot unregister clockevent and clocksource */ } +static int sh_cmt_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_cmt_priv *p = platform_get_drvdata(pdev); + + /* save flag state and stop CMT channel */ + p->flags_suspend = p->flags; + sh_cmt_stop(p, p->flags); + return 0; +} + +static int sh_cmt_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_cmt_priv *p = platform_get_drvdata(pdev); + + /* start CMT channel from saved state */ + sh_cmt_start(p, p->flags_suspend); + return 0; +} + +static struct dev_pm_ops sh_cmt_dev_pm_ops = { + .suspend = sh_cmt_suspend, + .resume = sh_cmt_resume, +}; + static struct platform_driver sh_cmt_device_driver = { .probe = sh_cmt_probe, .remove = __devexit_p(sh_cmt_remove), .driver = { .name = "sh_cmt", + .pm = &sh_cmt_dev_pm_ops, } }; -- cgit v1.2.3
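The sh_cmt change above follows the standard shape for giving a platform driver dev_pm_ops callbacks; a generic sketch of that wiring, with invented names and stubbed bodies:

#include <linux/platform_device.h>
#include <linux/pm.h>

/* Stub callbacks: a real driver saves state and quiesces the hardware
 * here so it cannot raise spurious wakeups while suspended, then
 * restores that state on resume. */
static int example_suspend(struct device *dev)
{
	return 0;
}

static int example_resume(struct device *dev)
{
	return 0;
}

static struct dev_pm_ops example_pm_ops = {
	.suspend = example_suspend,
	.resume  = example_resume,
};

static struct platform_driver example_driver = {
	.driver = {
		.name	= "example",
		.pm	= &example_pm_ops,
	},
};

Registration still goes through platform_driver_register(&example_driver) as usual; only the .pm hookup is new.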